Refactor benchmark to use randomized vectors instead of sets

- Replaced std::set with std::vector for sequence handling.
- Added randomization of sequences to avoid ordered input bias.
- Removed the balance function and its helpers (traverseInOrder, rebuild) from bst.cpp and bst.h; the benchmark no longer calls balance.
- Fixed bugs in list insertion and search logic.
- Updated plot.py to allow custom y-axis labels and enable log scale for build plots.
This commit is contained in:
2025-05-14 19:42:14 +02:00
parent aae0ce7241
commit 899c844c78
5 changed files with 36 additions and 59 deletions
+18 -8
View File
@@ -2,6 +2,7 @@
#include "bst/bst.h"
#include "list/list.h"
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <iostream>
@@ -9,7 +10,7 @@
#include <random>
#include <set>
void measureList(std::set<int> *sequence, FILE *file) {
void measureList(std::vector<int> *sequence, FILE *file) {
float buildTime = 0, searchTime = 0, deleteTime = 0;
for (int i = 0; i < 10; i++) {
@@ -55,7 +56,7 @@ void measureList(std::set<int> *sequence, FILE *file) {
fprintf(file, "List,%f,%f,%f\n", buildTime, searchTime, deleteTime);
}
void measureBST(std::set<int> *sequence, FILE *file) {
void measureBST(std::vector<int> *sequence, FILE *file) {
float buildTime = 0, searchTime = 0, deleteTime = 0;
for (int i = 0; i < 10; i++) {
@@ -68,7 +69,7 @@ void measureBST(std::set<int> *sequence, FILE *file) {
for (int value : *sequence) {
root = insert(root, value);
}
root = balance(root);
// root = balance(root);
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_seconds = end - start;
buildTime += elapsed_seconds.count() * 1000; // Convert to milliseconds
@@ -100,7 +101,7 @@ void measureBST(std::set<int> *sequence, FILE *file) {
fprintf(file, "BST,%f,%f,%f\n", buildTime, searchTime, deleteTime);
}
void benchmarkAVL(std::set<int> *sequence, FILE *file) {
void benchmarkAVL(std::vector<int> *sequence, FILE *file) {
Tree *bst = nullptr;
for (int value : *sequence) {
bst = insert(bst, value);
@@ -147,19 +148,28 @@ int main() {
}
for (int n = 1; n < 26; n++) {
// Using a set here ensures that there are no duplicates
std::set<int> sequence;
while (sequence.size() < n * 1000) {
sequence.insert(dis(gen));
}
// Display this like a cascade
std::vector<int> random_sequence_vec;
for (int val : sequence) {
random_sequence_vec.push_back(val);
}
// Then randomize the sequence so the bst isn't a list (Set keeps the elements in order)
std::shuffle(random_sequence_vec.begin(), random_sequence_vec.end(), gen);
// Display the times like a cascade
std::cout << "Running tests for " << n * 1000 << " elements..."
<< std::endl;
if (mode == 1) {
measureList(&sequence, file);
measureBST(&sequence, file);
measureList(&random_sequence_vec, file);
measureBST(&random_sequence_vec, file);
} else {
benchmarkAVL(&sequence, file);
benchmarkAVL(&random_sequence_vec, file);
}
}
+2 -28
View File
@@ -24,33 +24,6 @@ Tree *search(Tree *root, int value) {
return nullptr;
}
// Appends every value stored in the subtree rooted at `root` to `*vec`,
// visiting the left subtree first, then the node itself, then the right
// subtree. A null `root` appends nothing.
void traverseInOrder(Tree *root, std::vector<int> *vec) {
    if (root == nullptr) {
        return;
    }

    traverseInOrder(root->left, vec);
    vec->push_back(root->info);
    traverseInOrder(root->right, vec);
}
// Builds a tree from the elements of `*vec` in the inclusive index range
// [start, end]. The middle element becomes the root and each half of the
// range is built recursively into the left and right subtrees, so the two
// subtrees of any node differ in size by at most one element.
//
// Parameters:
//   vec   - values to place in the tree; elements are read with at(), so an
//           out-of-range index throws std::out_of_range.
//   start - first index of the range.
//   end   - last index of the range; start > end denotes an empty range.
//
// Returns the root of the newly allocated subtree, or nullptr for an empty
// range. Nodes are allocated with `new` and are not freed by this function.
Tree *rebuild(std::vector<int> *vec, int start, int end) {
    if (start > end)
        return nullptr;

    // Same midpoint as (start + end) / 2 for valid indices, but the sum
    // cannot overflow a signed int when the indices are large.
    const int mid = start + (end - start) / 2;

    Tree *node = new Tree();
    node->info = vec->at(mid);
    node->left = rebuild(vec, start, mid - 1);
    node->right = rebuild(vec, mid + 1, end);
    return node;
}
// Returns a new tree holding the same values as `root`, rebuilt from the
// in-order sequence of the input so that the middle value of each range
// becomes the root of its subtree. Returns nullptr when the input tree is
// empty.
//
// NOTE(review): the nodes of the input tree are not freed here. A caller
// that overwrites its only pointer with the result (root = balance(root))
// loses the old nodes unless it releases them first - confirm the intended
// ownership.
Tree *balance(Tree *root) {
    std::vector<int> vec;
    traverseInOrder(root, &vec);

    // Handle the empty tree explicitly instead of relying on size() - 1
    // wrapping around and being narrowed to -1.
    if (vec.empty())
        return nullptr;

    return rebuild(&vec, 0, static_cast<int>(vec.size()) - 1);
}
void deleteTree(Tree *root) {
if (root != nullptr) {
deleteTree(root->left);
@@ -60,7 +33,8 @@ void deleteTree(Tree *root) {
}
int getHeight(Tree *root, int height) {
if (root == nullptr) return height;
if (root == nullptr)
return height;
height += 1;
int leftHeight = getHeight(root->left, height);
int rightHeight = getHeight(root->right, height);
-3
View File
@@ -8,8 +8,5 @@ struct Tree {
Tree *insert(Tree *root, int value);
Tree *search(Tree *root, int value);
void traverseInOrder(Tree *root,std::vector<int> &vec);
Tree *rebuild(std::vector<int> *vec, int start, int end);
Tree *balance(Tree *root);
void deleteTree(Tree *root);
int getHeight(Tree *root, int height);
+12 -16
View File
@@ -16,9 +16,10 @@ List *insert(List *head, int value) {
head = newHead;
} else {
List *tmp = head;
while (tmp->next != nullptr && tmp->data < value) {
tmp = tmp->next;
while (tmp->next != nullptr && tmp->next->data < value) {
tmp = tmp->next;
}
List *tail = new List();
tail->data = value;
// Set the pointer to the next node; we don't know if it's at the end or not
@@ -32,24 +33,19 @@ List *insert(List *head, int value) {
// Looks up `value` in the list that starts at `list`.
//
// Returns a pointer to the first node whose data equals `value`, or nullptr
// when the list is empty or the value is not present.
//
// The scan stops as soon as it meets a node whose data is greater than
// `value`. This assumes the list is kept in ascending order (insert appears
// to maintain that - confirm); on an unsorted list a later match would be
// missed.
List *search(List *list, int value) {
    // Each node is checked for null before it is dereferenced and the cursor
    // advances exactly once per iteration, so no node is skipped.
    for (List *ptr = list; ptr != nullptr; ptr = ptr->next) {
        if (ptr->data == value)
            return ptr;
        if (ptr->data > value)
            return nullptr;
    }
    return nullptr;
}
// Remove the first element.
//
// Deletes the node `head` points to and returns the node that followed it,
// which becomes the new head of the list. Returns nullptr when the list is
// already empty; the result is also null when the removed node was the only
// one.
List *remove(List *head) {
    if (head == nullptr) {
        return nullptr;
    }
    // Save the successor before freeing the current head.
    List *newHead = head->next;
    delete head;
    return newHead;
}
+4 -4
View File
@@ -1,11 +1,11 @@
import matplotlib.pyplot as plt
def plot(header:str, first:list[int], second:list[int], log=False, labels=("Lista", "Drzewo BST")):
def plot(header:str, first:list[int], second:list[int], log=False, labels=("Lista", "Drzewo BST"), ylabel= "Czas (ms)"):
plt.figure(figsize=(10, 6))
plt.plot(range(1000,25001,1000), first, label=labels[0])
plt.plot(range(1000,25001,1000), second, label=labels[1])
plt.xlabel("Rozmiar tablicy")
plt.ylabel("Czas (ms)")
plt.ylabel(ylabel)
if log:
plt.yscale('log', base=2)
plt.title(header)
@@ -42,7 +42,7 @@ if __name__ == "__main__":
listTimes['delete'].append(float(row[3]))
plot("Tworzenie", listTimes['build'], bstTimes['build'])
plot("Tworzenie", listTimes['build'], bstTimes['build'], log=True)
plot("Wyszukiwanie", listTimes['search'], bstTimes['search'], log=True)
plot("Usuwanie", listTimes['delete'], bstTimes['delete'])
@@ -55,4 +55,4 @@ if __name__ == "__main__":
bstHeights = [int(x[0]) for x in values]
avlHeights = [int(x[1]) for x in values]
plot("AVl", bstHeights, avlHeights, log=True, labels=("Drzewo BST", "Drzewo AVL"))
plot("AVL", bstHeights, avlHeights, log=True, labels=("Drzewo BST", "Drzewo AVL"), ylabel="Wysokość drzewa")