Skip to content

Commit

Permalink
Merge pull request #8 from Dr-Emann/commited_comparison_benchmark
Browse files Browse the repository at this point in the history
Add a committed comparison benchmark
  • Loading branch information
KenanHanke authored Oct 1, 2024
2 parents bd8b19d + ac031e8 commit 198f84e
Show file tree
Hide file tree
Showing 7 changed files with 374 additions and 3 deletions.
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
/Cargo.lock

/target

# Byte-compiled / optimized / DLL files
Expand Down
295 changes: 295 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ edge cases.
## Benchmarks

I implemented the following simple benchmark in the respective API of
each library:
each library (see the [comparison benchmarks](benchmarks/compare.py)):

```python
bf = Bloom(10_000_000, 0.01)
Expand Down
59 changes: 59 additions & 0 deletions benchmarks/compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import struct
import timeit

import pybloomfilter
import bloom_filter2
import flor
import pybloom
import rbloom


NUM_ITEMS = 10_000_000


def run(ty):
    """Exercise one Bloom filter implementation with float keys.

    Constructs ``ty(NUM_ITEMS, 0.01)``, adds ``i + 0.5`` for every ``i`` in
    ``range(NUM_ITEMS)``, then looks each of those values up again.

    Args:
        ty: Callable returning a filter object that supports ``add`` and
            the ``in`` operator. The second constructor argument is
            presumably the target false-positive rate (confirm per library).

    Raises:
        ValueError: If a previously added value is reported as absent.
    """
    bloom = ty(NUM_ITEMS, 0.01)

    # Insertion phase. Floats because ints are hashed as themselves.
    for n in range(NUM_ITEMS):
        bloom.add(n + 0.5)

    # Lookup phase: every key inserted above must be found again.
    for n in range(NUM_ITEMS):
        if n + 0.5 in bloom:
            continue
        raise ValueError("Should be no false negatives")


def run_bytes(ty):
    """Exercise one Bloom filter implementation with bytes keys.

    Same workload as the float-based benchmark, except that each value
    ``i + 0.5`` is packed with ``struct.pack("d", ...)`` before it is
    added or looked up.

    Args:
        ty: Callable returning a filter object that supports ``add`` and
            the ``in`` operator.

    Raises:
        ValueError: If a previously added key is reported as absent.
    """
    bloom = ty(NUM_ITEMS, 0.01)

    # Insertion phase: pack each float into its native double encoding.
    for n in range(NUM_ITEMS):
        bloom.add(struct.pack("d", n + 0.5))

    # Lookup phase: every key inserted above must be found again.
    for n in range(NUM_ITEMS):
        if struct.pack("d", n + 0.5) in bloom:
            continue
        raise ValueError("Should be no false negatives")


# Implementations to benchmark: maps the label printed by main() to the
# filter class that is handed to run() / run_bytes(). Dict order is the
# order in which the benchmarks are executed.
types = {
    "rbloom": rbloom.Bloom,
    "pybloomfiltermmap3": pybloomfilter.BloomFilter,
    "pybloom3": pybloom.BloomFilter,
    "flor": flor.BloomFilter,
    "bloomfilter2": bloom_filter2.BloomFilter,
}


def main():
    """Benchmark every entry in ``types`` and print its average runtime.

    Each implementation is timed with ``timeit.repeat`` as five
    repetitions of a single execution. The float-key workload is tried
    first; if it raises any exception, the bytes-key workload is timed
    instead and the exception text is appended to the printed result.
    An exception raised by the bytes fallback itself propagates.
    """
    for name, bloom_cls in types.items():
        print(f"Running {name}")
        extras = ""
        try:
            timings = timeit.repeat(
                lambda: run(bloom_cls), number=1, repeat=5
            )
        except Exception as e:
            # Best-effort fallback for libraries that reject float keys.
            timings = timeit.repeat(
                lambda: run_bytes(bloom_cls), number=1, repeat=5
            )
            extras = f" (via bytes because {e})"
        avg = sum(timings) / len(timings)
        print(f" {avg:6.2f}s{extras}")


# Only run the benchmarks when executed as a script, not on import.
if __name__ == "__main__":
    main()
File renamed without changes.
5 changes: 5 additions & 0 deletions benchmarks/requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-e ../
pybloomfiltermmap3
pybloom3
Flor
bloom-filter2
14 changes: 14 additions & 0 deletions benchmarks/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# This file was autogenerated by uv via the following command:
# uv pip compile ./requirements.in
-e ../
# via -r ./requirements.in
bitarray==2.9.2
# via pybloom3
bloom-filter2==2.0.0
# via -r ./requirements.in
flor==1.1.3
# via -r ./requirements.in
pybloom3==0.0.3
# via -r ./requirements.in
pybloomfiltermmap3==0.6.0
# via -r ./requirements.in

0 comments on commit 198f84e

Please sign in to comment.