From f51c1bbbb64c949917ac887d52ecf0137bff8e37 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 21 May 2024 11:12:27 -0400 Subject: [PATCH 1/2] Commit code to reproduce comparison tests At least on my machine (M1 Pro), I get the following on python 3.11: ``` Running rbloom 2.52s Running pybloomfiltermmap3 4.78s Running pybloom3 46.76s Running flor 76.94s (via bytes because cannot convert 'float' object to bytes) Running bloomfilter2 165.54s (via bytes because unsupported operand type(s) for <<: 'int' and 'float') ``` I didn't update the benchmark results in the README. --- README.md | 2 +- benchmarks/compare.py | 59 +++++++++++++++++++ .../per_operation.py | 0 benchmarks/requirements.in | 5 ++ benchmarks/requirements.txt | 14 +++++ 5 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 benchmarks/compare.py rename tests/benchmark.py => benchmarks/per_operation.py (100%) create mode 100644 benchmarks/requirements.in create mode 100644 benchmarks/requirements.txt diff --git a/README.md b/README.md index 06a544f..5a54265 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ edge cases. ## Benchmarks I implemented the following simple benchmark in the respective API of -each library: +each library (see the [comparison benchmarks](benchmarks/compare.py)): ```python bf = Bloom(10_000_000, 0.01) diff --git a/benchmarks/compare.py b/benchmarks/compare.py new file mode 100644 index 0000000..5e72c56 --- /dev/null +++ b/benchmarks/compare.py @@ -0,0 +1,59 @@ +import struct +import timeit + +import pybloomfilter +import bloom_filter2 +import flor +import pybloom +import rbloom + + +NUM_ITEMS = 10_000_000 + + +def run(ty): + bf = ty(NUM_ITEMS, 0.01) + + for i in range(NUM_ITEMS): + bf.add(i + 0.5) # floats because ints are hashed as themselves + + for i in range(NUM_ITEMS): + if i + 0.5 not in bf: + raise ValueError("Should be no false negatives") + + +def run_bytes(ty): + bf = ty(NUM_ITEMS, 0.01) + + for i in range(NUM_ITEMS): + bf.add(struct.pack("d", i + 0.5)) + + for i in range(NUM_ITEMS): + if struct.pack("d", i + 0.5) not in bf: + raise ValueError("Should be no false negatives") + + +types = { + "rbloom": rbloom.Bloom, + "pybloomfiltermmap3": pybloomfilter.BloomFilter, + "pybloom3": pybloom.BloomFilter, + "flor": flor.BloomFilter, + "bloomfilter2": bloom_filter2.BloomFilter, +} + + +def main(): + for name, ty in types.items(): + print(f"Running {name}") + try: + results = timeit.repeat(lambda: run(ty), number=1, repeat=5) + extras = "" + except Exception as e: + results = timeit.repeat(lambda: run_bytes(ty), number=1, repeat=5) + extras = f" (via bytes because {e})" + avg = sum(results) / len(results) + print(f" {avg:6.2f}s{extras}") + + +if __name__ == "__main__": + main() diff --git a/tests/benchmark.py b/benchmarks/per_operation.py similarity index 100% rename from tests/benchmark.py rename to benchmarks/per_operation.py diff --git a/benchmarks/requirements.in b/benchmarks/requirements.in new file mode 100644 index 0000000..23a7160 --- /dev/null +++ b/benchmarks/requirements.in @@ -0,0 +1,5 @@ +-e ../ +pybloomfiltermmap3 +pybloom3 +Flor +bloom-filter2 diff --git a/benchmarks/requirements.txt b/benchmarks/requirements.txt new file mode 100644 index 0000000..6f304dd --- /dev/null +++ b/benchmarks/requirements.txt @@ -0,0 +1,14 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile ./requirements.in +-e ../ + # via -r ./requirements.in +bitarray==2.9.2 + # via pybloom3 +bloom-filter2==2.0.0 + # via -r ./requirements.in +flor==1.1.3 + # via -r ./requirements.in +pybloom3==0.0.3 + # via -r ./requirements.in +pybloomfiltermmap3==0.6.0 + # via -r ./requirements.in From ac031e8dff03072ea8d4425a46a84849748d4f60 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 21 May 2024 12:17:24 -0400 Subject: [PATCH 2/2] Include Cargo.lock in VCS Per [updated guidance from rust][1], we should probably include the Cargo.lock. [1]: https://blog.rust-lang.org/2023/08/29/committing-lockfiles.html --- .gitignore | 2 - Cargo.lock | 295 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 295 insertions(+), 2 deletions(-) create mode 100644 Cargo.lock diff --git a/.gitignore b/.gitignore index 70a70f1..c8f0442 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,3 @@ -/Cargo.lock - /target # Byte-compiled / optimized / DLL files diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..b1b02c0 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,295 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "parking_lot" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + +[[package]] +name = "proc-macro2" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "parking_lot", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rbloom" +version = "1.5.1" +dependencies = [ + "pyo3", +] + +[[package]] +name = "redox_syscall" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +dependencies = [ + "bitflags", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "syn" +version = "2.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"