[Bug] HNSW bugs #3486
Our fork of hnswlib has several issues that require investigation. Tracking in this master issue until we have bandwidth to take a closer look.

- Node saturation with deleted neighbors can cause queries to return fewer results than requested even with a high ef, causing an "M is too small" exception: [Bug]: HNSW - Cannot return the results in a contigious 2D array. Probably ef or M is too small #2620
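For context, here is a minimal sketch (not taken from the linked issue) of how the underlying hnswlib error can surface: mark most elements deleted, then ask for more neighbors than the live elements can supply. The dimensions, sizes, and parameters below are arbitrary illustrative values:

```python
import hnswlib
import numpy as np

dim = 16
num = 100
data = np.random.rand(num, dim).astype(np.float32)

index = hnswlib.Index(space="l2", dim=dim)
index.init_index(max_elements=num, ef_construction=100, M=8)
index.add_items(data, np.arange(num))

# Deleted nodes are filtered out of query results, but they still
# occupy slots in their neighbors' adjacency lists.
for label in range(num - 5):
    index.mark_deleted(label)

index.set_ef(10)
# Only 5 live elements remain, so asking for k=10 raises:
# "Cannot return the results in a contigious 2D array. Probably ef or M is too small"
index.knn_query(data[:1], k=10)
```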
I did some more experimentation on the node saturation with deleted neighbors. It turns out that periodic compaction, more frequent for lower values of M, almost entirely eliminates the failures (see the cadence sketch after the script). Here's the test script I used (it runs for about 3 days to completion):

```python
from chromadb.types import SegmentScope
import traceback
from chromadb.segment.impl.manager.local import LocalSegmentManager
import time
import uuid
import chromadb
import numpy as np
from itertools import product
from chroma_ops.hnsw import rebuild_hnsw

np.random.seed(42)

# Parameter grid swept by main(): one test run per (M, construction_ef, search_ef).
search_ef = [50, 100, 200, 300]
m = [32, 64, 128, 256]
construction_ef = [300, 400, 500]
iterations = 10000
records_to_add = 1000

client = chromadb.PersistentClient("contiguous2d")
f = open("hnsw_failure_2620_with_compactions.txt", "a+")
rebuild_iterations = 10  # compact (rebuild) the HNSW index every N iterations


def close_collection(chroma_client: chromadb.ClientAPI, collection: chromadb.Collection) -> None:
    # Evict the collection's vector and metadata segments from the local
    # segment cache so the HNSW index files are released before rebuilding.
    local_manager: LocalSegmentManager = chroma_client._server._manager
    vector_segment = local_manager._get_segment_sysdb(collection.id, SegmentScope.VECTOR)
    metadata_segment = local_manager._get_segment_sysdb(collection.id, SegmentScope.METADATA)
    local_manager.callback_cache_evict(vector_segment)
    local_manager.callback_cache_evict(metadata_segment)


def run_test(m_value: int, construction_ef_value: int, search_ef_value: int) -> None:
    start = time.perf_counter()
    print(f"Running test with m: {m_value}, construction_ef: {construction_ef_value}, search_ef: {search_ef_value}")
    collection_name = f"test_collection-m{m_value}-construction_{construction_ef_value}-search_{search_ef_value}"
    collection = client.get_or_create_collection(
        collection_name,
        metadata={
            "hnsw:M": int(m_value),
            "hnsw:construction_ef": int(construction_ef_value),
            "hnsw:search_ef": int(search_ef_value),
        },
    )
    compactions = 0
    total_iterations = 0
    total_compaction_time = 0.0
    average_compaction_time = 0.0
    for i in range(iterations):
        try:
            # Churn: add a batch, query once, then delete the whole batch.
            data = np.random.uniform(-1, 1, (records_to_add, 384))
            ids = [f"{uuid.uuid4()}" for _ in range(records_to_add)]
            collection.add(ids=ids, embeddings=data.tolist())
            collection.query(
                query_embeddings=[data[np.random.choice(range(records_to_add))].tolist()],
                n_results=10,
            )
            collection.delete(ids=ids)
            if i % rebuild_iterations == 0:
                close_collection(client, collection)
                compaction_time_start = time.perf_counter()
                # Index compaction: rebuild the HNSW index to purge deleted nodes.
                rebuild_hnsw("contiguous2d", collection_name=collection_name, backup=False, yes=True)
                collection = client.get_collection(collection_name)
                compactions += 1
                compaction_time = time.perf_counter() - compaction_time_start
                total_compaction_time += compaction_time
                average_compaction_time = total_compaction_time / compactions
                print(f"Compaction {compactions} took {compaction_time} seconds, "
                      f"iteration {i}, average compaction time: {average_compaction_time}")
        except Exception as e:
            traceback.print_exc()
            print(
                f"iteration: {i}, m: {m_value}, construction_ef: {construction_ef_value}, "
                f"search_ef: {search_ef_value}, records_added: {i * records_to_add}, "
                f"records_deleted: {(i * records_to_add) - records_to_add}, "
                f"records_to_add: {records_to_add}, "
                f"time_to_error: {time.perf_counter() - start}, exception: {str(e)}\n",
                file=f,
            )
            f.flush()
            break
        total_iterations += 1
    print(
        f"iteration: {total_iterations}, m: {m_value}, construction_ef: {construction_ef_value}, "
        f"search_ef: {search_ef_value}, records_added: {total_iterations * records_to_add}, "
        f"records_deleted: {(total_iterations * records_to_add) - records_to_add}, "
        f"total_time: {time.perf_counter() - start}, average_compaction_time: {average_compaction_time}\n",
        file=f,
    )
    f.flush()


def main():
    # Sweep every (M, construction_ef, search_ef) combination.
    permutations = product(m, construction_ef, search_ef)
    for permutation in permutations:
        run_test(*permutation)


if __name__ == "__main__":
    main()
    f.close()
```

And here are the results:

(results table not preserved)
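The compaction cadence in the script above is fixed (rebuild_iterations = 10), while the observation is that lower M needs more frequent rebuilds. Here is a hypothetical sketch of how one might scale the interval with M; the linear scaling and the base values are assumptions for illustration, not something measured in this test:

```python
# Hypothetical cadence helper (not part of the test script above):
# scale the rebuild interval linearly with M, so lower-M indexes are
# compacted more often. base_interval=10 at base_m=256 is an assumed anchor.
def rebuild_interval(m_value: int, base_interval: int = 10, base_m: int = 256) -> int:
    return max(1, (base_interval * m_value) // base_m)

# e.g. M=32 -> rebuild every iteration; M=256 -> every 10 iterations.
```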