Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bug] HNSW bugs #3486

Open
itaismith opened this issue Jan 14, 2025 · 1 comment
Open

[Bug] HNSW bugs #3486

itaismith opened this issue Jan 14, 2025 · 1 comment
Labels
bug Something isn't working by-chroma master

Comments

@itaismith
Copy link
Contributor

itaismith commented Jan 14, 2025

Our fork of hnswlib has several issues that require investigation. Tracking in this master issue until we have bandwidth to take a closer look.

@itaismith itaismith added bug Something isn't working by-chroma labels Jan 14, 2025
@itaismith itaismith changed the title [Bug] HNSW memory leaks [Bug] HNSW bugs Jan 16, 2025
@tazarov
Copy link
Contributor

tazarov commented Jan 23, 2025

I did some more experimentation on the node saturation with deleted neighbors. It turns out that periodic compaction — more frequent for lower M values — almost entirely eliminates "M is too small"-type errors. In my experimentation I simply rebuilt the HNSW index by copying the active labels over from the old index to the new one, but this could also be done at the hnswlib level, probably with minimal memory overhead (compared to my method, which needs to load both HNSW indices into memory).

Here's the test script I used (it takes about 3 days to run to completion):

# Imports: Chroma client/segment internals plus chroma_ops for index rebuilds.
from chromadb.types import SegmentScope
import traceback
from chromadb.segment.impl.manager.local import LocalSegmentManager
import time
import uuid

import chromadb
import numpy as np
from itertools import product
from chroma_ops.hnsw import rebuild_hnsw

# Fixed seed so every run generates the same random embeddings.
np.random.seed(42)

# Hyperparameter grids swept by main() — one run_test() per combination.
search_ef = [50, 100, 200, 300]
m = [32, 64, 128, 256]
construction_ef = [300, 400, 500]
iterations = 10000  # add/query/delete cycles per parameter combination
records_to_add = 1000  # embeddings added (then deleted) in each cycle
client = chromadb.PersistentClient("contiguous2d")  # on-disk Chroma store
f = open("hnsw_failure_2620_with_compactions.txt", "a+")  # shared results log
rebuild_iterations = 10  # rebuild (compact) the HNSW index every N cycles


def close_collection(chroma_client: chromadb.ClientAPI, collection: chromadb.Collection) -> None:
    """Evict a collection's vector and metadata segments from the local segment cache.

    This forces the on-disk HNSW index to be released so it can be rebuilt
    out-of-process (e.g. by chroma_ops' rebuild_hnsw) without the client
    holding it open. Relies on Chroma-internal APIs (``_server._manager``),
    so it may break across Chroma versions.
    """
    local_manager: LocalSegmentManager = chroma_client._server._manager
    vector_segment = local_manager._get_segment_sysdb(collection.id, SegmentScope.VECTOR)
    metadata_segment = local_manager._get_segment_sysdb(collection.id, SegmentScope.METADATA)
    local_manager.callback_cache_evict(vector_segment)
    local_manager.callback_cache_evict(metadata_segment)


def run_test(m_value: int, construction_ef_value: int, search_ef_value: int) -> None:
    """Stress-test one HNSW parameter combination with periodic index rebuilds.

    Each iteration adds ``records_to_add`` random 384-dim embeddings, runs one
    query, then deletes them all — the churn pattern that saturates HNSW nodes
    with deleted neighbors. Every ``rebuild_iterations`` iterations the index
    is compacted via ``rebuild_hnsw``. Failures and final stats are appended
    to the module-level log file ``f``.
    """
    start = time.perf_counter()
    print(f"Running test with m: {m_value}, construction_ef: {construction_ef_value}, search_ef: {search_ef_value}")
    collection_name = f"test_collection-m{m_value}-construction_{construction_ef_value}-search_{search_ef_value}"
    collection = client.get_or_create_collection(collection_name,
                                                 metadata={"hnsw:M": int(m_value),
                                                           "hnsw:construction_ef": int(construction_ef_value),
                                                           "hnsw:search_ef": int(search_ef_value)},
                                                 )
    compactions = 0
    total_iterations = 0
    total_compaction_time = 0.0  # accumulated so the reported average is a true mean
    average_compaction_time = 0.0
    for i in range(iterations):
        try:
            data = np.random.uniform(-1, 1, (records_to_add, 384))
            ids = [f"{uuid.uuid4()}" for _ in range(records_to_add)]
            collection.add(ids=ids, embeddings=data.tolist())
            collection.query(query_embeddings=[data[np.random.choice(range(records_to_add))].tolist()],
                             n_results=10)
            collection.delete(ids=ids)
            if i % rebuild_iterations == 0:
                # Release the segments so the index files can be rebuilt on disk.
                close_collection(client, collection)
                compaction_time_start = time.perf_counter()
                # Index compaction
                rebuild_hnsw("contiguous2d", collection_name=collection_name, backup=False, yes=True)
                collection = client.get_collection(collection_name)
                compactions += 1
                # BUGFIX: previously divided only the *last* compaction's elapsed
                # time by the compaction count; accumulate the total instead.
                compaction_elapsed = time.perf_counter() - compaction_time_start
                total_compaction_time += compaction_elapsed
                average_compaction_time = total_compaction_time / compactions
                print(
                    f"Compaction {compactions} took {compaction_elapsed} seconds, iteration {i}, average compaction time: {average_compaction_time}")
        except Exception as e:
            traceback.print_exc()
            print(
                f"iteration: {i}, m: {m_value}, construction_ef: {construction_ef_value}, search_ef: {search_ef_value},records_added:{i * records_to_add},records_deleted: {(i * records_to_add) - records_to_add}, records_to_add: {records_to_add}, time_to_error: {time.perf_counter() - start}, exception: {str(e)}\n",
                file=f)
            f.flush()
            break
        total_iterations += 1
    print(
        f"iteration: {total_iterations}, m: {m_value}, construction_ef: {construction_ef_value}, search_ef: {search_ef_value},records_added:{total_iterations * records_to_add},records_deleted: {(total_iterations * records_to_add) - records_to_add}, total_time: {time.perf_counter() - start}, average_compaction_time: {average_compaction_time}\n",
        file=f)
    f.flush()


def main():
    """Run one stress test per (M, construction_ef, search_ef) combination."""
    for m_value, ce_value, se_value in product(m, construction_ef, search_ef):
        run_test(m_value, ce_value, se_value)


if __name__ == "__main__":
    try:
        main()
    finally:
        # Close the results log even if main() raises mid-sweep, so buffered
        # failure records are not lost. (Previously f leaked on exceptions.)
        f.close()

And here are the results:

hnsw_failure_2620_with_compactions.txt

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working by-chroma master
Projects
None yet
Development

No branches or pull requests

2 participants