Skip to content

Commit

Permalink
1. delta was not scaled up on serialization; 2. beir script wrong pro…
Browse files Browse the repository at this point in the history
…gress bar
  • Loading branch information
Guest400123064 committed Apr 14, 2024
1 parent c1803f5 commit fbcbad4
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 6 deletions.
7 changes: 2 additions & 5 deletions scripts/benchmark_beir.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,17 @@ def __init__(self, store: BetterBM25DocumentStore) -> None:
self._store = store
self._indexed = False

def index(self, corpus: dict[str, dict[str, str]]) -> int:
def index(self, corpus: dict[str, dict[str, str]]) -> None:
"""Index the corpus for retrieval."""

documents = []
for idx, raw in tqdm.tqdm(corpus.items(), desc="Indexing corpus"):
raw_title = raw.get("title", "")
raw_text = raw.get("text", "")

content = f"title: {raw_title}; text: {raw_text}"
document = Document(idx, content=content)
documents.append(document)

self._store.write_documents([document])
self._indexed = True
return self._store.write_documents(documents)

def search(
self,
Expand Down
2 changes: 1 addition & 1 deletion src/bbm25_haystack/bbm25_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def to_dict(self) -> dict[str, Any]:
self,
k=self.k,
b=self.b,
delta=self.delta,
delta=self.delta * (self.k + 1.0), # Because we scaled it on init
sp_file=self._sp_file,
haystack_filter_logic=self._haystack_filter_logic,
)
Expand Down

0 comments on commit fbcbad4

Please sign in to comment.