Skip to content

Commit

Permalink
refactor!: set scale_score default value to False (#6276)
Browse files Browse the repository at this point in the history
* set default scale_score to False

* release note
  • Loading branch information
anakin87 authored Nov 13, 2023
1 parent 8e7ce20 commit f708cf6
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(
document_store: InMemoryDocumentStore,
filters: Optional[Dict[str, Any]] = None,
top_k: int = 10,
scale_score: bool = True,
scale_score: bool = False,
):
"""
Create the InMemoryBM25Retriever component.
Expand All @@ -26,7 +26,7 @@ def __init__(
:param filters: A dictionary with filters to narrow down the search space. Defaults to `None`.
:param top_k: The maximum number of documents to retrieve. Defaults to `10`.
:param scale_score: Scales the BM25 score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
Defaults to `True`.
Defaults to `False`.
:raises ValueError: If the specified `top_k` is not > 0.
"""
Expand Down Expand Up @@ -90,7 +90,7 @@ def run(
:param filters: A dictionary with filters to narrow down the search space.
:param top_k: The maximum number of documents to return.
:param scale_score: Scales the BM25 score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
Defaults to `True`.
If not specified, the value provided at initialization is used.
:return: The retrieved documents.
:raises ValueError: If the specified DocumentStore is not found or is not an InMemoryDocumentStore instance.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(
document_store: InMemoryDocumentStore,
filters: Optional[Dict[str, Any]] = None,
top_k: int = 10,
scale_score: bool = True,
scale_score: bool = False,
return_embedding: bool = False,
):
"""
Expand All @@ -27,7 +27,7 @@ def __init__(
:param filters: A dictionary with filters to narrow down the search space. Defaults to `None`.
:param top_k: The maximum number of documents to retrieve. Defaults to `10`.
:param scale_score: Scales the similarity score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
Defaults to `True`.
Defaults to `False`.
:param return_embedding: Whether to return the embedding of the retrieved Documents. Default is `False`.
:raises ValueError: If the specified top_k is not > 0.
Expand Down Expand Up @@ -98,8 +98,8 @@ def run(
:param query_embedding: Embedding of the query.
:param filters: A dictionary with filters to narrow down the search space.
:param top_k: The maximum number of documents to return.
:param scale_score: Scales the BM25 score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
Defaults to `True`.
:param scale_score: Scales the similarity score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
If not specified, the value provided at initialization is used.
:param return_embedding: Whether to return the embedding of the retrieved Documents.
:return: The retrieved documents.
Expand Down
12 changes: 6 additions & 6 deletions haystack/preview/document_stores/in_memory/document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,15 +204,15 @@ def delete_documents(self, document_ids: List[str]) -> None:
del self.storage[doc_id]

def bm25_retrieval(
self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: int = 10, scale_score: bool = True
self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: int = 10, scale_score: bool = False
) -> List[Document]:
"""
Retrieves documents that are most relevant to the query using BM25 algorithm.
:param query: The query string.
:param filters: A dictionary with filters to narrow down the search space.
:param top_k: The number of top documents to retrieve. Default is 10.
:param scale_score: Whether to scale the scores of the retrieved documents. Default is True.
:param scale_score: Whether to scale the scores of the retrieved documents. Default is False.
:return: A list of the top_k documents most relevant to the query.
"""
if not query:
Expand Down Expand Up @@ -279,7 +279,7 @@ def embedding_retrieval(
query_embedding: List[float],
filters: Optional[Dict[str, Any]] = None,
top_k: int = 10,
scale_score: bool = True,
scale_score: bool = False,
return_embedding: bool = False,
) -> List[Document]:
"""
Expand All @@ -288,7 +288,7 @@ def embedding_retrieval(
:param query_embedding: Embedding of the query.
:param filters: A dictionary with filters to narrow down the search space.
:param top_k: The number of top documents to retrieve. Default is 10.
:param scale_score: Whether to scale the scores of the retrieved Documents. Default is True.
:param scale_score: Whether to scale the scores of the retrieved Documents. Default is False.
:param return_embedding: Whether to return the embedding of the retrieved Documents. Default is False.
:return: A list of the top_k documents most relevant to the query.
"""
Expand Down Expand Up @@ -327,14 +327,14 @@ def embedding_retrieval(
return top_documents

def _compute_query_embedding_similarity_scores(
self, embedding: List[float], documents: List[Document], scale_score: bool = True
self, embedding: List[float], documents: List[Document], scale_score: bool = False
) -> List[float]:
"""
Computes the similarity scores between the query embedding and the embeddings of the documents.
:param embedding: Embedding of the query.
:param documents: A list of Documents.
:param scale_score: Whether to scale the scores of the Documents. Default is True.
:param scale_score: Whether to scale the scores of the Documents. Default is False.
:return: A list of scores.
"""

Expand Down
6 changes: 6 additions & 0 deletions releasenotes/notes/scale-score-false-h2-3c8e4e7e543e8cce.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
preview:
- |
Change the default value of `scale_score` to False for Retrievers.
Users can still explicitly set `scale_score` to True to get relevance
scores in the range [0, 1].
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,21 @@ def test_init_default(self):
retriever = InMemoryBM25Retriever(InMemoryDocumentStore())
assert retriever.filters is None
assert retriever.top_k == 10
assert retriever.scale_score
assert retriever.scale_score is False

@pytest.mark.unit
def test_init_with_parameters(self):
retriever = InMemoryBM25Retriever(
InMemoryDocumentStore(), filters={"name": "test.txt"}, top_k=5, scale_score=False
InMemoryDocumentStore(), filters={"name": "test.txt"}, top_k=5, scale_score=True
)
assert retriever.filters == {"name": "test.txt"}
assert retriever.top_k == 5
assert not retriever.scale_score
assert retriever.scale_score

@pytest.mark.unit
def test_init_with_invalid_top_k_parameter(self):
with pytest.raises(ValueError):
InMemoryBM25Retriever(InMemoryDocumentStore(), top_k=-2, scale_score=False)
InMemoryBM25Retriever(InMemoryDocumentStore(), top_k=-2)

@pytest.mark.unit
def test_to_dict(self):
Expand All @@ -56,7 +56,7 @@ def test_to_dict(self):
"document_store": {"type": "MyFakeStore", "init_parameters": {}},
"filters": None,
"top_k": 10,
"scale_score": True,
"scale_score": False,
},
}

Expand All @@ -66,7 +66,7 @@ def test_to_dict_with_custom_init_parameters(self):
document_store = MyFakeStore()
document_store.to_dict = lambda: {"type": "MyFakeStore", "init_parameters": {}}
component = InMemoryBM25Retriever(
document_store=document_store, filters={"name": "test.txt"}, top_k=5, scale_score=False
document_store=document_store, filters={"name": "test.txt"}, top_k=5, scale_score=True
)
data = component.to_dict()
assert data == {
Expand All @@ -75,7 +75,7 @@ def test_to_dict_with_custom_init_parameters(self):
"document_store": {"type": "MyFakeStore", "init_parameters": {}},
"filters": {"name": "test.txt"},
"top_k": 5,
"scale_score": False,
"scale_score": True,
},
}

Expand All @@ -94,7 +94,7 @@ def test_from_dict(self):
assert isinstance(component.document_store, InMemoryDocumentStore)
assert component.filters == {"name": "test.txt"}
assert component.top_k == 5
assert component.scale_score
assert component.scale_score is False

@pytest.mark.unit
def test_from_dict_without_docstore(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,21 @@ def test_init_default(self):
retriever = InMemoryEmbeddingRetriever(InMemoryDocumentStore())
assert retriever.filters is None
assert retriever.top_k == 10
assert retriever.scale_score
assert retriever.scale_score is False

@pytest.mark.unit
def test_init_with_parameters(self):
retriever = InMemoryEmbeddingRetriever(
InMemoryDocumentStore(), filters={"name": "test.txt"}, top_k=5, scale_score=False
InMemoryDocumentStore(), filters={"name": "test.txt"}, top_k=5, scale_score=True
)
assert retriever.filters == {"name": "test.txt"}
assert retriever.top_k == 5
assert not retriever.scale_score
assert retriever.scale_score

@pytest.mark.unit
def test_init_with_invalid_top_k_parameter(self):
with pytest.raises(ValueError):
InMemoryEmbeddingRetriever(InMemoryDocumentStore(), top_k=-2, scale_score=False)
InMemoryEmbeddingRetriever(InMemoryDocumentStore(), top_k=-2)

@pytest.mark.unit
def test_to_dict(self):
Expand All @@ -46,7 +46,7 @@ def test_to_dict(self):
"document_store": {"type": "MyFakeStore", "init_parameters": {}},
"filters": None,
"top_k": 10,
"scale_score": True,
"scale_score": False,
"return_embedding": False,
},
}
Expand All @@ -60,7 +60,7 @@ def test_to_dict_with_custom_init_parameters(self):
document_store=document_store,
filters={"name": "test.txt"},
top_k=5,
scale_score=False,
scale_score=True,
return_embedding=True,
)
data = component.to_dict()
Expand All @@ -70,7 +70,7 @@ def test_to_dict_with_custom_init_parameters(self):
"document_store": {"type": "MyFakeStore", "init_parameters": {}},
"filters": {"name": "test.txt"},
"top_k": 5,
"scale_score": False,
"scale_score": True,
"return_embedding": True,
},
}
Expand All @@ -90,7 +90,7 @@ def test_from_dict(self):
assert isinstance(component.document_store, InMemoryDocumentStore)
assert component.filters == {"name": "test.txt"}
assert component.top_k == 5
assert component.scale_score
assert component.scale_score is False

@pytest.mark.unit
def test_from_dict_without_docstore(self):
Expand Down

0 comments on commit f708cf6

Please sign in to comment.