refactor!: set scale_score default value to False (#6276)

* set default scale_score to False
* release note
deepset-ai · Nov 13, 2023 · f708cf6 · f708cf6
1 parent 8e7ce20
commit f708cf6
Show file tree

Hide file tree

Showing 6 changed files with 35 additions and 29 deletions.
diff --git a/haystack/preview/components/retrievers/in_memory_bm25_retriever.py b/haystack/preview/components/retrievers/in_memory_bm25_retriever.py
@@ -17,7 +17,7 @@ def __init__(
         document_store: InMemoryDocumentStore,
         filters: Optional[Dict[str, Any]] = None,
         top_k: int = 10,
-        scale_score: bool = True,
+        scale_score: bool = False,
     ):
         """
         Create the InMemoryBM25Retriever component.
@@ -26,7 +26,7 @@ def __init__(
         :param filters: A dictionary with filters to narrow down the search space. Defaults to `None`.
         :param top_k: The maximum number of documents to retrieve. Defaults to `10`.
         :param scale_score: Scales the BM25 score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
-        Defaults to `True`.
+        Defaults to `False`.
 
         :raises ValueError: If the specified `top_k` is not > 0.
         """
@@ -90,7 +90,7 @@ def run(
         :param filters: A dictionary with filters to narrow down the search space.
         :param top_k: The maximum number of documents to return.
         :param scale_score: Scales the BM25 score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
-        Defaults to `True`.
+            If not specified, the value provided at initialization is used.
         :return: The retrieved documents.
 
         :raises ValueError: If the specified DocumentStore is not found or is not an InMemoryDocumentStore instance.

diff --git a/haystack/preview/components/retrievers/in_memory_embedding_retriever.py b/haystack/preview/components/retrievers/in_memory_embedding_retriever.py
@@ -17,7 +17,7 @@ def __init__(
         document_store: InMemoryDocumentStore,
         filters: Optional[Dict[str, Any]] = None,
         top_k: int = 10,
-        scale_score: bool = True,
+        scale_score: bool = False,
         return_embedding: bool = False,
     ):
         """
@@ -27,7 +27,7 @@ def __init__(
         :param filters: A dictionary with filters to narrow down the search space. Defaults to `None`.
         :param top_k: The maximum number of documents to retrieve. Defaults to `10`.
         :param scale_score: Scales the similarity score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
-        Defaults to `True`.
+        Defaults to `False`.
         :param return_embedding: Whether to return the embedding of the retrieved Documents. Default is `False`.
 
         :raises ValueError: If the specified top_k is not > 0.
@@ -98,8 +98,8 @@ def run(
         :param query_embedding: Embedding of the query.
         :param filters: A dictionary with filters to narrow down the search space.
         :param top_k: The maximum number of documents to return.
-        :param scale_score: Scales the BM25 score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
-        Defaults to `True`.
+        :param scale_score: Scales the similarity score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
+            If not specified, the value provided at initialization is used.
         :param return_embedding: Whether to return the embedding of the retrieved Documents.
         :return: The retrieved documents.
 

diff --git a/haystack/preview/document_stores/in_memory/document_store.py b/haystack/preview/document_stores/in_memory/document_store.py
@@ -204,15 +204,15 @@ def delete_documents(self, document_ids: List[str]) -> None:
             del self.storage[doc_id]
 
     def bm25_retrieval(
-        self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: int = 10, scale_score: bool = True
+        self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: int = 10, scale_score: bool = False
     ) -> List[Document]:
         """
         Retrieves documents that are most relevant to the query using BM25 algorithm.
 
         :param query: The query string.
         :param filters: A dictionary with filters to narrow down the search space.
         :param top_k: The number of top documents to retrieve. Default is 10.
-        :param scale_score: Whether to scale the scores of the retrieved documents. Default is True.
+        :param scale_score: Whether to scale the scores of the retrieved documents. Default is False.
         :return: A list of the top_k documents most relevant to the query.
         """
         if not query:
@@ -279,7 +279,7 @@ def embedding_retrieval(
         query_embedding: List[float],
         filters: Optional[Dict[str, Any]] = None,
         top_k: int = 10,
-        scale_score: bool = True,
+        scale_score: bool = False,
         return_embedding: bool = False,
     ) -> List[Document]:
         """
@@ -288,7 +288,7 @@ def embedding_retrieval(
         :param query_embedding: Embedding of the query.
         :param filters: A dictionary with filters to narrow down the search space.
         :param top_k: The number of top documents to retrieve. Default is 10.
-        :param scale_score: Whether to scale the scores of the retrieved Documents. Default is True.
+        :param scale_score: Whether to scale the scores of the retrieved Documents. Default is False.
         :param return_embedding: Whether to return the embedding of the retrieved Documents. Default is False.
         :return: A list of the top_k documents most relevant to the query.
         """
@@ -327,14 +327,14 @@ def embedding_retrieval(
         return top_documents
 
     def _compute_query_embedding_similarity_scores(
-        self, embedding: List[float], documents: List[Document], scale_score: bool = True
+        self, embedding: List[float], documents: List[Document], scale_score: bool = False
     ) -> List[float]:
         """
         Computes the similarity scores between the query embedding and the embeddings of the documents.
 
         :param embedding: Embedding of the query.
         :param documents: A list of Documents.
-        :param scale_score: Whether to scale the scores of the Documents. Default is True.
+        :param scale_score: Whether to scale the scores of the Documents. Default is False.
         :return: A list of scores.
         """
 

diff --git a/releasenotes/notes/scale-score-false-h2-3c8e4e7e543e8cce.yaml b/releasenotes/notes/scale-score-false-h2-3c8e4e7e543e8cce.yaml
@@ -0,0 +1,6 @@
+---
+preview:
+  - |
+    Change the default value of `scale_score` to False for Retrievers.
+    Users can still explicitly set `scale_score` to True to get relevance
+    scores in the range [0, 1].
diff --git a/test/preview/components/retrievers/test_in_memory_bm25_retriever.py b/test/preview/components/retrievers/test_in_memory_bm25_retriever.py
@@ -26,21 +26,21 @@ def test_init_default(self):
         retriever = InMemoryBM25Retriever(InMemoryDocumentStore())
         assert retriever.filters is None
         assert retriever.top_k == 10
-        assert retriever.scale_score
+        assert retriever.scale_score is False
 
     @pytest.mark.unit
     def test_init_with_parameters(self):
         retriever = InMemoryBM25Retriever(
-            InMemoryDocumentStore(), filters={"name": "test.txt"}, top_k=5, scale_score=False
+            InMemoryDocumentStore(), filters={"name": "test.txt"}, top_k=5, scale_score=True
         )
         assert retriever.filters == {"name": "test.txt"}
         assert retriever.top_k == 5
-        assert not retriever.scale_score
+        assert retriever.scale_score
 
     @pytest.mark.unit
     def test_init_with_invalid_top_k_parameter(self):
         with pytest.raises(ValueError):
-            InMemoryBM25Retriever(InMemoryDocumentStore(), top_k=-2, scale_score=False)
+            InMemoryBM25Retriever(InMemoryDocumentStore(), top_k=-2)
 
     @pytest.mark.unit
     def test_to_dict(self):
@@ -56,7 +56,7 @@ def test_to_dict(self):
                 "document_store": {"type": "MyFakeStore", "init_parameters": {}},
                 "filters": None,
                 "top_k": 10,
-                "scale_score": True,
+                "scale_score": False,
             },
         }
 
@@ -66,7 +66,7 @@ def test_to_dict_with_custom_init_parameters(self):
         document_store = MyFakeStore()
         document_store.to_dict = lambda: {"type": "MyFakeStore", "init_parameters": {}}
         component = InMemoryBM25Retriever(
-            document_store=document_store, filters={"name": "test.txt"}, top_k=5, scale_score=False
+            document_store=document_store, filters={"name": "test.txt"}, top_k=5, scale_score=True
         )
         data = component.to_dict()
         assert data == {
@@ -75,7 +75,7 @@ def test_to_dict_with_custom_init_parameters(self):
                 "document_store": {"type": "MyFakeStore", "init_parameters": {}},
                 "filters": {"name": "test.txt"},
                 "top_k": 5,
-                "scale_score": False,
+                "scale_score": True,
             },
         }
 
@@ -94,7 +94,7 @@ def test_from_dict(self):
         assert isinstance(component.document_store, InMemoryDocumentStore)
         assert component.filters == {"name": "test.txt"}
         assert component.top_k == 5
-        assert component.scale_score
+        assert component.scale_score is False
 
     @pytest.mark.unit
     def test_from_dict_without_docstore(self):

diff --git a/test/preview/components/retrievers/test_in_memory_embedding_retriever.py b/test/preview/components/retrievers/test_in_memory_embedding_retriever.py
@@ -16,21 +16,21 @@ def test_init_default(self):
         retriever = InMemoryEmbeddingRetriever(InMemoryDocumentStore())
         assert retriever.filters is None
         assert retriever.top_k == 10
-        assert retriever.scale_score
+        assert retriever.scale_score is False
 
     @pytest.mark.unit
     def test_init_with_parameters(self):
         retriever = InMemoryEmbeddingRetriever(
-            InMemoryDocumentStore(), filters={"name": "test.txt"}, top_k=5, scale_score=False
+            InMemoryDocumentStore(), filters={"name": "test.txt"}, top_k=5, scale_score=True
         )
         assert retriever.filters == {"name": "test.txt"}
         assert retriever.top_k == 5
-        assert not retriever.scale_score
+        assert retriever.scale_score
 
     @pytest.mark.unit
     def test_init_with_invalid_top_k_parameter(self):
         with pytest.raises(ValueError):
-            InMemoryEmbeddingRetriever(InMemoryDocumentStore(), top_k=-2, scale_score=False)
+            InMemoryEmbeddingRetriever(InMemoryDocumentStore(), top_k=-2)
 
     @pytest.mark.unit
     def test_to_dict(self):
@@ -46,7 +46,7 @@ def test_to_dict(self):
                 "document_store": {"type": "MyFakeStore", "init_parameters": {}},
                 "filters": None,
                 "top_k": 10,
-                "scale_score": True,
+                "scale_score": False,
                 "return_embedding": False,
             },
         }
@@ -60,7 +60,7 @@ def test_to_dict_with_custom_init_parameters(self):
             document_store=document_store,
             filters={"name": "test.txt"},
             top_k=5,
-            scale_score=False,
+            scale_score=True,
             return_embedding=True,
         )
         data = component.to_dict()
@@ -70,7 +70,7 @@ def test_to_dict_with_custom_init_parameters(self):
                 "document_store": {"type": "MyFakeStore", "init_parameters": {}},
                 "filters": {"name": "test.txt"},
                 "top_k": 5,
-                "scale_score": False,
+                "scale_score": True,
                 "return_embedding": True,
             },
         }
@@ -90,7 +90,7 @@ def test_from_dict(self):
         assert isinstance(component.document_store, InMemoryDocumentStore)
         assert component.filters == {"name": "test.txt"}
         assert component.top_k == 5
-        assert component.scale_score
+        assert component.scale_score is False
 
     @pytest.mark.unit
     def test_from_dict_without_docstore(self):