biopragmatics · bgyori · Jan 20, 2025 · Jan 10, 2025 · Jan 10, 2025 · Jan 10, 2025
diff --git a/src/bioregistry/analysis/paper_ranking.py b/src/bioregistry/analysis/paper_ranking.py
@@ -141,37 +141,42 @@
     return fetched_metadata
 
 
-def _get_ids(term: str, use_text_word: bool, relative_date: int) -> set[str]:
+def _get_ids(term: str, use_text_word: bool, start_date: str, end_date: str) -> set[str]:
     from indra.literature import pubmed_client
 
     return {
         str(pubmed_id)
         for pubmed_id in pubmed_client.get_ids(
-            term, use_text_word=use_text_word, reldate=relative_date
+            term, use_text_word=use_text_word, mindate=start_date, maxdate=end_date
         )
     }
 
 
 def _search(
-    terms: list[str], pubmed_ids_to_filter: set[str], relative_date: int
+    terms: list[str], pubmed_ids_to_filter: set[str], start_date: str, end_date: str
 ) -> dict[str, list[str]]:
     paper_to_terms: defaultdict[str, list[str]] = defaultdict(list)
     for term in tqdm(terms, desc="Searching PubMed", unit="search term", leave=False):
-        for pubmed_id in _get_ids(term, use_text_word=True, relative_date=relative_date):
+        for pubmed_id in _get_ids(term, use_text_word=True, start_date=start_date, end_date=end_date):
             if pubmed_id not in pubmed_ids_to_filter:
                 paper_to_terms[pubmed_id].append(term)
     return dict(paper_to_terms)
 
 
-def fetch_pubmed_papers(*, pubmed_ids_to_filter: set[str], relative_date: int) -> pd.DataFrame:
+
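+def fetch_pubmed_papers(*, pubmed_ids_to_filter: set[str], start_date: str, end_date: str) -> pd.DataFrame:
-    """Fetch PubMed papers from the last 30 days using specific search terms, excluding curated papers.
+    """Fetch PubMed papers from the given date range using specific search terms, excluding curated papers.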
 
-    :param pubmed_ids_to_filter: List containing already curated PMIDs
-    :param relative_date: the number of recent days to search
+    :param pubmed_ids_to_filter: Set of already curated PMIDs to exclude from the results.
+    :param start_date: Start of the search period, passed to the PubMed search as ``mindate``.
+    :param end_date: End of the search period, passed to the PubMed search as ``maxdate``.
     :return: DataFrame containing PubMed paper details.
     """
     paper_to_terms = _search(
-        DEFAULT_SEARCH_TERMS, pubmed_ids_to_filter=pubmed_ids_to_filter, relative_date=relative_date
+        DEFAULT_SEARCH_TERMS,
+        pubmed_ids_to_filter=pubmed_ids_to_filter,
+        start_date=start_date,
+        end_date=end_date
     )
 
     papers = _get_metadata_for_ids(paper_to_terms)
@@ -477,11 +482,13 @@
     # These have already been curated and will therefore be filtered out
     curated_pubmed_ids: set[str] = {str(pubmed) for pubmed in df["pubmed"] if pd.notna(pubmed)}
 
-    # FIXME the fetch_pubmed_papers function should
-    #  take into account the start and end date. as
-    predictions_df = fetch_pubmed_papers(pubmed_ids_to_filter=curated_pubmed_ids, relative_date=30)
+    predictions_df = fetch_pubmed_papers(
+        pubmed_ids_to_filter=curated_pubmed_ids,
+        start_date=start_date,
+        end_date=end_date
+    )
     if not predictions_df.empty:
-        predictions_path = output_path.joinpath(f"predictions.tsv")
+        predictions_path = output_path.joinpath("predictions.tsv")
         predict_and_save(predictions_df, vectorizer, classifiers, meta_clf, predictions_path)