Skip to content

Commit

Permalink
Add start and end date parameters to main
Browse files (browse the repository at this point in the history)
  • Loading branch information
nagutm committed Jan 16, 2025
1 parent 9c9c9a3 commit e56273a
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions src/bioregistry/analysis/paper_ranking.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -396,10 +396,12 @@ def _get_evaluation_df(
help="End date of the period",
default=datetime.date.today().isoformat(),
)
-def main(bioregistry_file: Path) -> None:
+def main(bioregistry_file: Path, start_date: str, end_date: str) -> None:
"""Load data, train classifiers, evaluate models, and predict new data.
:param bioregistry_file: Path to the bioregistry JSON file.
+:param start_date: The start date of the period for which papers are being ranked.
+:param end_date: The end date of the period for which papers are being ranked.
"""
runner(
bioregistry_file=bioregistry_file,
Expand Down Expand Up @@ -481,9 +483,9 @@ def runner(
if not predictions_df.empty:
# TODO update the way naming this file works, see discussion on
# https://github.com/biopragmatics/bioregistry/pull/1350
-predictions_path = output_path.joinpath(f"predictions_{start_date}_to_{end_date}.tsv")
+predictions_path = output_path.joinpath(f"predictions.tsv")
predict_and_save(predictions_df, vectorizer, classifiers, meta_clf, predictions_path)


if __name__ == "__main__":
-main()
+main()

0 comments on commit e56273a

Please sign in to comment.