From 0750ab26eabcbbb0cb239c90a2d0d2259da28ec0 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 12 Feb 2025 20:36:31 -0500 Subject: [PATCH 1/3] added second version of usage stats and commented old version --- MANIFEST.in | 1 + bbconf/bbagent.py | 94 +++++++++++++++++++++++++++++++- bbconf/db_utils.py | 100 ++++++++++++++++++++++++++++++++++ bbconf/models/base_models.py | 34 +++++++++++- bbconf/utils/usage_events.csv | 7 +++ tests/test_common.py | 100 +++++++++++++++++++++++++++++++++- 6 files changed, 332 insertions(+), 4 deletions(-) create mode 100644 bbconf/utils/usage_events.csv diff --git a/MANIFEST.in b/MANIFEST.in index 0126442a..a6f1b9c0 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,3 +5,4 @@ include bbconf/schemas/* include bbconf/modules/* include bbconf/config_parser/* include bbconf/models/* +include bbconf/utils/* \ No newline at end of file diff --git a/bbconf/bbagent.py b/bbconf/bbagent.py index b1cc6209..fcc8999a 100644 --- a/bbconf/bbagent.py +++ b/bbconf/bbagent.py @@ -5,13 +5,23 @@ from sqlalchemy.orm import Session from sqlalchemy.sql import distinct, func, select +from sqlalchemy.exc import IntegrityError from bbconf.config_parser.bedbaseconfig import BedBaseConfig -from bbconf.db_utils import Bed, BedSets, License -from bbconf.models.base_models import StatsReturn +from bbconf.db_utils import ( + Bed, + BedSets, + License, + StatsBedSetMeta, + StatsBedMeta, + StatsFiles, + StatsSearch, +) +from bbconf.models.base_models import StatsReturn, UsageModel from bbconf.modules.bedfiles import BedAgentBedFile from bbconf.modules.bedsets import BedAgentBedSet from bbconf.modules.objects import BBObjects +from bbconf.exceptions import BedBaseConfError from .const import PKG_NAME @@ -99,3 +109,83 @@ def list_of_licenses(self) -> List[str]: with Session(self.config.db_engine.engine) as session: licenses = session.execute(statement).all() return [result[0] for result in licenses] + + # def add_usage(self, usage: UsageModel) -> None: + # """ + # 
Add usage to the database + # + # :param usage: usage model + # """ + # + # event_name = usage.event + # usage = usage.model_dump( + # exclude_unset=True, exclude_defaults=True, exclude={"event"} + # ) + # + # try: + # with Session(self.config.db_engine.engine) as session: + # + # event = session.scalar( + # select(UsageEvent).where(UsageEvent.event == event_name) + # ) + # + # session.add(UsageStats(**usage, event_mapper=event)) + # session.commit() + # + # _LOGGER.debug(f"Usage added: {usage}") + # except IntegrityError as e: + # _LOGGER.error(f"Error adding usage: {e}") + # _LOGGER.error(f"Usage: {usage}") + # raise BedBaseConfError("Error adding usage") + + def add_usage(self, stats: UsageModel) -> None: + + with Session(self.config.db_engine.engine) as session: + for key, value in stats.files.items(): + new_stats = StatsFiles( + file_path=key, + count=value, + date_from=stats.date_from, + date_to=stats.date_to, + ) + session.add(new_stats) + + for key, value in stats.bed_meta.items(): + new_stats = StatsBedMeta( + bed_id=key, + count=value, + date_from=stats.date_from, + date_to=stats.date_to, + ) + session.add(new_stats) + + for key, value in stats.bedset_meta.items(): + new_stats = StatsBedSetMeta( + bedset_id=key, + count=value, + date_from=stats.date_from, + date_to=stats.date_to, + ) + session.add(new_stats) + + for key, value in stats.bed_search.items(): + new_stats = StatsSearch( + query=key, + count=value, + type="bed", + date_from=stats.date_from, + date_to=stats.date_to, + ) + session.add(new_stats) + + for key, value in stats.bedset_search.items(): + new_stats = StatsSearch( + query=key, + count=value, + type="bedset", + date_from=stats.date_from, + date_to=stats.date_to, + ) + session.add(new_stats) + + session.commit() diff --git a/bbconf/db_utils.py b/bbconf/db_utils.py index 81ae361c..f2be7aaf 100644 --- a/bbconf/db_utils.py +++ b/bbconf/db_utils.py @@ -1,5 +1,6 @@ import datetime import logging +import os from typing import List, Optional 
import pandas as pd @@ -473,6 +474,105 @@ class GeoGsmStatus(Base): ) +# class UsageEvent(Base): +# __tablename__ = "usage_events" +# +# id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) +# event: Mapped[str] = mapped_column(nullable=False, comment="Event name") +# +# usage_stats: Mapped[List["UsageStats"]] = relationship( +# "UsageStats", back_populates="event_mapper" +# ) + + +# class UsageStats(Base): +# __tablename__ = "usage_stats" +# +# id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) +# event: Mapped[int] = mapped_column( +# ForeignKey("usage_events.id", ondelete="CASCADE"), nullable=False, index=True +# ) +# +# bed_id: Mapped[str] = mapped_column( +# ForeignKey("bed.id", ondelete="CASCADE"), nullable=True, index=True +# ) +# bedset_id: Mapped[str] = mapped_column( +# ForeignKey("bedsets.id", ondelete="CASCADE"), nullable=True, index=True +# ) +# query: Mapped[str] = mapped_column(nullable=True, comment="Search query if any") +# +# file_name: Mapped[str] = mapped_column(nullable=True, comment="File name if any") +# +# timestamp: Mapped[datetime.datetime] = mapped_column( +# default=deliver_update_date, onupdate=deliver_update_date +# ) +# +# ipaddress: Mapped[str] = mapped_column( +# nullable=True, comment="IP address of the user" +# ) +# user_agent: Mapped[str] = mapped_column( +# nullable=True, comment="User agent of the user" +# ) +# +# event_mapper: Mapped["UsageEvent"] = relationship( +# "UsageEvent", back_populates="usage_stats" +# ) + + +class StatsBedMeta(Base): + __tablename__ = "stats_bed_meta" + + id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) + + bed_id: Mapped[str] = mapped_column( + ForeignKey("bed.id", ondelete="CASCADE"), nullable=True, index=True + ) + + count: Mapped[int] = mapped_column(default=0, comment="Number of visits") + + date_from: Mapped[datetime.datetime] = mapped_column(comment="Date from") + date_to: Mapped[datetime.datetime] = 
mapped_column(comment="Date to") + + +class StatsBedSetMeta(Base): + __tablename__ = "stats_bedset_meta" + + id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) + + bedset_id: Mapped[str] = mapped_column( + ForeignKey("bedsets.id", ondelete="CASCADE"), nullable=True, index=True + ) + count: Mapped[int] = mapped_column(default=0, comment="Number of visits") + + date_from: Mapped[datetime.datetime] = mapped_column(comment="Date from") + date_to: Mapped[datetime.datetime] = mapped_column(comment="Date to") + + +class StatsFiles(Base): + __tablename__ = "stats_files" + + id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) + file_path: Mapped[str] = mapped_column(nullable=False, comment="Path to the file") + count: Mapped[int] = mapped_column(default=0, comment="Number of downloads") + + date_from: Mapped[datetime.datetime] = mapped_column(comment="Date from") + date_to: Mapped[datetime.datetime] = mapped_column(comment="Date to") + + +class StatsSearch(Base): + __tablename__ = "stats_search" + + id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) + query: Mapped[str] = mapped_column(nullable=False, comment="Search query") + type: Mapped[str] = mapped_column( + nullable=False, comment="Type of the search. Bed/Bedset" + ) + count: Mapped[int] = mapped_column(default=0, comment="Number of searches") + + date_from: Mapped[datetime.datetime] = mapped_column(comment="Date from") + date_to: Mapped[datetime.datetime] = mapped_column(comment="Date to") + + class BaseEngine: """ A class with base methods, that are used in several classes. 
diff --git a/bbconf/models/base_models.py b/bbconf/models/base_models.py
index 91e4e5d6..1cb4e7d1 100644
--- a/bbconf/models/base_models.py
+++ b/bbconf/models/base_models.py
@@ -1,4 +1,5 @@
-from typing import List, Optional, Union
+from typing import List, Optional, Union, Dict
+import datetime
 
 from pydantic import BaseModel, ConfigDict, Field
 
@@ -22,3 +23,34 @@ class StatsReturn(BaseModel):
     bedfiles_number: int = 0
     bedsets_number: int = 0
     genomes_number: int = 0
+
+
+# class UsageModel(BaseModel):
+#     """
+#     Usage model. Used to track usage of the bedbase.
+#     """
+#
+#     event: str
+#     bed_id: Optional[Union[str, None]] = None
+#     bedset_id: Optional[Union[str, None]] = None
+#     query: Optional[Union[str, None]] = None
+#     file_name: Optional[Union[str, None]] = None
+#
+#     ipaddress: str
+#     user_agent: str
+
+
+class UsageModel(BaseModel):
+    """
+    Usage model. Used to track usage of the bedbase.
+    """
+
+    bed_meta: Union[Dict[str, int], None] = Field(default_factory=dict)
+    bedset_meta: Union[Dict[str, int], None] = Field(default_factory=dict)
+
+    bed_search: Union[Dict[str, int], None] = Field(default_factory=dict)
+    bedset_search: Union[Dict[str, int], None] = Field(default_factory=dict)
+    files: Union[Dict[str, int], None] = Field(default_factory=dict)
+
+    date_from: datetime.datetime
+    date_to: Union[datetime.datetime, None] = None
diff --git a/bbconf/utils/usage_events.csv b/bbconf/utils/usage_events.csv
new file mode 100644
index 00000000..a1e8dc79
--- /dev/null
+++ b/bbconf/utils/usage_events.csv
@@ -0,0 +1,7 @@
+id,event
+1,bed_search
+2,bedset_search
+3,bed_metadata
+4,bedset_metadata
+5,bed_files
+6,bedset_files
\ No newline at end of file
diff --git a/tests/test_common.py b/tests/test_common.py
index 45ad8af5..a86051ea 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -1,9 +1,11 @@
 import pytest
 
 from bbconf.const import DEFAULT_LICENSE
+from bbconf.models.base_models import UsageModel
+from bbconf.exceptions import BedBaseConfError
 
 from .conftest import SERVICE_UNAVAILABLE
-from .utils import ContextManagerDBTesting
+from .utils import
ContextManagerDBTesting, BED_TEST_ID, BEDSET_TEST_ID @pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="Database is not available") @@ -23,3 +25,99 @@ def test_get_licenses(bbagent_obj): assert return_result assert DEFAULT_LICENSE in return_result + + +# +# @pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="Database is not available") +# class TestAddUsage: +# def test_add_bed_search(self, bbagent_obj): +# +# usage = UsageModel( +# event="bed_search", +# query="test", +# ipaddress="123.09.09.123", +# user_agent="test-agent", +# ) +# +# with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True): +# bbagent_obj.add_usage(usage) +# +# def test_add_bedset_search(self, bbagent_obj): +# +# usage = UsageModel( +# event="bedset_search", +# query="test", +# ipaddress="12345", +# user_agent="test-agent", +# ) +# +# with ContextManagerDBTesting( +# config=bbagent_obj.config, add_data=True, bedset=True +# ): +# bbagent_obj.add_usage(usage) +# +# def test_add_bedset_meta(self, bbagent_obj): +# usage = UsageModel( +# event="bedset_metadata", +# query=None, +# bedset_id=BEDSET_TEST_ID, +# ipaddress="1234", +# user_agent="test-agent", +# ) +# +# with ContextManagerDBTesting( +# config=bbagent_obj.config, add_data=True, bedset=True +# ): +# bbagent_obj.add_usage(usage) +# +# def test_add_bed_meta(self, bbagent_obj): +# usage = UsageModel( +# event="bed_metadata", +# query=None, +# bed_id=BED_TEST_ID, +# ipaddress="1234", +# user_agent="test-agent", +# ) +# +# with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True): +# bbagent_obj.add_usage(usage) +# +# def test_add_bedset_meta_error(self, bbagent_obj): +# usage = UsageModel( +# event="bedset_metadata", +# query=None, +# bedset_id="error", +# ipaddress="1234", +# user_agent="test-agent", +# ) +# +# with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True): +# +# with pytest.raises(BedBaseConfError): +# bbagent_obj.add_usage(usage) +# +# def test_add_incorrect_event(self, bbagent_obj): +# usage = 
UsageModel( +# event="bed_metadata", +# query=None, +# bed_id="error", +# ipaddress="1234", +# user_agent="test-agent", +# ) +# +# with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True): +# with pytest.raises(BedBaseConfError): +# bbagent_obj.add_usage(usage) +# +# def test_add_bed_file(self, bbagent_obj): +# usage = UsageModel( +# event="bed_files", +# query=None, +# bed_id=BED_TEST_ID, +# file_name="test_file", +# ipaddress="1234", +# user_agent="test-agent", +# ) +# +# with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True): +# bbagent_obj.add_usage(usage) From 18d932bfc8b74eb527eda24d26091982c32713a8 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 17 Feb 2025 15:57:06 -0500 Subject: [PATCH 2/3] naming changes --- bbconf/bbagent.py | 46 +++----------- bbconf/db_utils.py | 61 +++---------------- bbconf/models/base_models.py | 15 ----- tests/test_common.py | 112 ++++++----------------------------- 4 files changed, 35 insertions(+), 199 deletions(-) diff --git a/bbconf/bbagent.py b/bbconf/bbagent.py index fcc8999a..c91cd292 100644 --- a/bbconf/bbagent.py +++ b/bbconf/bbagent.py @@ -12,10 +12,10 @@ Bed, BedSets, License, - StatsBedSetMeta, - StatsBedMeta, - StatsFiles, - StatsSearch, + UsageBedSetMeta, + UsageBedMeta, + UsageFiles, + UsageSearch, ) from bbconf.models.base_models import StatsReturn, UsageModel from bbconf.modules.bedfiles import BedAgentBedFile @@ -110,39 +110,11 @@ def list_of_licenses(self) -> List[str]: licenses = session.execute(statement).all() return [result[0] for result in licenses] - # def add_usage(self, usage: UsageModel) -> None: - # """ - # Add usage to the database - # - # :param usage: usage model - # """ - # - # event_name = usage.event - # usage = usage.model_dump( - # exclude_unset=True, exclude_defaults=True, exclude={"event"} - # ) - # - # try: - # with Session(self.config.db_engine.engine) as session: - # - # event = session.scalar( - # select(UsageEvent).where(UsageEvent.event == 
event_name) - # ) - # - # session.add(UsageStats(**usage, event_mapper=event)) - # session.commit() - # - # _LOGGER.debug(f"Usage added: {usage}") - # except IntegrityError as e: - # _LOGGER.error(f"Error adding usage: {e}") - # _LOGGER.error(f"Usage: {usage}") - # raise BedBaseConfError("Error adding usage") - def add_usage(self, stats: UsageModel) -> None: with Session(self.config.db_engine.engine) as session: for key, value in stats.files.items(): - new_stats = StatsFiles( + new_stats = UsageFiles( file_path=key, count=value, date_from=stats.date_from, @@ -151,7 +123,7 @@ def add_usage(self, stats: UsageModel) -> None: session.add(new_stats) for key, value in stats.bed_meta.items(): - new_stats = StatsBedMeta( + new_stats = UsageBedMeta( bed_id=key, count=value, date_from=stats.date_from, @@ -160,7 +132,7 @@ def add_usage(self, stats: UsageModel) -> None: session.add(new_stats) for key, value in stats.bedset_meta.items(): - new_stats = StatsBedSetMeta( + new_stats = UsageBedSetMeta( bedset_id=key, count=value, date_from=stats.date_from, @@ -169,7 +141,7 @@ def add_usage(self, stats: UsageModel) -> None: session.add(new_stats) for key, value in stats.bed_search.items(): - new_stats = StatsSearch( + new_stats = UsageSearch( query=key, count=value, type="bed", @@ -179,7 +151,7 @@ def add_usage(self, stats: UsageModel) -> None: session.add(new_stats) for key, value in stats.bedset_search.items(): - new_stats = StatsSearch( + new_stats = UsageSearch( query=key, count=value, type="bedset", diff --git a/bbconf/db_utils.py b/bbconf/db_utils.py index f2be7aaf..6424c19b 100644 --- a/bbconf/db_utils.py +++ b/bbconf/db_utils.py @@ -474,53 +474,8 @@ class GeoGsmStatus(Base): ) -# class UsageEvent(Base): -# __tablename__ = "usage_events" -# -# id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) -# event: Mapped[str] = mapped_column(nullable=False, comment="Event name") -# -# usage_stats: Mapped[List["UsageStats"]] = relationship( -# "UsageStats", 
back_populates="event_mapper" -# ) - - -# class UsageStats(Base): -# __tablename__ = "usage_stats" -# -# id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) -# event: Mapped[int] = mapped_column( -# ForeignKey("usage_events.id", ondelete="CASCADE"), nullable=False, index=True -# ) -# -# bed_id: Mapped[str] = mapped_column( -# ForeignKey("bed.id", ondelete="CASCADE"), nullable=True, index=True -# ) -# bedset_id: Mapped[str] = mapped_column( -# ForeignKey("bedsets.id", ondelete="CASCADE"), nullable=True, index=True -# ) -# query: Mapped[str] = mapped_column(nullable=True, comment="Search query if any") -# -# file_name: Mapped[str] = mapped_column(nullable=True, comment="File name if any") -# -# timestamp: Mapped[datetime.datetime] = mapped_column( -# default=deliver_update_date, onupdate=deliver_update_date -# ) -# -# ipaddress: Mapped[str] = mapped_column( -# nullable=True, comment="IP address of the user" -# ) -# user_agent: Mapped[str] = mapped_column( -# nullable=True, comment="User agent of the user" -# ) -# -# event_mapper: Mapped["UsageEvent"] = relationship( -# "UsageEvent", back_populates="usage_stats" -# ) - - -class StatsBedMeta(Base): - __tablename__ = "stats_bed_meta" +class UsageBedMeta(Base): + __tablename__ = "usage_bed_meta" id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) @@ -534,8 +489,8 @@ class StatsBedMeta(Base): date_to: Mapped[datetime.datetime] = mapped_column(comment="Date to") -class StatsBedSetMeta(Base): - __tablename__ = "stats_bedset_meta" +class UsageBedSetMeta(Base): + __tablename__ = "usage_bedset_meta" id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) @@ -548,8 +503,8 @@ class StatsBedSetMeta(Base): date_to: Mapped[datetime.datetime] = mapped_column(comment="Date to") -class StatsFiles(Base): - __tablename__ = "stats_files" +class UsageFiles(Base): + __tablename__ = "usage_files" id: Mapped[int] = mapped_column(primary_key=True, 
index=True, autoincrement=True) file_path: Mapped[str] = mapped_column(nullable=False, comment="Path to the file") @@ -559,8 +514,8 @@ class StatsFiles(Base): date_to: Mapped[datetime.datetime] = mapped_column(comment="Date to") -class StatsSearch(Base): - __tablename__ = "stats_search" +class UsageSearch(Base): + __tablename__ = "usage_search" id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True) query: Mapped[str] = mapped_column(nullable=False, comment="Search query") diff --git a/bbconf/models/base_models.py b/bbconf/models/base_models.py index 1cb4e7d1..5329d7e9 100644 --- a/bbconf/models/base_models.py +++ b/bbconf/models/base_models.py @@ -25,21 +25,6 @@ class StatsReturn(BaseModel): genomes_number: int = 0 -# class UsageModel(BaseModel): -# """ -# Usage model. Used to track usage of the bedbase. -# """ -# -# event: str -# bed_id: Optional[Union[str, None]] = None -# bedset_id: Optional[Union[str, None]] = None -# query: Optional[Union[str, None]] = None -# file_name: Optional[Union[str, None]] = None -# -# ipaddress: str -# user_agent: str - - class UsageModel(BaseModel): """ Usage model. Used to track usage of the bedbase. 
diff --git a/tests/test_common.py b/tests/test_common.py index a86051ea..d7921e9b 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -3,6 +3,7 @@ from bbconf.const import DEFAULT_LICENSE from bbconf.models.base_models import UsageModel from bbconf.exceptions import BedBaseConfError +import datetime from .conftest import SERVICE_UNAVAILABLE from .utils import ContextManagerDBTesting, BED_TEST_ID, BEDSET_TEST_ID @@ -27,97 +28,20 @@ def test_get_licenses(bbagent_obj): assert DEFAULT_LICENSE in return_result -# -# @pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="Database is not available") -# class TestAddUsage: -# def test_add_bed_search(self, bbagent_obj): -# -# usage = UsageModel( -# event="bed_search", -# query="test", -# ipaddress="123.09.09.123", -# user_agent="test-agent", -# ) -# -# with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True): -# bbagent_obj.add_usage(usage) -# -# def test_add_bedset_search(self, bbagent_obj): -# -# usage = UsageModel( -# event="bedset_search", -# query="test", -# ipaddress="12345", -# user_agent="test-agent", -# ) -# -# with ContextManagerDBTesting( -# config=bbagent_obj.config, add_data=True, bedset=True -# ): -# bbagent_obj.add_usage(usage) -# -# def test_add_bedset_meta(self, bbagent_obj): -# usage = UsageModel( -# event="bedset_metadata", -# query=None, -# bedset_id=BEDSET_TEST_ID, -# ipaddress="1234", -# user_agent="test-agent", -# ) -# -# with ContextManagerDBTesting( -# config=bbagent_obj.config, add_data=True, bedset=True -# ): -# bbagent_obj.add_usage(usage) -# -# def test_add_bed_meta(self, bbagent_obj): -# usage = UsageModel( -# event="bed_metadata", -# query=None, -# bed_id=BED_TEST_ID, -# ipaddress="1234", -# user_agent="test-agent", -# ) -# -# with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True): -# bbagent_obj.add_usage(usage) -# -# def test_add_bedset_meta_error(self, bbagent_obj): -# usage = UsageModel( -# event="bedset_metadata", -# query=None, -# bedset_id="error", -# 
ipaddress="1234", -# user_agent="test-agent", -# ) -# -# with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True): -# -# with pytest.raises(BedBaseConfError): -# bbagent_obj.add_usage(usage) -# -# def test_add_incorrect_event(self, bbagent_obj): -# usage = UsageModel( -# event="bed_metadata", -# query=None, -# bed_id="error", -# ipaddress="1234", -# user_agent="test-agent", -# ) -# -# with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True): -# with pytest.raises(BedBaseConfError): -# bbagent_obj.add_usage(usage) -# -# def test_add_bed_file(self, bbagent_obj): -# usage = UsageModel( -# event="bed_files", -# query=None, -# bed_id=BED_TEST_ID, -# file_name="test_file", -# ipaddress="1234", -# user_agent="test-agent", -# ) -# -# with ContextManagerDBTesting(config=bbagent_obj.config, add_data=True): -# bbagent_obj.add_usage(usage) +@pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="Database is not available") +class TestAddUsage: + def test_add_usages(self, bbagent_obj): + usage = UsageModel( + bed_meta={BED_TEST_ID: 3}, + bedset_meta={BEDSET_TEST_ID: 4}, + bed_search={"ff": 2}, + bedset_search={"ase": 1}, + files={"bin.bed.gz": 432}, + date_from=datetime.datetime.now(), + date_to=datetime.datetime.now(), + ) + + with ContextManagerDBTesting( + config=bbagent_obj.config, add_data=True, bedset=True + ): + bbagent_obj.add_usage(usage) From 141ef2d7d9b672d231598690e2230a6fd0682c10 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 18 Feb 2025 13:36:52 -0500 Subject: [PATCH 3/3] added detailed stats --- bbconf/bbagent.py | 41 +++++++++++++++++++++++++++++++++--- bbconf/models/base_models.py | 6 ++++++ manual_testing.py | 12 +++++++++-- 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/bbconf/bbagent.py b/bbconf/bbagent.py index c91cd292..87ec8cce 100644 --- a/bbconf/bbagent.py +++ b/bbconf/bbagent.py @@ -5,7 +5,6 @@ from sqlalchemy.orm import Session from sqlalchemy.sql import distinct, func, select -from sqlalchemy.exc import 
IntegrityError from bbconf.config_parser.bedbaseconfig import BedBaseConfig from bbconf.db_utils import ( @@ -17,11 +16,10 @@ UsageFiles, UsageSearch, ) -from bbconf.models.base_models import StatsReturn, UsageModel +from bbconf.models.base_models import StatsReturn, UsageModel, FileStats from bbconf.modules.bedfiles import BedAgentBedFile from bbconf.modules.bedsets import BedAgentBedSet from bbconf.modules.objects import BBObjects -from bbconf.exceptions import BedBaseConfError from .const import PKG_NAME @@ -87,6 +85,43 @@ def get_stats(self) -> StatsReturn: genomes_number=number_of_genomes, ) + def get_detailed_stats(self) -> FileStats: + """ + Get comprehensive statistics for all bed files + + """ + with Session(self.config.db_engine.engine) as session: + file_types = { + f[0]: f[1] + for f in session.execute( + select(Bed.bed_type, func.count(Bed.bed_type)).group_by( + Bed.bed_type + ) + ).all() + } + file_formats = { + f[0]: f[1] + for f in session.execute( + select(Bed.bed_format, func.count(Bed.bed_format)).group_by( + Bed.bed_format + ) + ).all() + } + file_genomes = { + f[0]: f[1] + for f in session.execute( + select(Bed.genome_alias, func.count(Bed.genome_alias)).group_by( + Bed.genome_alias + ) + ).all() + } + + return FileStats( + file_type=file_types, + file_format=file_formats, + file_genome=file_genomes, + ) + def get_list_genomes(self) -> List[str]: """ Get list of genomes from the database diff --git a/bbconf/models/base_models.py b/bbconf/models/base_models.py index 5329d7e9..63c96053 100644 --- a/bbconf/models/base_models.py +++ b/bbconf/models/base_models.py @@ -25,6 +25,12 @@ class StatsReturn(BaseModel): genomes_number: int = 0 +class FileStats(BaseModel): + file_type: Dict[str, int] + file_format: Dict[str, int] + file_genome: Dict[str, int] + + class UsageModel(BaseModel): """ Usage model. Used to track usage of the bedbase. 
diff --git a/manual_testing.py b/manual_testing.py
index 2a732c06..f226b2f1 100644
--- a/manual_testing.py
+++ b/manual_testing.py
@@ -209,6 +209,15 @@ def config_t():
     print(is_valid)
 
 
+def compreh_stats():
+    """Manual check: print the result of BedBaseAgent.get_detailed_stats()."""
+    from bbconf import BedBaseAgent
+
+    agent = BedBaseAgent(config="/home/bnt4me/virginia/repos/bedhost/config.yaml")
+    results = agent.get_detailed_stats()
+    print(results)
+
+
 if __name__ == "__main__":
     # zarr_s3()
     # add_s3()
@@ -218,5 +227,5 @@ def config_t():
     # get_id_plots_missing()
     # neighbour_beds()
     # sql_search()
-
-    config_t()
+    # config_t()
+    compreh_stats()