Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added user usage stats and platform stats #80

Merged
merged 3 commits into from
Feb 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ include bbconf/schemas/*
include bbconf/modules/*
include bbconf/config_parser/*
include bbconf/models/*
include bbconf/utils/*
101 changes: 99 additions & 2 deletions bbconf/bbagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,16 @@
from sqlalchemy.sql import distinct, func, select

from bbconf.config_parser.bedbaseconfig import BedBaseConfig
from bbconf.db_utils import Bed, BedSets, License
from bbconf.models.base_models import StatsReturn
from bbconf.db_utils import (
Bed,
BedSets,
License,
UsageBedSetMeta,
UsageBedMeta,
UsageFiles,
UsageSearch,
)
from bbconf.models.base_models import StatsReturn, UsageModel, FileStats
from bbconf.modules.bedfiles import BedAgentBedFile
from bbconf.modules.bedsets import BedAgentBedSet
from bbconf.modules.objects import BBObjects
Expand Down Expand Up @@ -77,6 +85,43 @@ def get_stats(self) -> StatsReturn:
genomes_number=number_of_genomes,
)

def get_detailed_stats(self) -> FileStats:
    """
    Get comprehensive statistics for all bed files

    Runs one grouped count query per attribute (bed type, bed format,
    genome alias) inside a single database session.

    :return: FileStats with the number of bed files per type, format and genome
    """
    with Session(self.config.db_engine.engine) as session:

        def _count_by(column) -> dict:
            # SELECT <column>, COUNT(<column>) ... GROUP BY <column>
            grouped_rows = session.execute(
                select(column, func.count(column)).group_by(column)
            ).all()
            return {row[0]: row[1] for row in grouped_rows}

        # Same query order as before: types, formats, genomes
        per_type = _count_by(Bed.bed_type)
        per_format = _count_by(Bed.bed_format)
        per_genome = _count_by(Bed.genome_alias)

    return FileStats(
        file_type=per_type,
        file_format=per_format,
        file_genome=per_genome,
    )

def get_list_genomes(self) -> List[str]:
"""
Get list of genomes from the database
Expand All @@ -99,3 +144,55 @@ def list_of_licenses(self) -> List[str]:
with Session(self.config.db_engine.engine) as session:
licenses = session.execute(statement).all()
return [result[0] for result in licenses]

def add_usage(self, stats: UsageModel) -> None:
    """
    Store usage statistics in the database.

    One row is created per entry of every usage section in ``stats``
    (file downloads, bed metadata visits, bedset metadata visits, bed
    searches and bedset searches). Every row is stamped with the
    ``date_from`` / ``date_to`` of the report. All rows are written in a
    single transaction.

    :param stats: usage report to store. Sections that are None or empty
        are skipped.
    :return: None
    """
    period = {"date_from": stats.date_from, "date_to": stats.date_to}

    # The model allows every section to be None, so fall back to an empty
    # mapping instead of failing on `None.items()`.
    records = []
    records.extend(
        UsageFiles(file_path=file_path, count=count, **period)
        for file_path, count in (stats.files or {}).items()
    )
    records.extend(
        UsageBedMeta(bed_id=bed_id, count=count, **period)
        for bed_id, count in (stats.bed_meta or {}).items()
    )
    records.extend(
        UsageBedSetMeta(bedset_id=bedset_id, count=count, **period)
        for bedset_id, count in (stats.bedset_meta or {}).items()
    )
    # Both search sections share one table; `type` tells them apart.
    records.extend(
        UsageSearch(query=query, count=count, type="bed", **period)
        for query, count in (stats.bed_search or {}).items()
    )
    records.extend(
        UsageSearch(query=query, count=count, type="bedset", **period)
        for query, count in (stats.bedset_search or {}).items()
    )

    with Session(self.config.db_engine.engine) as session:
        session.add_all(records)
        session.commit()
55 changes: 55 additions & 0 deletions bbconf/db_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import logging
import os
from typing import List, Optional

import pandas as pd
Expand Down Expand Up @@ -473,6 +474,60 @@ class GeoGsmStatus(Base):
)


class UsageBedMeta(Base):
    """
    ORM mapping for the "usage_bed_meta" table.

    Each row holds a visit count for one bed record together with a
    date_from / date_to pair (presumably the period the count covers).
    """

    __tablename__ = "usage_bed_meta"

    # Surrogate auto-incrementing primary key
    id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True)

    # Foreign key to bed.id; nullable, declared with ON DELETE CASCADE
    bed_id: Mapped[str] = mapped_column(
        ForeignKey("bed.id", ondelete="CASCADE"), nullable=True, index=True
    )

    count: Mapped[int] = mapped_column(default=0, comment="Number of visits")

    date_from: Mapped[datetime.datetime] = mapped_column(comment="Date from")
    date_to: Mapped[datetime.datetime] = mapped_column(comment="Date to")


class UsageBedSetMeta(Base):
    """
    ORM mapping for the "usage_bedset_meta" table.

    Each row holds a visit count for one bedset together with a
    date_from / date_to pair (presumably the period the count covers).
    """

    __tablename__ = "usage_bedset_meta"

    # Surrogate auto-incrementing primary key
    id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True)

    # Foreign key to bedsets.id; nullable, declared with ON DELETE CASCADE
    bedset_id: Mapped[str] = mapped_column(
        ForeignKey("bedsets.id", ondelete="CASCADE"), nullable=True, index=True
    )
    count: Mapped[int] = mapped_column(default=0, comment="Number of visits")

    date_from: Mapped[datetime.datetime] = mapped_column(comment="Date from")
    date_to: Mapped[datetime.datetime] = mapped_column(comment="Date to")


class UsageFiles(Base):
    """
    ORM mapping for the "usage_files" table.

    Each row holds a download count for one file path together with a
    date_from / date_to pair (presumably the period the count covers).
    """

    __tablename__ = "usage_files"

    # Surrogate auto-incrementing primary key
    id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True)
    # Plain string column: no foreign key ties the path to another table
    file_path: Mapped[str] = mapped_column(nullable=False, comment="Path to the file")
    count: Mapped[int] = mapped_column(default=0, comment="Number of downloads")

    date_from: Mapped[datetime.datetime] = mapped_column(comment="Date from")
    date_to: Mapped[datetime.datetime] = mapped_column(comment="Date to")


class UsageSearch(Base):
    """
    ORM mapping for the "usage_search" table.

    Each row holds how many times a search query was run, the kind of
    search it belongs to, and a date_from / date_to pair (presumably the
    period the count covers).
    """

    __tablename__ = "usage_search"

    # Surrogate auto-incrementing primary key
    id: Mapped[int] = mapped_column(primary_key=True, index=True, autoincrement=True)
    query: Mapped[str] = mapped_column(nullable=False, comment="Search query")
    # Distinguishes bed searches from bedset searches stored in the same table
    type: Mapped[str] = mapped_column(
        nullable=False, comment="Type of the search. Bed/Bedset"
    )
    count: Mapped[int] = mapped_column(default=0, comment="Number of searches")

    date_from: Mapped[datetime.datetime] = mapped_column(comment="Date from")
    date_to: Mapped[datetime.datetime] = mapped_column(comment="Date to")


class BaseEngine:
"""
A class with base methods, that are used in several classes.
Expand Down
25 changes: 24 additions & 1 deletion bbconf/models/base_models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import List, Optional, Union
from typing import List, Optional, Union, Dict
import datetime

from pydantic import BaseModel, ConfigDict, Field

Expand All @@ -22,3 +23,25 @@ class StatsReturn(BaseModel):
bedfiles_number: int = 0
bedsets_number: int = 0
genomes_number: int = 0


class FileStats(BaseModel):
    """
    Bed file counts grouped by type, format and genome.
    """

    # Each field maps a category value to a count
    file_type: Dict[str, int]
    file_format: Dict[str, int]
    file_genome: Dict[str, int]


class UsageModel(BaseModel):
    """
    Usage model. Used to track usage of the bedbase.

    Every usage section maps an identifier (bed id, bedset id, search
    query or file path) to the number of times it was used. A section
    that is omitted defaults to an empty dict; None is also accepted.
    """

    # `Dict[str, int]` is the field type, not its default value: a missing
    # section must yield a real (empty) dict, created fresh per instance.
    bed_meta: Union[Dict[str, int], None] = Field(default_factory=dict)
    bedset_meta: Union[Dict[str, int], None] = Field(default_factory=dict)

    bed_search: Union[Dict[str, int], None] = Field(default_factory=dict)
    bedset_search: Union[Dict[str, int], None] = Field(default_factory=dict)
    files: Union[Dict[str, int], None] = Field(default_factory=dict)

    # Start of the reported period (required) and its optional end
    date_from: datetime.datetime
    date_to: Union[datetime.datetime, None] = None
7 changes: 7 additions & 0 deletions bbconf/utils/usage_events.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
id,event
1,bed_search
2,bedset_search
3,bed_metadata
4,bedset_metadata
5,bed_files
6,bedset_files
12 changes: 10 additions & 2 deletions manual_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,14 @@ def config_t():
print(is_valid)


def compreh_stats():
    """
    Manual check: fetch the detailed bed file statistics and print them.

    Uses a hard-coded local config path, so it only works on a machine
    where that file exists.
    """
    from bbconf import BedBaseAgent

    agent = BedBaseAgent(config="/home/bnt4me/virginia/repos/bedhost/config.yaml")
    # The agent method is `get_detailed_stats`; `get_file_stats` is not defined
    results = agent.get_detailed_stats()
    # A bare `results` expression is a no-op in a script, so print it
    print(results)


if __name__ == "__main__":
# zarr_s3()
# add_s3()
Expand All @@ -218,5 +226,5 @@ def config_t():
# get_id_plots_missing()
# neighbour_beds()
# sql_search()

config_t()
# config_t()
compreh_stats()
24 changes: 23 additions & 1 deletion tests/test_common.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import pytest

from bbconf.const import DEFAULT_LICENSE
from bbconf.models.base_models import UsageModel
from bbconf.exceptions import BedBaseConfError
import datetime

from .conftest import SERVICE_UNAVAILABLE
from .utils import ContextManagerDBTesting
from .utils import ContextManagerDBTesting, BED_TEST_ID, BEDSET_TEST_ID


@pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="Database is not available")
Expand All @@ -23,3 +26,22 @@ def test_get_licenses(bbagent_obj):

assert return_result
assert DEFAULT_LICENSE in return_result


@pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="Database is not available")
class TestAddUsage:
    """Smoke tests for storing usage statistics through the agent."""

    def test_add_usages(self, bbagent_obj):
        """Store one usage entry of every kind; passes when nothing raises."""
        period_start = datetime.datetime.now()
        period_end = datetime.datetime.now()

        usage_report = UsageModel(
            bed_meta={BED_TEST_ID: 3},
            bedset_meta={BEDSET_TEST_ID: 4},
            bed_search={"ff": 2},
            bedset_search={"ase": 1},
            files={"bin.bed.gz": 432},
            date_from=period_start,
            date_to=period_end,
        )

        # Test data (beds and a bedset) must exist for the foreign keys
        populated_db = ContextManagerDBTesting(
            config=bbagent_obj.config, add_data=True, bedset=True
        )
        with populated_db:
            bbagent_obj.add_usage(usage_report)