Skip to content

Commit

Permalink
Merge pull request #145 from databio/dev
Browse files Browse the repository at this point in the history
Release bedhost v0.7.0
  • Loading branch information
khoroshevskyi authored Oct 24, 2024
2 parents c6f3614 + 6a2380c commit acbb847
Show file tree
Hide file tree
Showing 21 changed files with 771 additions and 316 deletions.
2 changes: 1 addition & 1 deletion bedhost/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.6.0"
__version__ = "0.7.0"
1 change: 0 additions & 1 deletion bedhost/data_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ class ComponentVersions(BaseModel):


class EmbeddingModels(BaseModel):
vec2vec: str
region2vec: str
text2vec: str

Expand Down
1 change: 0 additions & 1 deletion bedhost/routers/base_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ async def service_info():
version=bedhost_version,
component_versions=all_versions,
embedding_models=EmbeddingModels(
vec2vec=bbagent.config.config.path.vec2vec,
region2vec=bbagent.config.config.path.region2vec,
text2vec=bbagent.config.config.path.text2vec,
),
Expand Down
1 change: 1 addition & 0 deletions bedhost/routers/bed_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
"/example",
summary="Get example BED record metadata",
response_model=BedMetadataAll,
response_model_by_alias=False,
)
async def get_example_bed_record():
"""
Expand Down
141 changes: 78 additions & 63 deletions bedhost/routers/bedset_api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from fastapi import APIRouter, HTTPException
from fastapi import APIRouter, HTTPException, Request, Response
import logging

from bbconf.models.bedset_models import (
Expand All @@ -12,6 +12,7 @@

from ..main import bbagent
from ..const import PKG_NAME, EXAMPLE_BEDSET
from ..utils import zip_pep


router = APIRouter(prefix="/v1/bedset", tags=["bedset"])
Expand Down Expand Up @@ -63,6 +64,20 @@ async def get_bedset_metadata(
raise HTTPException(status_code=404, detail="No records found")


@router.get(
    "/{bedset_id}/pep",
    summary="Download PEP project for a single BEDset record",
    description=f"Example\n bedset_id: {EXAMPLE_BEDSET}",
)
async def get_bedset_pep(
    bedset_id: str,
):
    """
    Download the PEP project of a single BEDset as a zip archive.

    :param bedset_id: identifier of the BEDset whose PEP is requested
    :return: the response built by ``zip_pep`` from the BEDset's PEP
    :raises HTTPException: 404 when the BEDset lookup raises
        ``BedSetNotFoundError``
    """
    # Keep the try body down to the lookup so that only a missing record is
    # translated into a 404; problems while zipping surface as they are.
    try:
        pep = bbagent.bedset.get_bedset_pep(bedset_id)
    except BedSetNotFoundError as err:
        raise HTTPException(status_code=404, detail="No records found") from err

    return zip_pep(pep)


@router.get(
"/{bedset_id}/metadata/plots",
response_model=BedSetPlots,
Expand Down Expand Up @@ -108,65 +123,65 @@ async def get_bedfiles_in_bedset(
return bbagent.bedset.get_bedset_bedfiles(bedset_id)


# TODO: how are we using it?

# @router.get("/{bedset_id}/track_hub")
# async def get_track_hub_bedset(request: Request, bedset_id: str):
# """
# Generate track hub files for the BED set
# """
#
# hit = bbc.bedset.retrieve_one(bedset_id)
# name = hit.get("name", "")
#
# hub_txt = (
# f"hub \t BEDBASE_{name}\n"
# f"shortLabel \t BEDBASE_{name}\n"
# f"longLabel\t BEDBASE {name} signal tracks\n"
# f"genomesFile\t {request.url_for('get_genomes_file_bedset', bedset_id=bedset_id)}\n"
# "email\t [email protected]\n"
# "descriptionUrl\t http://www.bedbase.org/"
# )
#
# return Response(hub_txt, media_type="text/plain")


# @router.get("/{bedset_id}/track_hub_genome_file", include_in_schema=False)
# async def get_genomes_file_bedset(request: Request, bedset_id: str):
# """
# Generate genomes file for the BED set track hub
# """
#
# genome = bbc.bedset.retrieve_one(bedset_id, "genome")
#
# genome_txt = (
# f"genome\t {genome['alias']}\n"
# f"trackDb\t {request.url_for('get_trackDb_file_bedset', bedset_id=bedset_id)}"
# )
#
# return Response(genome_txt, media_type="text/plain")


# @router.get("/{md5sum}/track_hub_trackDb_file", include_in_schema=False)
# async def get_trackDb_file_bedset(request: Request, bedset_id: str):
# """
# Generate trackDb file for the BED set track hub
# """
#
# hit = bbc.select_bedfiles_for_bedset(
# bedset_id,
# metadata=True,
# )
#
# trackDb_txt = ""
# for bed in hit:
# trackDb_txt = (
# trackDb_txt + f"track\t {bed.get('name', '')}\n"
# "type\t bigBed\n"
# f"bigDataUrl\t http://data.bedbase.org/bigbed_files/{bed.get('name', '')}.bigBed\n"
# f"shortLabel\t {bed.get('name', '')}\n"
# f"longLabel\t {bed.get('description', '')}\n"
# "visibility\t full\n\n"
# )
#
# return Response(trackDb_txt, media_type="text/plain")
@router.head("/{bedset_id}/track_hub")
@router.get("/{bedset_id}/track_hub")
async def get_track_hub_bedset(request: Request, bedset_id: str):
    """
    Generate track hub files for the BED set
    """
    # NOTE: the docstring above is what FastAPI publishes as this endpoint's
    # description, so implementation notes live in comments instead.

    # The record is fetched only to confirm that the BEDset exists; a missing
    # record is reported as 404, matching the other bedset endpoints.
    try:
        bbagent.bedset.get(bedset_id)
    except BedSetNotFoundError as err:
        raise HTTPException(status_code=404, detail="No records found") from err

    genomes_url = str(
        request.url_for("get_genomes_file_bedset", bedset_id=bedset_id)
    )
    # Upgrade plain http only. A blanket replace("http", "https", 1) would
    # turn a URL that is already https into "httpss://...".
    if genomes_url.startswith("http://"):
        genomes_url = "https://" + genomes_url[len("http://"):]

    # NOTE(review): the email value below looks like a redaction artefact
    # rather than a real address -- confirm and restore the contact address.
    hub_txt = (
        f"hub \t BEDBASE_{bedset_id}\n"
        f"shortLabel \t BEDBASE_{bedset_id}\n"
        f"longLabel\t BEDBASE {bedset_id} signal tracks\n"
        f"genomesFile\t {genomes_url}\n"
        "email\t [email protected]\n"
        "descriptionUrl\t https://bedbase.org/"
    )

    return Response(hub_txt, media_type="text/plain")


@router.head("/{bedset_id}/track_hub_genome_file", include_in_schema=False)
@router.get("/{bedset_id}/track_hub_genome_file", include_in_schema=False)
async def get_genomes_file_bedset(request: Request, bedset_id: str):
    """
    Generate genomes file for the BED set track hub

    :param request: incoming request, used to build the absolute trackDb URL
    :param bedset_id: identifier of the BEDset the hub is generated for
    :return: plain-text response with the ``genome`` and ``trackDb`` lines
    """

    # The assembly is fixed here; it is not derived from the BEDset itself.
    genome = "hg38"

    track_db_url = str(
        request.url_for("get_trackDb_file_bedset", bedset_id=bedset_id)
    )
    # Upgrade plain http only. A blanket replace("http", "https", 1) would
    # turn a URL that is already https into "httpss://...".
    if track_db_url.startswith("http://"):
        track_db_url = "https://" + track_db_url[len("http://"):]

    genome_txt = f"genome\t {genome}\n" f"trackDb\t {track_db_url}"

    return Response(genome_txt, media_type="text/plain")


@router.head("/{bedset_id}/track_hub_trackDb_file", include_in_schema=False)
@router.get("/{bedset_id}/track_hub_trackDb_file", include_in_schema=False)
async def get_trackDb_file_bedset(bedset_id: str):
    """
    Generate trackDb file for the BED set track hub

    One track stanza is written for every BED file of the set that has a
    bigBed file with at least one access method; other BED files are skipped.

    :param bedset_id: identifier of the BEDset the hub is generated for
    :return: plain-text response holding the concatenated track stanzas
    """

    bedfiles = bbagent.bedset.get_bedset_bedfiles(bedset_id)

    stanzas = []
    for bed in bedfiles.results:
        # Full metadata is fetched per BED file to reach its file records.
        metadata = bbagent.bed.get(bed.id, full=True)

        bigbed = metadata.files.bigbed_file
        if not bigbed:
            continue
        # Without an access method there is no URL to point the track at;
        # skip this file instead of failing the whole hub on the [0] lookup.
        if not bigbed.access_methods:
            continue

        stanzas.append(
            f"track\t {metadata.name}\n"
            "type\t bigBed\n"
            f"bigDataUrl\t {bigbed.access_methods[0].access_url.url} \n"
            f"shortLabel\t {metadata.name}\n"
            f"longLabel\t {metadata.description}\n"
            "visibility\t full\n\n"
        )

    return Response("".join(stanzas), media_type="text/plain")
80 changes: 80 additions & 0 deletions bedhost/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import io
import zipfile
from datetime import date
from typing import Any, Dict

import pandas as pd
import yaml
from fastapi import Response
from peppy.const import (
CFG_SAMPLE_TABLE_KEY,
CFG_SUBSAMPLE_TABLE_KEY,
CONFIG_KEY,
NAME_KEY,
SAMPLE_RAW_DICT_KEY,
SUBSAMPLE_RAW_LIST_KEY,
)


def zip_conv_result(conv_result: dict, filename: str = "project.zip") -> Response:
    """
    Zip a dictionary of converted results and return it as a download response.

    ## Copied from pephub/helpers.py

    :param conv_result: mapping of archive member name to its text content
    :param filename: file name offered to the client for the zip download
    :return: response whose body is the zip archive
    """
    buffer = io.BytesIO()

    with zipfile.ZipFile(
        buffer, mode="w", compression=zipfile.ZIP_DEFLATED
    ) as archive:
        for name, res in conv_result.items():
            # Each key is the member path inside the archive.
            archive.writestr(name, res.encode("utf-8"))

    # Send the name as a quoted-string so that spaces or separators in it do
    # not break the header; backslashes and quotes are escaped accordingly.
    quoted_name = filename.replace("\\", "\\\\").replace('"', '\\"')

    # The archive was built in memory; hand its bytes over with a zip MIME type.
    return Response(
        buffer.getvalue(),
        media_type="application/x-zip-compressed",
        headers={"Content-Disposition": f'attachment; filename="{quoted_name}"'},
    )


def zip_pep(project: Dict[str, Any]) -> Response:
    """
    Zip a project up to download.

    ## Copied from pephub/helpers.py

    The archive holds the sample table, any subsample tables and the project
    config (as YAML) with its table entries pointing at those CSV files.

    :param project: raw PEP project dictionary to zip
    :return: response whose body is the zip archive of the project
    """
    content_to_zip = {}

    # Work on a copy: the table entries set below describe the archive layout
    # and must not leak into the caller's project dictionary.
    config = dict(project[CONFIG_KEY])

    # One fallback serves both the config file and the archive name, so an
    # unnamed project no longer produces "None_config.yaml".
    project_name = config.get(NAME_KEY) or f"downloaded_pep_{date.today()}"

    samples = project.get(SAMPLE_RAW_DICT_KEY)
    if samples is not None:
        config[CFG_SAMPLE_TABLE_KEY] = ["sample_table.csv"]
        content_to_zip["sample_table.csv"] = pd.DataFrame(samples).to_csv(
            index=False
        )

    subsamples = project.get(SUBSAMPLE_RAW_LIST_KEY)
    if subsamples is not None:
        # A single table may arrive on its own rather than wrapped in a list.
        if not isinstance(subsamples, list):
            subsamples = [subsamples]
        config[CFG_SUBSAMPLE_TABLE_KEY] = []
        for number, table in enumerate(subsamples, start=1):
            file_name = f"subsample_table{number}.csv"
            config[CFG_SUBSAMPLE_TABLE_KEY].append(file_name)
            content_to_zip[file_name] = pd.DataFrame(table).to_csv(index=False)

    content_to_zip[f"{project_name}_config.yaml"] = yaml.dump(config, indent=4)

    # Give the download a .zip extension so clients recognise the archive.
    zip_filename = str(project_name)
    if not zip_filename.lower().endswith(".zip"):
        zip_filename = f"{zip_filename}.zip"
    return zip_conv_result(content_to_zip, filename=zip_filename)
16 changes: 9 additions & 7 deletions deployment/config/api-dev.bedbase.org.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
path:
remote_url_base: http://data.bedbase.org/
text2vec: "sentence-transformers/all-MiniLM-L6-v2"
region2vec: databio/r2v-pretrained-for-search
vec2vec: "databio/v2v-sentencetransformers-encode"
text2vec: 'sentence-transformers/all-MiniLM-L6-v2'
# region2vec: databio/r2v-pretrained-for-search
region2vec: databio/r2v-encode-hg38
vec2vec: 'databio/v2v-sentencetransformers-encode'
database:
host: $POSTGRES_HOST
port: 5432
Expand All @@ -13,7 +14,8 @@ qdrant:
host: $QDRANT_HOST
port: 6333
api_key: $QDRANT_API_KEY
collection: bedbase2
file_collection: bedbase2
text_collection: bed_text
server:
host: 0.0.0.0
port: 8000
Expand All @@ -28,14 +30,14 @@ phc:
tag: default
access_methods:
http:
type: "https"
type: 'https'
description: HTTP compatible path
prefix: https://data2.bedbase.org/
s3:
type: "s3"
type: 's3'
description: S3 compatible path
prefix: s3://data2.bedbase.org/
local:
type: "https"
type: 'https'
description: How to serve local files.
prefix: /static/
6 changes: 4 additions & 2 deletions deployment/config/api.bedbase.org.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
path:
remote_url_base: http://data.bedbase.org/
text2vec: "sentence-transformers/all-MiniLM-L6-v2"
region2vec: databio/r2v-pretrained-for-search
# region2vec: databio/r2v-pretrained-for-search
region2vec: databio/r2v-encode-hg38
vec2vec: "databio/v2v-sentencetransformers-encode"
database:
host: $POSTGRES_HOST
Expand All @@ -13,7 +14,8 @@ qdrant:
host: $QDRANT_HOST
port: 6333
api_key: $QDRANT_API_KEY
collection: bedbase2
file_collection: bedbase2
text_collection: bed_text
server:
host: 0.0.0.0
port: 8000
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-all.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#bbconf @ git+https://github.com/databio/bbconf.git@dev#egg=bbconf
bbconf>=0.7.1
bbconf>=0.8.0
fastapi>=0.103.0
logmuse>=0.2.7
markdown
Expand Down
4 changes: 1 addition & 3 deletions ui/bedbase-types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1015,7 +1015,7 @@ export interface components {
/** BedStatsModel */
BedStatsModel: {
/** Regions No */
regions_no?: number | null;
number_of_regions?: number | null;
/** Gc Content */
gc_content?: number | null;
/** Median Tss Dist */
Expand Down Expand Up @@ -1103,8 +1103,6 @@ export interface components {
};
/** EmbeddingModels */
EmbeddingModels: {
/** Vec2Vec */
vec2vec: string;
/** Region2Vec */
region2vec: string;
/** Text2Vec */
Expand Down
Loading

0 comments on commit acbb847

Please sign in to comment.