Skip to content

Commit

Permalink
Merge branch 'main' into feat/srm-runs-detail-and-librarylinking
Browse files Browse the repository at this point in the history
  • Loading branch information
raylrui committed Feb 25, 2025
2 parents a2c723f + 8f571dd commit a77f46b
Show file tree
Hide file tree
Showing 12 changed files with 216 additions and 51 deletions.
5 changes: 5 additions & 0 deletions config/stacks/htsget.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@ import {
vpcProps,
} from '../constants';
import { HtsgetStackConfigurableProps } from '../../lib/workload/stateless/stacks/htsget/stack';
import { fileManagerBuckets, fileManagerInventoryBuckets } from './fileManager';

export const getHtsgetProps = (stage: AppStage): HtsgetStackConfigurableProps => {
const inventorySourceBuckets = fileManagerInventoryBuckets(stage);
const eventSourceBuckets = fileManagerBuckets(stage);

return {
vpcProps,
apiGatewayCognitoProps: {
Expand All @@ -17,5 +21,6 @@ export const getHtsgetProps = (stage: AppStage): HtsgetStackConfigurableProps =>
apiName: 'Htsget',
customDomainNamePrefix: 'htsget-file',
},
buckets: [...inventorySourceBuckets, ...eventSourceBuckets],
};
};
24 changes: 0 additions & 24 deletions lib/workload/stateless/stacks/htsget/deploy.toml

This file was deleted.

26 changes: 20 additions & 6 deletions lib/workload/stateless/stacks/htsget/stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ import { Stack, StackProps } from 'aws-cdk-lib';
import { Role } from 'aws-cdk-lib/aws-iam';
import { IVpc, Vpc, VpcLookupOptions } from 'aws-cdk-lib/aws-ec2';
import { ApiGatewayConstruct, ApiGatewayConstructProps } from '../../../components/api-gateway';
import path from 'path';
import { HtsgetLambdaConstruct } from 'htsget-lambda';
import { HtsgetLambda } from 'htsget-lambda';

/**
* Configurable props for the htsget stack.
Expand All @@ -18,6 +17,10 @@ export type HtsgetStackConfigurableProps = {
* API gateway construct props.
*/
apiGatewayCognitoProps: ApiGatewayConstructProps;
/**
* The buckets to configure for htsget access.
*/
buckets: string[];
};

/**
Expand All @@ -43,13 +46,24 @@ export class HtsgetStack extends Stack {
this.vpc = Vpc.fromLookup(this, 'MainVpc', props.vpcProps);
this.apiGateway = new ApiGatewayConstruct(this, 'ApiGateway', props.apiGatewayCognitoProps);

const configPath = path.join(__dirname, 'deploy.toml');
new HtsgetLambdaConstruct(this, 'Htsget', {
config: configPath,
new HtsgetLambda(this, 'Htsget', {
htsgetConfig: {
environment_override: {
HTSGET_LOCATIONS: props.buckets.map((bucket) => {
let regex = `^${bucket}/(?P<key>.*)$`;
let substitution_string = '$key';
let backend = `{ kind=S3, bucket=${bucket} }`;

return `{ regex=${regex}, substitution_string=${substitution_string}, backend=${backend} }`;
}),
},
},
cargoLambdaFlags: ['--features', 'aws'],
vpc: this.vpc,
role: props.role,
httpApi: this.apiGateway.httpApi,
gitReference: 'htsget-lambda-v0.5.2',
gitReference: 'htsget-lambda-v0.6.0',
gitForceClone: false,
});
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import json

from django.core.management import BaseCommand

from django.db.models import Q

from app.models import Library


# https://docs.djangoproject.com/en/5.0/howto/custom-management-commands/
class Command(BaseCommand):
    """
    Management command that removes Library records whose ``library_id`` contains
    a ``_rerun`` or ``_topup`` marker.

    Run with ``--dry-run`` to only list the matching library IDs without deleting anything.
    """

    help = "Delete libraries whose library_id contains '_rerun' or '_topup'"

    def add_arguments(self, parser):
        """Register the optional ``--dry-run`` flag on the command's argument parser."""
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="List all libraries that will be deleted without actually deleting them",
        )

    def handle(self, *args, **options):
        """
        List the matching libraries and, unless ``--dry-run`` was given, delete them.

        :param options: parsed command options; only ``dry_run`` is read here.
        """
        # NOTE(review): `icontains` matches the marker anywhere in the ID and ignores case,
        # so it is broader than a strict '_rerun<digits>' / '_topup<digits>' suffix match.
        duplicated_libraries = Library.objects.filter(
            Q(library_id__icontains="_rerun") | Q(library_id__icontains="_topup")
        )

        # Write through self.stdout so output can be captured/redirected by the caller
        self.stdout.write("Libraries contain matching pattern:")
        self.stdout.write(
            json.dumps([library.library_id for library in duplicated_libraries], indent=4)
        )

        if options["dry_run"]:
            self.stdout.write("Dry run: not deleting libraries")
        else:
            self.stdout.write("Deleting all libraries")
            duplicated_libraries.delete()

        self.stdout.write('Completed')
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import path from 'path';
import { Construct } from 'constructs';
import { Duration } from 'aws-cdk-lib';
import { PythonFunction } from '@aws-cdk/aws-lambda-python-alpha';
import { ISecret } from 'aws-cdk-lib/aws-secretsmanager';
import {
DockerImageFunction,
DockerImageFunctionProps,
DockerImageCode,
} from 'aws-cdk-lib/aws-lambda';

/**
 * Props accepted by `LambdaDjangoCommandConstruct`.
 */
type LambdaProps = {
  /**
   * The basic common lambda properties that it should inherit from.
   * Only `environment`, `securityGroups`, `vpc`, `vpcSubnets` and `architecture` are read.
   */
  basicLambdaConfig: Partial<DockerImageFunctionProps>;
  /**
   * The secret for the db connection; the lambda is granted read access to it.
   */
  dbConnectionSecret: ISecret;
};

/**
 * Creates a container-image lambda (built from `lambda.Dockerfile`) and grants it
 * read access to the database connection secret.
 */
export class LambdaDjangoCommandConstruct extends Construct {
  /**
   * The container-image function created by this construct.
   * Typed as `DockerImageFunction` because that is what the constructor instantiates.
   */
  readonly lambda: DockerImageFunction;

  /**
   * @param scope the parent construct
   * @param id the construct id
   * @param lambdaProps the shared lambda settings and the db secret to grant read access on
   */
  constructor(scope: Construct, id: string, lambdaProps: LambdaProps) {
    super(scope, id);

    const { basicLambdaConfig, dbConnectionSecret } = lambdaProps;

    // The Docker build context sits three directories above this file; the Dockerfile
    // path below is given relative to that context.
    const buildContext = path.join(__dirname, '../../../');

    this.lambda = new DockerImageFunction(this, 'DjangoCommandLambda', {
      environment: {
        ...basicLambdaConfig.environment,
      },
      securityGroups: basicLambdaConfig.securityGroups,
      vpc: basicLambdaConfig.vpc,
      vpcSubnets: basicLambdaConfig.vpcSubnets,
      architecture: basicLambdaConfig.architecture,
      code: DockerImageCode.fromImageAsset(buildContext, {
        file: 'deploy/construct/lambda-django-command/lambda.Dockerfile',
      }),
      timeout: Duration.minutes(15),
      memorySize: 4096,
    });

    dbConnectionSecret.grantRead(this.lambda);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Container image for the Django command lambda, based on the AWS Lambda Python 3.12 image
FROM public.ecr.aws/lambda/python:3.12

# Work from the Lambda task root so the copied sources are importable by the runtime
WORKDIR ${LAMBDA_TASK_ROOT}

# COPY all files from the build context
COPY . .

# Install the specified packages; --no-cache-dir keeps pip's download cache out of the image
RUN pip install --no-cache-dir -r deps/requirements-full.txt

# Specify handler: the `handler` function in the `handler.django_command` module
CMD [ "handler.django_command.handler" ]
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { LambdaAPIConstruct } from './construct/lambda-api';
import { ApiGatewayConstructProps } from '../../../../components/api-gateway';
import { PostgresManagerStack } from '../../../../stateful/stacks/postgres-manager/deploy/stack';
import { LambdaLoadCustomCSVConstruct } from './construct/lambda-load-custom-csv';
import { LambdaDjangoCommandConstruct } from './construct/lambda-django-command';

export type MetadataManagerStackProps = {
/**
Expand Down Expand Up @@ -89,6 +90,11 @@ export class MetadataManagerStack extends Stack {
vpc: vpc,
});

new LambdaDjangoCommandConstruct(this, 'DjangoCommandLambda', {
basicLambdaConfig: basicLambdaConfig,
dbConnectionSecret: dbSecret,
});

const syncGsheetLambda = new LambdaSyncGsheetConstruct(this, 'SyncGsheetLambda', {
basicLambdaConfig: basicLambdaConfig,
dbConnectionSecret: dbSecret,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
"""migrate lambda module
Convenience AWS lambda handler for running whitelisted Django management commands
"""
import json
import logging
from django.core.management import execute_from_command_line

logger = logging.getLogger()
logger.setLevel(logging.INFO)


def handler(event, context) -> None:
    """
    Run a whitelisted Django management command described by the Lambda event.

    Expected event shape::

        {"command": "clean_duplicated_libraries", "args": ["--dry-run"]}

    :param event: dict with a ``command`` name and an optional ``args`` list of strings
    :param context: Lambda context object (unused)
    :raises ValueError: if the command is not whitelisted, or ``args`` is not a list of strings
    """
    logger.info(f"Processing event: {json.dumps(event, indent=4)}")

    command = event.get("command", None)
    args = event.get("args", [])

    # Only explicitly approved commands may be triggered through this lambda
    whitelist_command = ["clean_duplicated_libraries"]

    if command not in whitelist_command:
        raise ValueError(f"Command {command} not accepted")

    # A bare string would be unpacked character by character below, so reject it up front
    if not isinstance(args, (list, tuple)) or not all(isinstance(arg, str) for arg in args):
        raise ValueError(f"Command args must be a list of strings, got: {args!r}")

    # execute_from_command_line has no return value; the command reports via its own output
    execute_from_command_line(["./manage.py", command, *args])
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def clean_model_history(minutes: int = None):
call_command("clean_duplicate_history", "--auto", minutes=minutes, stdout=open(os.devnull, 'w'))


def sanitize_lab_metadata_df(df: pd.DataFrame):
def sanitize_lab_metadata_df(df: pd.DataFrame) -> pd.DataFrame:
"""
sanitize record by renaming columns, and clean df cells
"""
Expand All @@ -37,6 +37,11 @@ def sanitize_lab_metadata_df(df: pd.DataFrame):
# dropping column that has empty column heading
df = df.drop('', axis='columns', errors='ignore')

# Remove any '_rerun' or '_topup' postfix (optionally followed by digits) from library IDs
# See https://github.com/umccr/orcabus/issues/865
df['library_id'] = df['library_id'].str.replace(r'_rerun\d*$', '', regex=True)
df['library_id'] = df['library_id'].str.replace(r'_topup\d*$', '', regex=True)

df = df.reset_index(drop=True)
return df

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import json
import pandas as pd
from django.db.models import Q
from django.utils.timezone import override
from libumccr.aws import libeb

from unittest.mock import MagicMock
Expand All @@ -11,6 +13,7 @@
from proc.service.tracking_sheet_srv import sanitize_lab_metadata_df, persist_lab_metadata, \
drop_incomplete_tracking_sheet_records
from .utils import check_put_event_entries_format, check_put_event_value, is_expected_event_in_output
from ..service.utils import warn_drop_duplicated_library

TEST_EVENT_BUS_NAME = "TEST_BUS"

Expand Down Expand Up @@ -173,6 +176,50 @@ def test_persist_lab_metadata(self):
ctc = prj.contact_set.get(contact_id=rec.get("ProjectOwner"))
self.assertEqual(ctc.contact_id, rec.get("ProjectOwner"), 'incorrect project-contact link')

def test_rerun_topup_libraries(self) -> None:
    """
    python manage.py test proc.tests.test_tracking_sheet_srv.TrackingSheetSrvUnitTests.test_rerun_topup_libraries

    Topup / rerun libraries must be folded into the original library instead of being
    stored as new records.
    """
    base_library_id = RECORD_1['LibraryID']
    test_override_cycles = "TEST_123"

    # Start from the original record, then append one copy per topup / rerun postfix
    final_records = [RECORD_1]
    for postfix in ('_topup', '_topup23', '_rerun', '_rerun2342'):
        variant_record = RECORD_1.copy()
        variant_record['LibraryID'] = base_library_id + postfix
        final_records.append(variant_record)

    # Only the last variant gets different properties, so the assertion below can tell
    # whether the latest record is the one that ends up stored
    final_records[-1]["OverrideCycles"] = test_override_cycles

    # Same pipeline as the original test: normalize -> sanitize -> drop duplicates -> persist
    metadata_pd = warn_drop_duplicated_library(
        sanitize_lab_metadata_df(pd.json_normalize(final_records))
    )
    persist_lab_metadata(metadata_pd, SHEET_YEAR)

    original_lib = Library.objects.get(library_id=RECORD_1.get("LibraryID"))
    self.assertIsNotNone(original_lib, "Original library should be created")
    self.assertEqual(original_lib.override_cycles, test_override_cycles, "Latest record is expected to be stored")

    # No library carrying a rerun / topup marker may have been persisted
    marker_filter = Q(library_id__icontains="_rerun") | Q(library_id__icontains="_topup")
    dup_libraries = Library.objects.all().filter(marker_filter)
    self.assertEqual(dup_libraries.count(), 0, "Topup and rerun libraries should NOT exist")

def test_new_df_in_different_year(self) -> None:
"""
python manage.py test proc.tests.test_tracking_sheet_srv.TrackingSheetSrvUnitTests.test_new_df_in_different_year
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@
"@aws-cdk/aws-pipes-alpha": "2.177.0-alpha.0",
"@aws-cdk/aws-pipes-sources-alpha": "2.177.0-alpha.0",
"aws-cdk-lib": "2.177.0",
"cargo-lambda-cdk": "0.0.31",
"cargo-lambda-cdk": "^0.0.31",
"cdk-nag": "^2.35.3",
"constructs": "^10.4.2",
"core-js-pure": "^3.40.0",
"dotenv": "^16.4.7",
"htsget-lambda": "^0.7.2",
"htsget-lambda": "^0.8.7",
"source-map-support": "^0.5.21",
"sqs-dlq-monitoring": "^1.2.20"
},
Expand Down
Loading

0 comments on commit a77f46b

Please sign in to comment.