diff --git a/source/infrastructure/lib/api/model-management.ts b/source/infrastructure/lib/api/model-management.ts index 39f5f0f4..feca62ec 100644 --- a/source/infrastructure/lib/api/model-management.ts +++ b/source/infrastructure/lib/api/model-management.ts @@ -66,6 +66,8 @@ export class ModelApi extends Construct { modelLambda.addToRolePolicy(this.iamHelper.cfnStatement); modelLambda.addToRolePolicy(this.iamHelper.stsStatement); modelLambda.addToRolePolicy(this.iamHelper.cfnStatement); + modelLambda.addToRolePolicy(this.iamHelper.serviceQuotaStatement); + modelLambda.addToRolePolicy(this.iamHelper.sagemakerModelManagementStatement); // API Gateway Lambda Integration to manage model const lambdaModelIntegration = new apigw.LambdaIntegration(modelLambda, { diff --git a/source/infrastructure/lib/shared/iam-helper.ts b/source/infrastructure/lib/shared/iam-helper.ts index cf2c9562..c841bd48 100644 --- a/source/infrastructure/lib/shared/iam-helper.ts +++ b/source/infrastructure/lib/shared/iam-helper.ts @@ -30,6 +30,8 @@ export class IAMHelper extends Construct { public secretStatement: PolicyStatement; public codePipelineStatement: PolicyStatement; public cfnStatement: PolicyStatement; + public serviceQuotaStatement: PolicyStatement; + public sagemakerModelManagementStatement: PolicyStatement; public createPolicyStatement(actions: string[], resources: string[]) { return new PolicyStatement({ @@ -79,7 +81,7 @@ export class IAMHelper extends Construct { "sagemaker:CreateEndpoint", "sagemaker:CreateEndpointConfig", "sagemaker:InvokeEndpointAsync", - "sagemaker:UpdateEndpointWeightsAndCapacities", + "sagemaker:UpdateEndpointWeightsAndCapacities" ], [`arn:${Aws.PARTITION}:sagemaker:${Aws.REGION}:${Aws.ACCOUNT_ID}:endpoint/*`], ); @@ -191,5 +193,57 @@ export class IAMHelper extends Construct { ], ["*"], ); + this.sagemakerModelManagementStatement = this.createPolicyStatement( + [ + "sagemaker:List*", + "sagemaker:ListEndpoints", + "sagemaker:DeleteModel", + "sagemaker:DeleteEndpoint", + "sagemaker:DescribeEndpoint", + "sagemaker:DeleteEndpointConfig", + "sagemaker:DescribeEndpointConfig", + "sagemaker:InvokeEndpoint", + "sagemaker:CreateModel", + "sagemaker:CreateEndpoint", + "sagemaker:CreateEndpointConfig", + "sagemaker:InvokeEndpointAsync", + "sagemaker:UpdateEndpointWeightsAndCapacities" + ], + ["*"], + ); + this.serviceQuotaStatement = this.createPolicyStatement( + [ + "autoscaling:DescribeAccountLimits", + "cloudformation:DescribeAccountLimits", + "cloudwatch:DescribeAlarmsForMetric", + "cloudwatch:DescribeAlarms", + "cloudwatch:GetMetricData", + "cloudwatch:GetMetricStatistics", + "dynamodb:DescribeLimits", + "elasticloadbalancing:DescribeAccountLimits", + "iam:GetAccountSummary", + "kinesis:DescribeLimits", + "organizations:DescribeAccount", + "organizations:DescribeOrganization", + "organizations:ListAWSServiceAccessForOrganization", + "rds:DescribeAccountAttributes", + "route53:GetAccountLimit", + "tag:GetTagKeys", + "tag:GetTagValues", + "servicequotas:GetAssociationForServiceQuotaTemplate", + "servicequotas:GetAWSDefaultServiceQuota", + "servicequotas:GetRequestedServiceQuotaChange", + "servicequotas:GetServiceQuota", + "servicequotas:GetServiceQuotaIncreaseRequestFromTemplate", + "servicequotas:ListAWSDefaultServiceQuotas", + "servicequotas:ListRequestedServiceQuotaChangeHistory", + "servicequotas:ListRequestedServiceQuotaChangeHistoryByQuota", + "servicequotas:ListServices", + "servicequotas:ListServiceQuotas", + "servicequotas:ListServiceQuotaIncreaseRequestsInTemplate", + 
"servicequotas:ListTagsForResource" + ], + ["*"], + ); } } diff --git a/source/lambda/etl/sfn_handler.py b/source/lambda/etl/sfn_handler.py index 94ec9b2e..c3bccf3b 100644 --- a/source/lambda/etl/sfn_handler.py +++ b/source/lambda/etl/sfn_handler.py @@ -2,124 +2,172 @@ import logging import os from datetime import datetime, timezone +from typing import Dict, List, TypedDict import boto3 -from chatbot_management import create_chatbot from constant import ExecutionStatus, IndexType, UiStatus from utils.parameter_utils import get_query_parameter +# Initialize AWS resources once client = boto3.client("stepfunctions") dynamodb = boto3.resource("dynamodb") execution_table = dynamodb.Table(os.environ.get("EXECUTION_TABLE_NAME")) -index_table = dynamodb.Table(os.environ.get("INDEX_TABLE_NAME")) chatbot_table = dynamodb.Table(os.environ.get("CHATBOT_TABLE_NAME")) model_table = dynamodb.Table(os.environ.get("MODEL_TABLE_NAME")) -embedding_endpoint = os.environ.get("EMBEDDING_ENDPOINT") +index_table = dynamodb.Table(os.environ.get("INDEX_TABLE_NAME")) sfn_arn = os.environ.get("SFN_ARN") -create_time = str(datetime.now(timezone.utc)) +# Consolidate constants at the top +CORS_HEADERS = { + "Content-Type": "application/json", + "Access-Control-Allow-Headers": "Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token", + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "*", +} + +# Initialize logging at the top level logger = logging.getLogger() logger.setLevel(logging.INFO) -def handler(event, context): - # Check the event for possible S3 created event - input_payload = {} - logger.info(event) - resp_header = { - "Content-Type": "application/json", - "Access-Control-Allow-Headers": "Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token", - "Access-Control-Allow-Origin": "*", - "Access-Control-Allow-Methods": "*", +def validate_index_type(index_type: str) -> bool: + """Validate if the provided index type is supported.""" + valid_types = [ + IndexType.QD.value, + IndexType.QQ.value, + IndexType.INTENTION.value, + ] + return index_type in valid_types + + +def get_etl_info(group_name: str, chatbot_id: str, index_type: str): + """ + Retrieve the index id, model type, and model endpoint for the given chatbot and index type. + These will be further used to perform knowledge ingestion to opensearch. 
+ Returns: Tuple of (index_id, model_type, model_endpoint) + """ + + chatbot_item = chatbot_table.get_item( + Key={"groupName": group_name, "chatbotId": chatbot_id} + ).get("Item") + + model_item = model_table.get_item( + Key={"groupName": group_name, "modelId": f"{chatbot_id}-embedding"} + ).get("Item") + + if not (chatbot_item and model_item): + raise ValueError("Chatbot or model not found") + + model = model_item.get("parameter", {}) + specific_type_indices = ( + chatbot_item.get("indexIds", {}).get(index_type, {}).get("value", {}) + ) + + if not specific_type_indices: + raise ValueError("No indices found for the given index type") + + return ( + next(iter(specific_type_indices.values())), # First index ID + model.get("ModelType"), + model.get("ModelEndpoint"), + ) + + +def create_execution_record( + execution_id: str, input_body: Dict, sfn_execution_id: str +) -> None: + """Create execution record in DynamoDB.""" + execution_record = { + **input_body, + "sfnExecutionId": sfn_execution_id, + "executionStatus": ExecutionStatus.IN_PROGRESS.value, + "executionId": execution_id, + "uiStatus": UiStatus.ACTIVE.value, + "createTime": str(datetime.now(timezone.utc)), } + del execution_record["tableItemId"] + execution_table.put_item(Item=execution_record) + + +def handler(event: Dict, context) -> Dict: + """Main Lambda handler for ETL operations.""" + logger.info(event) - authorizer_type = event["requestContext"].get("authorizer", {}).get("authorizerType") - if authorizer_type == "lambda_authorizer": - claims = json.loads(event["requestContext"]["authorizer"]["claims"]) + try: + # Validate and extract authorization + authorizer = event["requestContext"].get("authorizer", {}) + if authorizer.get("authorizerType") != "lambda_authorizer": + raise ValueError("Invalid authorizer type") + + claims = json.loads(authorizer.get("claims", {})) if "use_api_key" in claims: group_name = get_query_parameter(event, "GroupName", "Admin") cognito_groups_list = [group_name] else: - cognito_groups = claims["cognito:groups"] - cognito_groups_list = cognito_groups.split(",") - else: - logger.error("Invalid authorizer type") + cognito_groups_list = claims["cognito:groups"].split(",") + + # Process input + input_body = json.loads(event["body"]) + index_type = input_body.get("indexType") + + if not validate_index_type(index_type): + return { + "statusCode": 400, + "headers": CORS_HEADERS, + "body": f"Invalid indexType, valid values are {', '.join([t.value for t in IndexType])}", + } + + group_name = input_body.get("groupName") or ( + "Admin" + if "Admin" in cognito_groups_list + else cognito_groups_list[0] + ) + chatbot_id = input_body.get("chatbotId", group_name.lower()) + index_id, embedding_model_type, embedding_endpoint = get_etl_info( + group_name, chatbot_id, index_type + ) + + # Update input body with processed values + input_body.update( + { + "chatbotId": chatbot_id, + "groupName": group_name, + "tableItemId": context.aws_request_id, + "indexId": index_id, + "embeddingModelType": embedding_model_type, + "embeddingEndpoint": embedding_endpoint, + } + ) + + # Start step function and create execution record + sfn_response = client.start_execution( + stateMachineArn=sfn_arn, input=json.dumps(input_body) + ) + + execution_id = context.aws_request_id + create_execution_record( + execution_id, + input_body, + sfn_response["executionArn"].split(":")[-1], + ) + return { - "statusCode": 403, - "headers": resp_header, - "body": json.dumps({"error": "Invalid authorizer type"}), + "statusCode": 200, + "headers": 
CORS_HEADERS, + "body": json.dumps( + { + "execution_id": execution_id, + "step_function_arn": sfn_response["executionArn"], + "input_payload": input_body, + } + ), } - # Parse the body from the event object - input_body = json.loads(event["body"]) - if "indexType" not in input_body or input_body["indexType"] not in [ - IndexType.QD.value, - IndexType.QQ.value, - IndexType.INTENTION.value, - ]: + except Exception as e: + logger.error(f"Error processing request: {str(e)}") return { - "statusCode": 400, - "headers": resp_header, - "body": ( - f"Invalid indexType, valid values are " - f"{IndexType.QD.value}, {IndexType.QQ.value}, " - f"{IndexType.INTENTION.value}" - ), + "statusCode": 500, + "headers": CORS_HEADERS, + "body": json.dumps({"error": str(e)}), } - index_type = input_body["indexType"] - group_name = "Admin" if "Admin" in cognito_groups_list else cognito_groups_list[0] - chatbot_id = input_body.get("chatbotId", group_name.lower()) - - if "indexId" in input_body: - index_id = input_body["indexId"] - else: - # Use default index id if not specified in the request - index_id = f"{chatbot_id}-qd-default" - if index_type == IndexType.QQ.value: - index_id = f"{chatbot_id}-qq-default" - elif index_type == IndexType.INTENTION.value: - index_id = f"{chatbot_id}-intention-default" - - if "tag" in input_body: - tag = input_body["tag"] - else: - tag = index_id - - input_body["indexId"] = index_id - input_body["groupName"] = group_name if "groupName" not in input_body else input_body["groupName"] - chatbot_event_body = input_body - chatbot_event_body["group_name"] = group_name - chatbot_event = {"body": json.dumps(chatbot_event_body)} - chatbot_result = create_chatbot(chatbot_event, group_name) - - input_body["tableItemId"] = context.aws_request_id - input_body["chatbotId"] = chatbot_id - input_body["embeddingModelType"] = chatbot_result["modelType"] - input_payload = json.dumps(input_body) - response = client.start_execution(stateMachineArn=sfn_arn, input=input_payload) - - # Update execution table item - if "tableItemId" in input_body: - del input_body["tableItemId"] - execution_id = response["executionArn"].split(":")[-1] - input_body["sfnExecutionId"] = execution_id - input_body["executionStatus"] = ExecutionStatus.IN_PROGRESS.value - input_body["indexId"] = index_id - input_body["executionId"] = context.aws_request_id - input_body["uiStatus"] = UiStatus.ACTIVE.value - input_body["createTime"] = create_time - - execution_table.put_item(Item=input_body) - - return { - "statusCode": 200, - "headers": resp_header, - "body": json.dumps( - { - "execution_id": context.aws_request_id, - "step_function_arn": response["executionArn"], - "input_payload": input_payload, - } - ), - } diff --git a/source/lambda/intention/intention.py b/source/lambda/intention/intention.py index d73213ad..8776712f 100644 --- a/source/lambda/intention/intention.py +++ b/source/lambda/intention/intention.py @@ -1,36 +1,43 @@ import hashlib import json +import logging import os import re import time +from io import BytesIO from typing import List + import boto3 -from openpyxl import load_workbook -from io import BytesIO -from embeddings import get_embedding_info +from aos import sm_utils +from aos.aos_utils import LLMBotOpenSearchClient from botocore.paginate import TokenEncoder -from opensearchpy import NotFoundError, RequestError, helpers, RequestsHttpConnection -import logging -from langchain.embeddings.bedrock import BedrockEmbeddings +from constant import ( + BULK_SIZE, + DEFAULT_CONTENT_TYPE, + DEFAULT_MAX_ITEMS, + 
DEFAULT_SIZE, + DOWNLOAD_RESOURCE, + EXECUTION_RESOURCE, + INDEX_USED_SCAN_RESOURCE, + PRESIGNED_URL_RESOURCE, + ModelDimensionMap, +) +from embeddings import get_embedding_info from langchain.docstore.document import Document +from langchain.embeddings.bedrock import BedrockEmbeddings from langchain_community.vectorstores import OpenSearchVectorSearch from langchain_community.vectorstores.opensearch_vector_search import ( OpenSearchVectorSearch, ) -from aos import sm_utils +from openpyxl import load_workbook +from opensearchpy import ( + NotFoundError, + RequestError, + RequestsHttpConnection, + helpers, +) from requests_aws4auth import AWS4Auth -from aos.aos_utils import LLMBotOpenSearchClient -from constant import (BULK_SIZE, - DEFAULT_CONTENT_TYPE, - DEFAULT_MAX_ITEMS, - DEFAULT_SIZE, - DOWNLOAD_RESOURCE, - EXECUTION_RESOURCE, - INDEX_USED_SCAN_RESOURCE, - PRESIGNED_URL_RESOURCE, - ModelDimensionMap) - logger = logging.getLogger(__name__) encoder = TokenEncoder() @@ -58,30 +65,36 @@ built_in_tools = ["chat", "get_weather"] try: - master_user = sm_client.get_secret_value( - SecretId=aos_secret)["SecretString"] + master_user = sm_client.get_secret_value(SecretId=aos_secret)[ + "SecretString" + ] secret = json.loads(master_user) username = secret.get("username") password = secret.get("password") if not aos_endpoint: opensearch_client = boto3.client("opensearch") - response = opensearch_client.describe_domain( - DomainName=aos_domain_name) + response = opensearch_client.describe_domain(DomainName=aos_domain_name) aos_endpoint = response["DomainStatus"]["Endpoint"] - aos_client = LLMBotOpenSearchClient(aos_endpoint, (username, password)).client + aos_client = LLMBotOpenSearchClient( + aos_endpoint, (username, password) + ).client awsauth = (username, password) except sm_client.exceptions.ResourceNotFoundException: logger.info("Secret '%s' not found in Secrets Manager", aos_secret) aos_client = LLMBotOpenSearchClient(aos_endpoint).client - awsauth = AWS4Auth(refreshable_credentials=credentials, - region=region, service="es") + awsauth = AWS4Auth( + refreshable_credentials=credentials, region=region, service="es" + ) except sm_client.exceptions.InvalidRequestException: - logger.info("InvalidRequestException. It might caused by getting secret value from a deleting secret") + logger.info( + "InvalidRequestException. 
It might be caused by getting a secret value from a secret that is being deleted" + ) logger.info("Fallback to authentication with IAM") aos_client = LLMBotOpenSearchClient(aos_endpoint).client - awsauth = AWS4Auth(refreshable_credentials=credentials, - region=region, service="es") + awsauth = AWS4Auth( + refreshable_credentials=credentials, region=region, service="es" + ) except Exception as err: logger.error("Error retrieving secret '%s': %s", aos_secret, str(err)) raise @@ -123,7 +136,9 @@ def aos_ingestion(self, documents: List[Document], index: str) -> None: embeddings_vectors_list.append( embeddings_vectors[0]["dense_vecs"][doc_id] ) - metadata["embedding_endpoint_name"] = self.embedding_model_endpoint + metadata["embedding_endpoint_name"] = ( + self.embedding_model_endpoint + ) metadata_list.append(metadata) embeddings_vectors = embeddings_vectors_list metadatas = metadata_list @@ -152,10 +167,11 @@ def lambda_handler(event, context): if resource == PRESIGNED_URL_RESOURCE: input_body = json.loads(event["body"]) file_name = f"intentions/{group_name}/[{input_body['timestamp']}]{input_body['file_name']}" - presigned_url = __gen_presigned_url(file_name, - input_body.get( - "content_type", DEFAULT_CONTENT_TYPE), - input_body.get("expiration", 60*60)) + presigned_url = __gen_presigned_url( + file_name, + input_body.get("content_type", DEFAULT_CONTENT_TYPE), + input_body.get("expiration", 60 * 60), + ) output = { "message": "The S3 presigned url is generated", "data": { @@ -163,7 +179,6 @@ def lambda_handler(event, context): "s3Bucket": s3_bucket_name, "s3Prefix": file_name, }, - } elif resource.startswith(EXECUTION_RESOURCE): if http_method == "POST": @@ -205,7 +220,7 @@ def __delete_execution(event, group_name): "intentionId": execution_id, }, ) - item = index_response.get('Item') + item = index_response.get("Item") if item: indexes = item.get("index").split(",") details = json.loads(item.get("details")) @@ -222,6 +237,7 @@ ) return res + # def __can_be_deleted(execution_id): # return False, "" @@ -232,22 +248,21 @@ def __delete_documents_by_text_set(index_name, text_values): search_body = { "size": 10000, "query": { - "terms": { - "text.keyword": list(text_values) # Convert set to list - } - } + "terms": {"text.keyword": list(text_values)} # Convert set to list + }, } # Perform the search try: search_result = aos_client.search( - index=index_name, body=search_body) # Adjust size if needed - hits = search_result['hits']['hits'] + index=index_name, body=search_body + ) # Adjust size if needed + hits = search_result["hits"]["hits"] # If documents exist, delete them if hits: for hit in hits: - doc_id = hit['_id'] + doc_id = hit["_id"] aos_client.delete(index=index_name, id=doc_id) logger.info("Deleted document with id %s", doc_id) except NotFoundError: @@ -266,8 +281,11 @@ def __get_query_parameter(event, parameter_name, default_value=None): def __gen_presigned_url(object_name: str, content_type: str, expiration: int): return s3_client.generate_presigned_url( ClientMethod="put_object", - Params={"Bucket": s3_bucket_name, - "Key": object_name, "ContentType": content_type}, + Params={ + "Bucket": s3_bucket_name, + "Key": object_name, + "ContentType": content_type, + }, ExpiresIn=expiration, HttpMethod="PUT", ) @@ -284,24 +302,20 @@ def __list_execution(event, group_name): } response = dynamodb_client.query( TableName=intention_table_name, - KeyConditionExpression='groupName = :groupName', - ExpressionAttributeValues={ - ':groupName': {'S': group_name} - } + 
KeyConditionExpression="groupName = :groupName", + ExpressionAttributeValues={":groupName": {"S": group_name}}, ) output = {} page_json = [] - items = response['Items'] - while 'LastEvaluatedKey' in response: + items = response["Items"] + while "LastEvaluatedKey" in response: response = dynamodb_client.query( TableName=intention_table_name, - KeyConditionExpression='groupName = :pk_val', - ExpressionAttributeValues={ - ':pk_val': {'S': group_name} - }, - ExclusiveStartKey=response['LastEvaluatedKey'] + KeyConditionExpression="groupName = :pk_val", + ExpressionAttributeValues={":pk_val": {"S": group_name}}, + ExclusiveStartKey=response["LastEvaluatedKey"], ) - items.extend(response['Items']) + items.extend(response["Items"]) for item in items: item_json = {} @@ -321,7 +335,7 @@ def __list_execution(event, group_name): item_json["executionStatus"] = value else: item_json[key] = value - + page_json.append(item_json) output["items"] = page_json output["config"] = config @@ -341,7 +355,7 @@ def __create_execution(event, context, email, group_name): bucket = input_body.get("s3Bucket") prefix = input_body.get("s3Prefix") s3_response = __get_s3_object_with_retry(bucket, prefix) - file_content = s3_response['Body'].read() + file_content = s3_response["Body"].read() excel_file = BytesIO(file_content) workbook = load_workbook(excel_file) sheet = workbook.active @@ -358,16 +372,21 @@ def __create_execution(event, context, email, group_name): qd_index = chatbot_item.get("indexIds", {}).get("qd", {}).get("value", {}) valid_qd_types = [*built_in_tools, *list(qd_index.keys())] for row in sheet.iter_rows(min_row=2, values_only=True): - question, intention, kwargs = row[0], row[1], row[2] if len( - row) > 2 else None + question, intention, kwargs = ( + row[0], + row[1], + row[2] if len(row) > 2 else None, + ) if not question: continue - qaList.append({ - "question": question, - "intention": intention, - "kwargs": kwargs, - "is_valid": intention in valid_qd_types - }) + qaList.append( + { + "question": question, + "intention": intention, + "kwargs": kwargs, + "is_valid": intention in valid_qd_types, + } + ) valid_qa_list = [qa for qa in qaList if qa.get("is_valid")] # write to ddb(meta data) @@ -381,20 +400,27 @@ def __create_execution(event, context, email, group_name): "tag": execution_detail["index"], "file": f'{bucket}{input_body.get("s3Prefix")}', "lastModifiedBy": email, - "lastModifiedTime": re.findall(r'\[(.*?)\]', input_body.get("s3Prefix"))[0], + "lastModifiedTime": re.findall( + r"\[(.*?)\]", input_body.get("s3Prefix") + )[0], "details": json.dumps(qaList), "validRatio": f"{len(valid_qa_list)} / {len(qaList)}", } ) - + # write to aos(vectorData) - __save_2_aos(input_body.get("model"), - execution_detail["index"], valid_qa_list, bucket, prefix) + __save_2_aos( + input_body.get("model"), + execution_detail["index"], + valid_qa_list, + bucket, + prefix, + ) return { "execution_id": execution_detail["tableItemId"], "input_payload": execution_detail, - "result": "success" + "result": "success", } @@ -409,7 +435,7 @@ def convert_qa_list(qa_list: list, bucket: str, prefix: str) -> List[Document]: "file_path": "", "keywords": [], "summary": "", - "type": "Intent" + "type": "Intent", } page_content = qa["question"] metadata = metadata_template @@ -426,9 +452,11 @@ def convert_qa_list(qa_list: list, bucket: str, prefix: str) -> List[Document]: return doc_list -def __save_2_aos(modelId: str, index: str, qaListParam: list, bucket: str, prefix: str): +def __save_2_aos( + modelId: str, index: str, qaListParam: 
list, bucket: str, prefix: str +): qaList = __deduplicate_by_key(qaListParam, "question") - if kb_enabled and embedding_model_endpoint.startswith("bce-embedding"): + if kb_enabled: embedding_info = get_embedding_info(embedding_model_endpoint) embedding_function = sm_utils.getCustomEmbeddings( embedding_model_endpoint, region, embedding_info.get("ModelType") @@ -460,18 +488,14 @@ def __create_index(index: str, modelId: str): "number_of_shards": 1, "number_of_replicas": 0, "knn": True, - "knn.algo_param.ef_search": 32 + "knn.algo_param.ef_search": 32, } }, "mappings": { "properties": { "text": { "type": "text", - "fields": { - "keyword": { - "type": "keyword" - } - } + "fields": {"keyword": {"type": "keyword"}}, }, "sentence_vector": { "type": "knn_vector", @@ -480,14 +504,11 @@ def __create_index(index: str, modelId: str): "engine": "nmslib", "space_type": "l2", "name": "hnsw", - "parameters": { - "ef_construction": 512, - "m": 16 - } - } - } + "parameters": {"ef_construction": 512, "m": 16}, + }, + }, } - } + }, } try: aos_client.indices.create(index=index, body=body) @@ -497,8 +518,11 @@ def __create_index(index: str, modelId: str): def __refresh_index(index: str, modelId: str, qaList): - success, failed = helpers.bulk(aos_client, __append_embeddings( - index, modelId, qaList), chunk_size=BULK_SIZE) + success, failed = helpers.bulk( + aos_client, + __append_embeddings(index, modelId, qaList), + chunk_size=BULK_SIZE, + ) aos_client.indices.refresh(index=index) logger.info("Successfully added: %d ", success) logger.info("Failed: %d ", len(failed)) @@ -510,40 +534,40 @@ def __append_embeddings(index, modelId, qaList: list): for item in qaList: question = item["question"] embedding_func = BedrockEmbeddings( - client=bedrock_client, - model_id=modelId, - normalize=True + client=bedrock_client, model_id=modelId, normalize=True ) - embeddings_vectors = embedding_func.embed_documents( - [question] - ) + embeddings_vectors = embedding_func.embed_documents([question]) documents.append( { "text": question, "metadata": { "answer": item["intention"], "source": "portal", - **({"kwargs": item["kwargs"]} if item.get("kwargs") else {}), - "type": "Intent" + **( + {"kwargs": item["kwargs"]} if item.get("kwargs") else {} + ), + "type": "Intent", }, - "sentence_vector": embeddings_vectors[0] + "sentence_vector": embeddings_vectors[0], } ) for document in documents: index_list = index.split(",") for index_item in index_list: - doc_id = hashlib.md5(str(document["text"]).encode('utf-8')).hexdigest() + doc_id = hashlib.md5( + str(document["text"]).encode("utf-8") + ).hexdigest() action = { "_op_type": "index", "_index": index_item, "_id": doc_id, - "_source": document + "_source": document, } actions.append(action) return actions - # yield {"_op_type": "index", "_index": index_item, "_source": document, "_id": hashlib.md5(str(document).encode('utf-8')).hexdigest()} + # yield {"_op_type": "index", "_index": index_item, "_source": document, "_id": hashlib.md5(str(document).encode('utf-8')).hexdigest()} def __get_execution(event, group_name): @@ -554,7 +578,7 @@ def __get_execution(event, group_name): "intentionId": executionId, }, ) - item = index_response['Item'] + item = index_response["Item"] res = {} items = [] # for item in items: @@ -562,10 +586,10 @@ def __get_execution(event, group_name): for key in list(item.keys()): value = item.get(key) if key == "file" or key == "File": - split_index = value.rfind('/') + split_index = value.rfind("/") if split_index != -1: item_json["s3Path"] = value[:split_index] - 
item_json["s3Prefix"] = value[split_index + 1:] + item_json["s3Prefix"] = value[split_index + 1 :] else: item_json["s3Path"] = value item_json["s3Prefix"] = "-" @@ -582,7 +606,9 @@ def __get_execution(event, group_name): return res -def __get_s3_object_with_retry(bucket: str, key: str, max_retries: int = 5, delay: int = 1): +def __get_s3_object_with_retry( + bucket: str, key: str, max_retries: int = 5, delay: int = 1 +): attempt = 0 while attempt < max_retries: try: @@ -595,12 +621,15 @@ def __get_s3_object_with_retry(bucket: str, key: str, max_retries: int = 5, dela raise time.sleep(delay) + def __download_template(): url = s3_client.generate_presigned_url( ClientMethod="get_object", - Params={'Bucket': s3_bucket_name, - 'Key': "templates/intention_corpus.xlsx"}, - ExpiresIn=60 + Params={ + "Bucket": s3_bucket_name, + "Key": "templates/intention_corpus.xlsx", + }, + ExpiresIn=60, ) return url @@ -614,7 +643,7 @@ def __index_used_scan(event, group_name): }, ) pre_model = index_response.get("Item") - model_name = '' + model_name = "" if pre_model: model_response = model_table.get_item( Key={ @@ -622,25 +651,24 @@ def __index_used_scan(event, group_name): "modelId": pre_model.get("modelIds", {}).get("embedding"), } ) - model_name = model_response.get("Item", {}).get( - "parameter", {}).get("ModelName", "") + model_name = ( + model_response.get("Item", {}) + .get("parameter", {}) + .get("ModelName", "") + ) # model_name = model_response.get("ModelName", {}).get("S","-") if not pre_model or model_name == input_body.get("model"): return { "statusCode": 200, "headers": resp_header, - "body": json.dumps({ - "result": "valid" - }) + "body": json.dumps({"result": "valid"}), } else: return { "statusCode": 200, "headers": resp_header, - "body": json.dumps({ - "result": "invalid" - } - )} + "body": json.dumps({"result": "invalid"}), + } def __deduplicate_by_key(lst, key): @@ -649,6 +677,7 @@ def __deduplicate_by_key(lst, key): seen[element[key]] = element return list(seen.values()) + def __get_query_parameter(event, parameter_name, default_value=None): if ( event.get("queryStringParameters") diff --git a/source/lambda/model_management/requirements.txt b/source/lambda/model_management/requirements.txt index ab55f6ef..b5bbb176 100644 --- a/source/lambda/model_management/requirements.txt +++ b/source/lambda/model_management/requirements.txt @@ -1 +1 @@ -https://aws-gcr-solutions-assets.s3.us-east-1.amazonaws.com/dmaa/wheels/dmaa-0.4.0-py3-none-any.whl +https://aws-gcr-solutions-assets.s3.us-east-1.amazonaws.com/dmaa/wheels/dmaa-0.5.0-py3-none-any.whl diff --git a/source/lambda/online/common_logic/langchain_integration/chat_models/dmaa_models.py b/source/lambda/online/common_logic/langchain_integration/chat_models/dmaa_models.py index 5f5d1958..70f82ea1 100644 --- a/source/lambda/online/common_logic/langchain_integration/chat_models/dmaa_models.py +++ b/source/lambda/online/common_logic/langchain_integration/chat_models/dmaa_models.py @@ -5,17 +5,20 @@ ModelProvider ) import os +import boto3 from dmaa.integrations.langchain_clients import SageMakerVllmChatModel as _SageMakerVllmChatModel +session = boto3.Session() +current_region = session.region_name class SageMakerVllmChatModel(_SageMakerVllmChatModel): - enable_auto_tool_choice: bool = True + enable_auto_tool_choice: bool = False enable_prefill: bool = True class Qwen25Instruct72bAwq(Model): model_id = LLMModelType.QWEN25_INSTRUCT_72B_AWQ - enable_auto_tool_choice: bool = True + enable_auto_tool_choice: bool = False enable_prefill: bool = True 
default_model_kwargs = { "max_tokens": 2000, @@ -32,7 +35,7 @@ def create_model(cls, model_kwargs=None, **kwargs): or os.environ.get("AWS_PROFILE", None) or None ) - region_name = kwargs.get("region_name", None) + region_name = kwargs.get("region_name", None) or current_region group_name = kwargs.get("group_name", os.environ.get('GROUP_NAME',"Admin")) llm = SageMakerVllmChatModel( diff --git a/source/lambda/online/lambda_main/test/main_local_test_common.py b/source/lambda/online/lambda_main/test/main_local_test_common.py index 052ef7e4..ca73c178 100644 --- a/source/lambda/online/lambda_main/test/main_local_test_common.py +++ b/source/lambda/online/lambda_main/test/main_local_test_common.py @@ -167,20 +167,20 @@ def test_multi_turns_agent_pr(): # "query": "What does 245346356356 times 346357457 equal?", # 1089836033535 # "query": "9.11和9.9哪个更大?", # 1089836033535 # "query": "what happened in the history of Morgan Stanley in Emerging market in 1989 ?", - "query": "Tell me the team members of Morgan Stanley in China", - # "query": "今天天气如何?", + # "query": "Tell me the team members of Morgan Stanley in China", + "query": "今天天气如何?", + # "query": "介绍一下MemGPT", + "use_history": True, + "enable_trace": True + }, + { + # "query": "”我爱北京天安门“包含多少个字符?", + # "query": "11133乘以97892395等于多少", # 1089836033535 + "query": "我在上海", # "query": "介绍一下MemGPT", "use_history": True, "enable_trace": True }, - # { - # # "query": "”我爱北京天安门“包含多少个字符?", - # # "query": "11133乘以97892395等于多少", # 1089836033535 - # "query": "我在上海", - # # "query": "介绍一下MemGPT", - # "use_history": True, - # "enable_trace": True - # }, ] # default_index_names = { @@ -193,9 +193,9 @@ def test_multi_turns_agent_pr(): # 'model_id': "anthropic.claude-3-5-sonnet-20240620-v1:0", # 'model_id': "anthropic.claude-3-5-haiku-20241022-v1:0", # 'model_id': "us.meta.llama3-2-90b-instruct-v1:0", - 'model_id': "anthropic.claude-3-5-sonnet-20241022-v2:0", + # 'model_id': "anthropic.claude-3-5-sonnet-20241022-v2:0", # 'model_id': "meta.llama3-1-70b-instruct-v1:0", - # 'model_id':"mistral.mistral-large-2407-v1:0", + 'model_id':"mistral.mistral-large-2407-v1:0", # 'model_id':"cohere.command-r-plus-v1:0", 'model_kwargs': { 'temperature': 0.01, diff --git a/source/model/etl/code/model.sh b/source/model/etl/code/model.sh index ea7b3886..0cff8011 100755 --- a/source/model/etl/code/model.sh +++ b/source/model/etl/code/model.sh @@ -6,16 +6,16 @@ dockerfile=$1 image=$2 tag=$3 # New argument for the tag +aws_region=$4 -if [ "$image" = "" ] || [ "$dockerfile" = "" ] || [ "$tag" = "" ] +if [ "$image" = "" ] || [ "$dockerfile" = "" ] || [ "$tag" = "" ] || [ "$aws_region" = "" ] then - echo "Usage: \$0 <dockerfile> <image-name> <tag>" + echo "Usage: \$0 <dockerfile> <image-name> <tag> <aws-region>" exit 1 fi # Get the account number associated with the current IAM credentials account=$(aws sts get-caller-identity --query Account --output text) -aws_region=$(aws configure get region) echo "Account: $account" echo "Region: $aws_region" diff --git a/source/portal/package-lock.json b/source/portal/package-lock.json index ef678152..2eb5c8b3 100644 --- a/source/portal/package-lock.json +++ b/source/portal/package-lock.json @@ -8,7 +8,6 @@ "name": "portal", "version": "0.0.0", "dependencies": { - "@cloudscape-design/component-toolkit": "^1.0.0-beta.81", "@cloudscape-design/components": "^3.0.604", "@cloudscape-design/global-styles": "^1.0.27", "axios": "^1.7.4", @@ -444,18 +443,18 @@ } }, "node_modules/@cloudscape-design/component-toolkit": { - "version": "1.0.0-beta.81", - "resolved": 
"https://registry.npmjs.org/@cloudscape-design/component-toolkit/-/component-toolkit-1.0.0-beta.81.tgz", - "integrity": "sha512-//WS5C+DSi6vbD17gdyUO9hikFWjID8FXSmzqtAxV6FsO2HRiSJkAMLt6jRCMZKo9JpOnDaEhtx5pkuiY/Ez+g==", + "version": "1.0.0-beta.47", + "resolved": "https://registry.npmjs.org/@cloudscape-design/component-toolkit/-/component-toolkit-1.0.0-beta.47.tgz", + "integrity": "sha512-A2rJJo7/OBosTKLEZ75k92/yKB6nSWgbGZCfIFadm/Y+lYiW0lShU98B4MnVqGqfQigbNaBtlChKn4Aixxk+qg==", "dependencies": { "@juggle/resize-observer": "^3.3.1", "tslib": "^2.3.1" } }, "node_modules/@cloudscape-design/components": { - "version": "3.0.857", - "resolved": "https://registry.npmjs.org/@cloudscape-design/components/-/components-3.0.857.tgz", - "integrity": "sha512-v0vwY/PsPHQF62mE5e0nqV7xJdkHbITgFUNIKoVcJtx10SzomKRBwn4+odWZKo6E6LU8paeB2vTPdXgrOJtpnw==", + "version": "3.0.604", + "resolved": "https://registry.npmjs.org/@cloudscape-design/components/-/components-3.0.604.tgz", + "integrity": "sha512-PtYv3sp0SbgGmh7LgTjTmnN26yGkFwa7yMSea0qXrbLhIJ//eXC0Mj5Kp6pzBaZ1U+4e40km4bWv/EASVQK8Ag==", "dependencies": { "@cloudscape-design/collection-hooks": "^1.0.0", "@cloudscape-design/component-toolkit": "^1.0.0-beta", @@ -465,7 +464,7 @@ "@dnd-kit/sortable": "^7.0.2", "@dnd-kit/utilities": "^3.2.1", "@juggle/resize-observer": "^3.3.1", - "ace-builds": "1.36.0", + "ace-builds": "^1.32.6", "balanced-match": "^1.0.2", "clsx": "^1.1.0", "d3-shape": "^1.3.7", @@ -498,9 +497,9 @@ } }, "node_modules/@cloudscape-design/global-styles": { - "version": "1.0.33", - "resolved": "https://registry.npmjs.org/@cloudscape-design/global-styles/-/global-styles-1.0.33.tgz", - "integrity": "sha512-6bg18XIxkRS2ojMNGxVA8mV35rqkiHDXwOJjfHhYPzg6LjFagZWyg/hRRGuP5MExszB748m2HYYdXT0EejxiPA==" + "version": "1.0.27", + "resolved": "https://registry.npmjs.org/@cloudscape-design/global-styles/-/global-styles-1.0.27.tgz", + "integrity": "sha512-26/Xt6eVB+xV+WCZJVmckiUUCYKrhQdGIr1t5gzpO/2VvERqjf+8x3buznyxNlFEfXaiKTMBtATnnYe27ARSiw==" }, "node_modules/@cloudscape-design/test-utils-core": { "version": "1.0.30", @@ -1792,9 +1791,9 @@ } }, "node_modules/ace-builds": { - "version": "1.36.0", - "resolved": "https://registry.npmjs.org/ace-builds/-/ace-builds-1.36.0.tgz", - "integrity": "sha512-7to4F86V5N13EY4M9LWaGo2Wmr9iWe5CrYpc28F+/OyYCf7yd+xBV5x9v/GB73EBGGoYd89m6JjeIUjkL6Yw+w==" + "version": "1.32.9", + "resolved": "https://registry.npmjs.org/ace-builds/-/ace-builds-1.32.9.tgz", + "integrity": "sha512-dqBLPj//Gq0b92YUtRIsdWsORf4J+4xW3r8/4Wr2Vqid7O1j7YBV/ZsVvWBjZFy+EnvMCRFCFOEIM1cbt4BQ/g==" }, "node_modules/acorn": { "version": "8.11.3", diff --git a/source/portal/package.json b/source/portal/package.json index 8d3a5302..c020eb62 100644 --- a/source/portal/package.json +++ b/source/portal/package.json @@ -10,7 +10,6 @@ "preview": "vite preview" }, "dependencies": { - "@cloudscape-design/component-toolkit": "^1.0.0-beta.81", "@cloudscape-design/components": "^3.0.604", "@cloudscape-design/global-styles": "^1.0.27", "axios": "^1.7.4", diff --git a/source/portal/src/pages/chatbot/ChatBot.tsx b/source/portal/src/pages/chatbot/ChatBot.tsx index 6c98bf84..fe2cb748 100644 --- a/source/portal/src/pages/chatbot/ChatBot.tsx +++ b/source/portal/src/pages/chatbot/ChatBot.tsx @@ -171,6 +171,7 @@ const ChatBot: React.FC = (props: ChatBotProps) => { setTemperature(defaultConfig.temperature) setTopKRetrievals(defaultConfig.topKRetrievals) setScore(defaultConfig.score) + setUserMessage('') setAdditionalConfig('') // setModelOption(optionList?.[0]?.value ?? 
'') setSessionId(uuidv4()); @@ -392,13 +393,19 @@ const ChatBot: React.FC = (props: ChatBotProps) => { } }; - document.addEventListener('compositionstart', () => { - setIsComposing(true); - }); - document.addEventListener('compositionend', () => { - setIsComposing(false); - }); + useEffect(() => { + const inputElement = document.querySelector('input'); + if (!inputElement) return; + const handleCompositionStart = () => setIsComposing(true); + const handleCompositionEnd = () => setIsComposing(false); + inputElement.addEventListener('compositionstart', handleCompositionStart); + inputElement.addEventListener('compositionend', handleCompositionEnd); + return () => { + inputElement.removeEventListener('compositionstart', handleCompositionStart); + inputElement.removeEventListener('compositionend', handleCompositionEnd); + }; + }, []); useEffect(() => { if (lastMessage !== null) { @@ -467,23 +471,25 @@ const ChatBot: React.FC = (props: ChatBotProps) => { } if (!maxRounds.trim()) { - setMaxTokenError('validation.requireMaxRounds'); + setMaxRoundsError('validation.requireMaxRounds'); setModelSettingExpand(true); return; } + if (parseInt(maxRounds) < 0) { - setMaxTokenError('validation.maxRoundsRange'); + setMaxRoundsError('validation.maxRoundsRange'); setModelSettingExpand(true); return; } if (!topKRetrievals.trim()) { - setMaxTokenError('validation.requireTopKRetrievals'); + setTopKRetrievalsError('validation.requireTopKRetrievals'); setModelSettingExpand(true); return; } + if (parseInt(topKRetrievals) < 1) { - setMaxTokenError('validation.topKRetrievals'); + setTopKRetrievalsError('validation.topKRetrievals'); setModelSettingExpand(true); return; } @@ -790,6 +796,9 @@ const ChatBot: React.FC = (props: ChatBotProps) => { type="number" value={maxRounds} onChange={({ detail }) => { + if (parseInt(detail.value) < 0 || parseInt(detail.value) > 100) { + return + } setMaxRoundsError(''); setMaxRounds(detail.value); }} @@ -851,6 +860,9 @@ const ChatBot: React.FC = (props: ChatBotProps) => { type="number" value={topKRetrievals} onChange={({ detail }) => { + if (parseInt(detail.value) < 0 || parseInt(detail.value) > 100) { + return + } setTopKRetrievalsError(''); setTopKRetrievals(detail.value); }} @@ -864,7 +876,7 @@ const ChatBot: React.FC = (props: ChatBotProps) => { > { if(parseFloat(detail.value) < 0 || parseFloat(detail.value) > 1){ diff --git a/source/script/build.sh b/source/script/build.sh index 70f0835f..52fb0552 100644 --- a/source/script/build.sh +++ b/source/script/build.sh @@ -4,6 +4,7 @@ set -e # Load config.json config_file="../infrastructure/bin/config.json" +deploy_region=$(jq -r '.deployRegion' $config_file) knowledge_base_enabled=$(jq -r '.knowledgeBase.enabled' $config_file) knowledge_base_intelliagent_enabled=$(jq -r '.knowledgeBase.knowledgeBaseType.intelliAgentKb.enabled' $config_file) knowledge_base_models_enabled=$(jq -r '.knowledgeBase.knowledgeBaseType.intelliAgentKb.knowledgeBaseModel.enabled' $config_file) @@ -31,7 +32,7 @@ aws ecr-public get-login-password --region us-east-1 | docker login --username A prepare_etl_model() { echo "Preparing ETL Model" cd model/etl/code - sh model.sh ./Dockerfile $ecr_repository $ecr_image_tag + sh model.sh ./Dockerfile $ecr_repository $ecr_image_tag $deploy_region cd - > /dev/null pwd } @@ -52,7 +53,7 @@ build_frontend() { build_deployment_module() { echo "Building Model Deployment Module" - curl https://aws-gcr-solutions-assets.s3.us-east-1.amazonaws.com/dmaa/wheels/dmaa-0.4.0-py3-none-any.whl -o dmaa-0.4.0-py3-none-any.whl && pip install dmaa-0.4.0-py3-none-any.whl"[all]" + curl https://aws-gcr-solutions-assets.s3.us-east-1.amazonaws.com/dmaa/wheels/dmaa-0.5.0-py3-none-any.whl -o dmaa-0.5.0-py3-none-any.whl && pip install dmaa-0.5.0-py3-none-any.whl"[all]" dmaa bootstrap }
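
Two short sketches follow to illustrate the intent behind the changes above; both are hedged illustrations, not code shipped in this patch.

The new serviceQuotaStatement grants the model-management Lambda read-only Service Quotas access, presumably so it can verify SageMaker capacity before calling CreateEndpoint. A minimal boto3 sketch of such a pre-check (the quota code is a placeholder; the real one comes from list_service_quotas for the resource in question, and pagination of list_endpoints is ignored for brevity):

    import boto3

    service_quotas = boto3.client("service-quotas")
    sagemaker = boto3.client("sagemaker")

    def endpoint_quota_allows(quota_code: str) -> bool:
        """Return True if one more endpoint stays within the applied quota.

        quota_code is hypothetical; look up the real code with
        service_quotas.list_service_quotas(ServiceCode="sagemaker").
        """
        # Applied quota value for the SageMaker quota identified by quota_code
        quota_value = service_quotas.get_service_quota(
            ServiceCode="sagemaker", QuotaCode=quota_code
        )["Quota"]["Value"]
        # Count endpoints currently in service (first page only, for brevity)
        in_service = len(
            sagemaker.list_endpoints(StatusEquals="InService")["Endpoints"]
        )
        return in_service + 1 <= quota_value

For the refactored sfn_handler.handler, a hypothetical local event showing the shape the new code expects: claims arrive as a JSON string from the lambda authorizer, and indexType must be one of the IndexType values ("qd" is assumed here; the bucket and prefix are made up):

    import json

    event = {
        "requestContext": {
            "authorizer": {
                "authorizerType": "lambda_authorizer",
                # The authorizer passes claims as a JSON string,
                # which handler() decodes with json.loads.
                "claims": json.dumps({"cognito:groups": "Admin"}),
            }
        },
        "body": json.dumps(
            {
                "indexType": "qd",  # assumed value of IndexType.QD
                "s3Bucket": "example-bucket",  # hypothetical
                "s3Prefix": "docs/sample.pdf",  # hypothetical
            }
        ),
    }
    # handler(event, context) resolves the index id and embedding endpoint via
    # get_etl_info, starts the Step Functions execution, and writes an
    # execution record with status IN_PROGRESS.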