aws-samples · dependabot · Feb 5, 2024
diff --git a/source/model/etl/code/.ipynb_checkpoints/requirements-checkpoint.txt b/source/model/etl/code/.ipynb_checkpoints/requirements-checkpoint.txt
@@ -3,7 +3,7 @@ boto3==1.28.85
 opencv-contrib-python-headless==4.8.1.78
 #transformers==0.1.17
 onnxruntime-gpu
-Pillow==8.4.0
+Pillow==10.2.0
 pyclipper==1.3.0
 Shapely==1.7.1
 PyMuPDF<1.21.0

diff --git a/README.md b/README.md

 **Quick Start Tutorial**

 **Extract document from specified S3 bucket and prefix, POST https://xxxx.execute-api.us-east-1.amazonaws.com/v1/extract, use the flag need_split to configure whether the extracted document needs to be split semantically or kept with its original content**
 ```bash
 BODY
 {
 }
 ```

 **Offline (asynchronous) process for batch processing documents specified in S3 bucket and prefix; this process includes extracting, splitting document content, converting to vector representation and injecting into Amazon Open Search (AOS). POST https://xxxx.execute-api.us-east-1.amazonaws.com/v1/etl**
 ```bash
 BODY
 {
diff --git a/source/lambda/executor/main.py b/source/lambda/executor/main.py
        stop=None,
    )
    elpase_time = time.time() - start
    logger.info(f"runing time of parse query: {elpase_time}s seconds")
    return parsed_query


    )
    # logger.info(json.dumps(opensearch_knn_response, ensure_ascii=False))
    elpase_time = time.time() - start
    logger.info(f"runing time of opensearch_knn : {elpase_time}s seconds")
    answer = None
    sources = None
    if len(opensearch_knn_results) > 0:
    # logger.info(json.dumps(opensearch_knn_response, ensure_ascii=False))
    faq_recall_end_time = time.time()
    elpase_time = faq_recall_end_time - start
    logger.info(f"runing time of faq recall : {elpase_time}s seconds")
    filter = None
    if parsed_query["is_api_query"]:
        filter = [{"term": {"metadata.is_api": True}}]
    )
    ug_recall_end_time = time.time()
    elpase_time = ug_recall_end_time - faq_recall_end_time
    logger.info(f"runing time of ug recall: {elpase_time}s seconds")

    # 2. get AOS invertedIndex recall
    opensearch_query_results = []

    rerank_end_time = time.time()
    elpase_time = rerank_end_time - ug_recall_end_time
    logger.info(f"runing time of rerank: {elpase_time}s seconds")

    return rerank_knowledge

        answer = llm_generate(**generate_input)
        llm_end_time = time.time()
        elpase_time = llm_end_time - llm_start_time
        logger.info(f"runing time of llm: {elpase_time}s seconds")
        # answer = ret["answer"]
        debug_info["knowledge_qa_llm"] = answer
    except Exception as e:
            )

        main_entry_elpase = time.time() - main_entry_start
        logger.info(f"runing time of {biz_type} entry : {main_entry_elpase}s seconds")
     
    response_kwargs = dict(
        stream=stream,
diff --git a/source/lambda/executor/utils/prompt_template.py b/source/lambda/executor/utils/prompt_template.py

 CLAUDE21_RAG_PROMPT_TEMPLTE = """You are a customer service agent, and answering user's query. You ALWAYS follow these guidelines when writing your response:
 <guidelines>
 - NERVER say "根据搜索结果/大家好/谢谢...".
 </guidelines>

 Here are some documents for you to reference for your query:

	Quick Start Tutorial

	Extract document from specified S3 bucket and prefix, POST https://xxxx.execute-api.us-east-1.amazonaws.com/v1/extract, use flag need_split to configure if extracted document need to be splitted semantically or keep with original content
Check failure on line 76 in README.md GitHub Actions / miss spelling check for words or sentences `splitted ==> split`
	```bash
	BODY
	{
	}
	```

	Offline (asychronous) process to batch processing documents specified in S3 bucket and prefix, such process include extracting, splitting document content, converting to vector representation and injecting into Amazon Open Search (AOS). POST https://xxxx.execute-api.us-east-1.amazonaws.com/v1/etl
Check failure on line 86 in README.md GitHub Actions / miss spelling check for words or sentences `asychronous ==> asynchronous`
	```bash
	BODY
	{
	stop=None,
	)
	elpase_time = time.time() - start
	logger.info(f"runing time of parse query: {elpase_time}s seconds")
Check failure on line 291 in source/lambda/executor/main.py GitHub Actions / miss spelling check for words or sentences `runing ==> running, ruining`
	return parsed_query


	)
	# logger.info(json.dumps(opensearch_knn_response, ensure_ascii=False))
	elpase_time = time.time() - start
	logger.info(f"runing time of opensearch_knn : {elpase_time}s seconds")
Check failure on line 320 in source/lambda/executor/main.py GitHub Actions / miss spelling check for words or sentences `runing ==> running, ruining`
	answer = None
	sources = None
	if len(opensearch_knn_results) > 0:
	# logger.info(json.dumps(opensearch_knn_response, ensure_ascii=False))
	faq_recall_end_time = time.time()
	elpase_time = faq_recall_end_time - start
	logger.info(f"runing time of faq recall : {elpase_time}s seconds")
Check failure on line 370 in source/lambda/executor/main.py GitHub Actions / miss spelling check for words or sentences `runing ==> running, ruining`
	filter = None
	if parsed_query["is_api_query"]:
	filter = [{"term": {"metadata.is_api": True}}]
	)
	ug_recall_end_time = time.time()
	elpase_time = ug_recall_end_time - faq_recall_end_time
	logger.info(f"runing time of ug recall: {elpase_time}s seconds")
Check failure on line 403 in source/lambda/executor/main.py GitHub Actions / miss spelling check for words or sentences `runing ==> running, ruining`

	# 2. get AOS invertedIndex recall
	opensearch_query_results = []

	rerank_end_time = time.time()
	elpase_time = rerank_end_time - ug_recall_end_time
	logger.info(f"runing time of rerank: {elpase_time}s seconds")
Check failure on line 457 in source/lambda/executor/main.py GitHub Actions / miss spelling check for words or sentences `runing ==> running, ruining`

	return rerank_knowledge

	answer = llm_generate(**generate_input)
	llm_end_time = time.time()
	elpase_time = llm_end_time - llm_start_time
	logger.info(f"runing time of llm: {elpase_time}s seconds")
Check failure on line 558 in source/lambda/executor/main.py GitHub Actions / miss spelling check for words or sentences `runing ==> running, ruining`
	# answer = ret["answer"]
	debug_info["knowledge_qa_llm"] = answer
	except Exception as e:
	)

	main_entry_elpase = time.time() - main_entry_start
	logger.info(f"runing time of {biz_type} entry : {main_entry_elpase}s seconds")
Check failure on line 1107 in source/lambda/executor/main.py GitHub Actions / miss spelling check for words or sentences `runing ==> running, ruining`

	response_kwargs = dict(
	stream=stream,

	CLAUDE21_RAG_PROMPT_TEMPLTE = """You are a customer service agent, and answering user's query. You ALWAYS follow these guidelines when writing your response:
	<guidelines>
	- NERVER say "根据搜索结果/大家好/谢谢...".
Check failure on line 14 in source/lambda/executor/utils/prompt_template.py GitHub Actions / miss spelling check for words or sentences `NERVER ==> NEVER`
	</guidelines>

	Here are some documents for you to reference for your query: