-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
310 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
|
||
from langchain_openai import ChatOpenAI | ||
from langchain.schema.messages import HumanMessage,AIMessage | ||
import streamlit as st | ||
import base64 | ||
from PIL import Image | ||
from pymongo import MongoClient | ||
import urllib,json,io | ||
from response_gen import genresponse | ||
# Local imports for this setup section: os for reading secrets from the
# environment, and urllib.parse imported explicitly because a bare
# `import urllib` does not guarantee the `parse` submodule is loaded.
import os
import urllib.parse

# Chat model used to read the invoice images; temperature is pinned to 0.0.
chain = ChatOpenAI(model="gpt-4o", temperature=0.0)

# MongoDB client.
# Credentials are read from the environment so that no secret is stored in
# the repository. A missing variable raises KeyError at start-up, which is
# preferable to connecting with a stale or leaked password.
# NOTE(review): the password that used to be committed on this line should be
# treated as compromised and rotated.
username = os.environ["MONGODB_USERNAME"]
pwd = os.environ["MONGODB_PASSWORD"]
# quote_plus also escapes "/" (plain quote() leaves it untouched), so any
# character in the credentials is safe inside the connection URI.
client = MongoClient(
    "mongodb+srv://"
    + urllib.parse.quote_plus(username)
    + ":"
    + urllib.parse.quote_plus(pwd)
    + "@cluster0.lymvb.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
)
db = client["invoice"]
collection = db["invoice"]
|
||
def encode_image(upload_file):
    """Return the content of ``upload_file`` as a base64 text string.

    ``upload_file`` must provide ``getvalue()`` returning the raw bytes
    (the caller passes the object obtained from ``st.file_uploader``).
    """
    raw = upload_file.getvalue()
    return base64.b64encode(raw).decode("utf-8")
|
||
def get_response(b64image):
    """Ask the vision model to summarise one invoice image as JSON text.

    Args:
        b64image: Base64-encoded JPEG content of the invoice (text, not bytes).

    Returns:
        The raw ``content`` of the model reply. The prompt asks for JSON
        only, but the reply is returned unparsed; the caller is responsible
        for cleaning and decoding it.
    """
    # Imported here because the file-level import only brings in
    # HumanMessage and AIMessage from this module.
    from langchain.schema.messages import SystemMessage

    instructions = """summarise the invoice into json with key value pair of the following keys:
invoice_number
invoice_date
customer_name
product will be list of maps with brand,item,unit and single_unit_price,all_unit_price as keys
total_price
mode_of_payment
and return it as output
make sure to remove "$" from price related column and
make sure values in all price related columns must be stored as number not a string
output will be only json nothing else this is very strict must follow"""

    msg = chain.invoke(
        [
            # The role description is an instruction to the model, so it is
            # sent as a system message. An AIMessage would present it as
            # something the assistant itself had said earlier.
            SystemMessage(
                content="you are an useful asnd intelligent boty who is very good at image reading ocr taskto get insights from images of invoces"
            ),
            HumanMessage(
                content=[
                    {"type": "text", "text": instructions},
                    {
                        "type": "image_url",
                        "image_url": {
                            # image/jpeg is the registered MIME type for JPEG
                            # data; "image/jpg" is not.
                            "url": "data:image/jpeg;base64," + b64image,
                            "detail": "auto",
                        },
                    },
                ]
            ),
        ]
    )
    return msg.content
|
||
def _parse_invoice_json(text):
    """Decode the JSON object embedded in a model reply.

    Parsing starts at the first "{" or "[" and stops at the end of that JSON
    value, so Markdown code fences, a leading "json" tag or surrounding prose
    are ignored without touching the payload itself.

    Raises:
        ValueError: if no JSON value is found, it is malformed
            (json.JSONDecodeError is a ValueError), or it is not an object.
    """
    starts = [pos for pos in (text.find("{"), text.find("[")) if pos != -1]
    if not starts:
        raise ValueError("no JSON found in model reply")
    value, _ = json.JSONDecoder().raw_decode(text, min(starts))
    if not isinstance(value, dict):
        raise ValueError("invoice summary must be a JSON object")
    return value


def main():
    """Streamlit page: upload invoice images, store them, then answer questions.

    Each uploaded JPEG is displayed, summarised by ``get_response`` and the
    resulting document inserted into ``collection``. Once the collection
    holds at least one document, a question box is shown and the question is
    answered through ``genresponse``.
    """
    import hashlib

    st.title("INVOICE ANALYSIS APP")
    upload_files = st.file_uploader(
        "upload your file", type=["jpg"], accept_multiple_files=True
    )

    # Streamlit re-runs this whole function on every widget interaction
    # (typing a question, pressing submit). Remember which uploads were
    # already stored so they are not sent to the model and inserted again.
    if "processed_uploads" not in st.session_state:
        st.session_state["processed_uploads"] = set()
    processed = st.session_state["processed_uploads"]

    for upload_file in upload_files or []:
        image = Image.open(upload_file)
        st.image(image, caption="your invoice", use_column_width=True)
        st.success("image uploaded successfully")

        # Identify an upload by its content so re-uploading the same file
        # under another name is also recognised.
        digest = hashlib.sha256(upload_file.getvalue()).hexdigest()
        if digest in processed:
            continue

        b64_image = encode_image(upload_file)
        response = get_response(b64_image)
        try:
            data = _parse_invoice_json(response)
        except ValueError:
            # Not marked as processed, so the next rerun tries again.
            st.error(f"could not read invoice data from {upload_file.name}")
            continue
        collection.insert_one(data)
        processed.add(digest)

    count = collection.count_documents({})
    if count > 0:
        st.success("documents are uploaded successfully")
        qsn = st.text_area("ask your question")
        btn = st.button("submit")
        if btn:
            if not (qsn or "").strip():
                # text_area returns "" when left empty; skip the model call.
                st.warning("please enter a question")
            else:
                try:
                    response = genresponse(qsn)
                except ValueError:
                    # genresponse decodes model output with json.loads; a
                    # malformed reply surfaces as a ValueError subclass.
                    st.error("could not build a query for this question")
                else:
                    for result in response:
                        st.write(result)


if __name__ == "__main__":
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
"type":"text","text":"""summarise the invoice into json with key value pairof the following keys: | ||
invoice_number | ||
invoice_date | ||
customer_name | ||
product will be list of maps with brand,item,unit and single_unit_price,all_unit_price as keys | ||
total_price | ||
mode_of_payment | ||
and return it as output | ||
make sure to remove "$" from price related column and | ||
make sure values in all price related columns must be stored as number not a string | ||
output will be only json nothing else this is very strict must follow""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
you are a very intelligent AI assistant who is expert in identifying relevant questions from user | ||
and converting into nosql mongodb aggregation pipeline query. | ||
Note: You have to just return the query as to use in agggregation pipeline nothing else. Don't return any other thing | ||
Please use the below schema to write the mongodb queries , dont use any other queries. | ||
schema: | ||
the mentioned mongodb collection talks about invoices for a company. The schema for this document represents the structure of the data, describing various information related to the _id, invoice_number, invoice_date, customer_name, product.brand, product.item, product.unit, product.single_unit_price, product.all_unit_price, total_price, mode_of_payment | ||
your job is to get python code for the user question | ||
Here’s a breakdown of its schema with descriptions for each field: | ||
|
||
|
||
1. _id: Unique identifier for the document, represented as an ObjectId. | ||
2. invoice_number: A unique number assigned to the invoice. | ||
3. invoice_date: The date when the invoice was issued. | ||
4. customer_name: The name of the customer to whom the invoice is issued. | ||
5. product: A list of products included in the invoice. | ||
- brand: The brand of the product. | ||
- item: The name or model of the product. | ||
- unit: The number of units of the product being invoiced, represented as an integer. | ||
- single_unit_price: The price of a single unit of the product, represented as an integer. | ||
- all_unit_price: The total price for all units of this product, represented as an integer.Will be used for brand or product specific revenue | ||
6. total_price: The total price of all products included in the invoice, represented as an integer. | ||
7. mode_of_payment: The mode of payment used for the invoice. | ||
Here is the example provided with explanations: | ||
example:{sample1} | ||
This schema provides a comprehensive view of the data structure for multiple invoices in MongoDB, | ||
use the below sample_examples to generate your queries perfectly | ||
sample_example: | ||
|
||
Below are several sample user questions related to the MongoDB document provided, | ||
and the corresponding MongoDB aggregation pipeline queries that can be used to fetch the desired data. | ||
Use them wisely. | ||
|
||
sample_question: {sample} | ||
As an expert you must use them whenever required. | ||
Note: You have to just return the query nothing else. Don't return any additional text with the query.Please follow this strictly | ||
input:{input} | ||
output: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
write all brand names | ||
write all brand names and their corresponding product count | ||
write all product names for Samsung | ||
write all unique product names for Microsoft | ||
write all unique product names for Microsoft and their price | ||
write all unique product names for Microsoft and their price | ||
which brand generated maximum sales | ||
which product ordered maximum times | ||
which product ordered maximum number of units in a single invoice? | ||
for which user maximum number of invoices generated? write all the invoice numbers |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import streamlit as st | ||
from pymongo import MongoClient | ||
import urllib,io,json | ||
from langchain_openai import ChatOpenAI | ||
from langchain.prompts import PromptTemplate | ||
from langchain.chains import LLMChain | ||
|
||
# Local imports for this setup section: os for reading secrets from the
# environment, and urllib.parse imported explicitly because a bare
# `import urllib` does not guarantee the `parse` submodule is loaded.
import os
import urllib.parse

# Chat model that turns a user question into an aggregation pipeline;
# temperature is pinned to 0.0.
llm = ChatOpenAI(model="gpt-4o", temperature=0.0)

# MongoDB client.
# Credentials are read from the environment so that no secret is stored in
# the repository. A missing variable raises KeyError at import time.
# NOTE(review): the password that used to be committed on this line should be
# treated as compromised and rotated.
username = os.environ["MONGODB_USERNAME"]
pwd = os.environ["MONGODB_PASSWORD"]
# quote_plus also escapes "/" (plain quote() leaves it untouched), so any
# character in the credentials is safe inside the connection URI.
client = MongoClient(
    "mongodb+srv://"
    + urllib.parse.quote_plus(username)
    + ":"
    + urllib.parse.quote_plus(pwd)
    + "@cluster0.lymvb.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
)
db = client["invoice"]
collection = db["invoice"]

# Few-shot material substituted into the prompt template: sample.txt fills
# {sample} and sample1.txt fills {sample1}. The `with` statement already
# closes each file, so no explicit close() is needed.
# NOTE(review): both paths are relative to the current working directory.
with io.open("sample.txt", "r", encoding="utf-8") as f1:
    sample = f1.read()
with io.open("sample1.txt", "r", encoding="utf-8") as f1:
    sample1 = f1.read()
|
||
# Prompt template used by genresponse() to turn a natural-language question
# into a MongoDB aggregation pipeline. It contains three placeholders that
# genresponse() fills through PromptTemplate:
#   {sample1} - the annotated example document,
#   {sample}  - the sample question/query pairs,
#   {input}   - the user's question.
# The text is sent to the model verbatim, so any edit here changes runtime
# behaviour.
# NOTE(review): the line "your job is to get python code for the user
# question" appears to conflict with the surrounding instructions to return
# only the query - confirm which is intended.
prompt="""
you are a very intelligent AI assitasnt who is expert in identifying relevant questions from user
and converting into nosql mongodb agggregation pipeline query.
Note: You have to just return the query as to use in agggregation pipeline nothing else. Don't return any other thing
Please use the below schema to write the mongodb queries , dont use any other queries.
schema:
the mentioned mogbodb collection talks about invoivces for a company. The schema for this document represents the structure of the data, describing various informations related to the _id, invoice_number, invoice_date, customer_name, product.brand, product.item, product.unit, product.single_unit_price, product.all_unit_price, total_price, mode_of_payment
your job is to get python code for the user question
Here’s a breakdown of its schema with descriptions for each field:
1. _id: Unique identifier for the document, represented as an ObjectId.
2. invoice_number: A unique number assigned to the invoice.
3. invoice_date: The date when the invoice was issued.
4. customer_name: The name of the customer to whom the invoice is issued.
5. product: A list of products included in the invoice.
- brand: The brand of the product.
- item: The name or model of the product.
- unit: The number of units of the product being invoiced, represented as an integer.
- single_unit_price: The price of a single unit of the product, represented as an integer.
- all_unit_price: The total price for all units of this product, represented as an integer.Will be used for brand or product specific revenue
6. total_price: The total price of all products included in the invoice, represented as an integer.
7. mode_of_payment: The mode of payment used for the invoice.
Here is the example provided with explanations:
example:{sample1}
This schema provides a comprehensive view of the data structure for multiple invoices in MongoDB,
use the below sample_examples to generate your queries perfectly
sample_example:
Below are several sample user questions related to the MongoDB document provided,
and the corresponding MongoDB aggregation pipeline queries that can be used to fetch the desired data.
Use them wisely.
sample_question: {sample}
As an expert you must use them whenever required.
Note: You have to just return the query nothing else. Don't return any additional text with the query.Please follow this strictly
input:{input}
output:
"""
# Aggregation stages that write to the database. The pipeline is produced by
# a language model from free-form user text, so it is restricted to reads.
_WRITE_STAGES = {"$out", "$merge"}


def _parse_pipeline(text):
    """Decode and validate the aggregation pipeline embedded in a model reply.

    Parsing starts at the first "[" or "{" and stops at the end of that JSON
    value, so Markdown code fences, a leading "json" tag or surrounding prose
    are ignored without altering the query itself.

    Raises:
        ValueError: if no JSON is found, it is malformed
            (json.JSONDecodeError is a ValueError), it is not a list of
            stage objects, or it contains a writing stage.
    """
    # The few-shot samples use extended-JSON style paths such as
    # "$total_price.$numberInt"; drop that suffix so the path addresses the
    # stored field.
    text = text.replace(".$numberInt", "")
    starts = [pos for pos in (text.find("["), text.find("{")) if pos != -1]
    if not starts:
        raise ValueError("no JSON found in model reply")
    query, _ = json.JSONDecoder().raw_decode(text, min(starts))
    if isinstance(query, dict):
        # A lone stage object is accepted as a one-stage pipeline.
        query = [query]
    if not isinstance(query, list) or not all(
        isinstance(stage, dict) for stage in query
    ):
        raise ValueError("model reply is not an aggregation pipeline")
    for stage in query:
        if _WRITE_STAGES & stage.keys():
            raise ValueError("aggregation stages that write data are not allowed")
    return query


def genresponse(input):
    """Answer a natural-language question by running a generated aggregation.

    The question is placed into the module-level ``prompt`` template together
    with ``sample`` and ``sample1``, sent to ``llm``, and the returned
    pipeline is executed against ``collection``.

    Args:
        input: The user's question. (The name shadows the builtin but is kept
            because it is part of the function's signature.)

    Returns:
        The cursor returned by ``collection.aggregate``.

    Raises:
        ValueError: if the model reply cannot be turned into a read-only
            aggregation pipeline.
    """
    query_with_prompt = PromptTemplate(
        template=prompt,
        input_variables=["input", "sample", "sample1"],
    )
    llmchain = LLMChain(llm=llm, prompt=query_with_prompt, verbose=True)
    response = llmchain.invoke(
        {
            "input": input,
            "sample": sample,
            "sample1": sample1,
        }
    )
    query = _parse_pipeline(response["text"])
    return collection.aggregate(query)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
Question 1: What is the total price of invoices issued to "Susan Davis"? | ||
|
||
Query: | ||
json | ||
[ | ||
{ "$match": { "customer_name": "Susan Davis" } }, | ||
{ "$group": { "_id": "$customer_name", "total_price": { "$sum": "$total_price.$numberInt" } } }, | ||
{ "$project": { "_id": 0, "total_price": 1 } } | ||
] | ||
|
||
|
||
Question 2: List all products bought by "Susan Davis" along with their brands and item names. | ||
|
||
Query: | ||
json | ||
[ | ||
{ "$match": { "customer_name": "Susan Davis" } }, | ||
{ "$unwind": "$product" }, | ||
{ "$project": { "_id": 0, "brand": "$product.brand", "item": "$product.item" } } | ||
] | ||
|
||
|
||
Question 3: How many units of each product were bought in total? | ||
|
||
Query: | ||
json | ||
[ | ||
{ "$unwind": "$product" }, | ||
{ "$group": { "_id": { "brand": "$product.brand", "item": "$product.item" }, "total_units": { "$sum": { "$toInt": "$product.unit.$numberInt" } } } }, | ||
{ "$project": { "_id": 0, "brand": "$_id.brand", "item": "$_id.item", "total_units": 1 } } | ||
] | ||
|
||
|
||
Question 4: Find the total revenue from each brand of products. | ||
|
||
Query: | ||
json | ||
[ | ||
{ "$unwind": "$product" }, | ||
{ "$group": { "_id": "$product.brand", "total_revenue": { "$sum": { "$toInt": "$product.all_unit_price.$numberInt" } } } }, | ||
{ "$project": { "_id": 0, "brand": "$_id", "total_revenue": 1 } } | ||
] | ||
|
||
|
||
Question 5: List all invoices with their invoice numbers and total prices. | ||
|
||
Query: | ||
json | ||
[ | ||
{ "$project": { "_id": 0, "invoice_number": 1, "total_price": { "$toInt": "$total_price.$numberInt" } } } | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
{ | ||
"_id": {"$oid": "664495848b7d3c4a165be667"}, // Unique identifier for the document, represented as an ObjectId. | ||
"invoice_number": "2024-020", // A unique number assigned to the invoice. | ||
"invoice_date": "2024-12-02", // The date when the invoice was issued. | ||
"customer_name": "James Brown", // The name of the customer to whom the invoice is issued. | ||
"product": [ // A list of products included in the invoice. | ||
{ | ||
"brand": "HP", // The brand of the product. | ||
"item": "Envy 15", // The name or model of the product. | ||
"unit": {"$numberInt": "3"}, // The number of units of the product being invoiced, represented as an integer using the $numberInt type. | ||
"single_unit_price": {"$numberInt": "1534"}, // The price of a single unit of the product, represented as an integer using the $numberInt type. | ||
"all_unit_price": {"$numberInt": "4602"} // The total price for all units of this product, represented as an integer using the $numberInt type. | ||
}, | ||
{ | ||
"brand": "Microsoft", // The brand of the product. | ||
"item": "Surface Laptop 4", // The name or model of the product. | ||
"unit": {"$numberInt": "3"}, // The number of units of the product being invoiced, represented as an integer using the $numberInt type. | ||
"single_unit_price": {"$numberInt": "1241"}, // The price of a single unit of the product, represented as an integer using the $numberInt type. | ||
"all_unit_price": {"$numberInt": "3723"} // The total price for all units of this product, represented as an integer using the $numberInt type. | ||
} | ||
], | ||
"total_price": {"$numberInt": "8325"}, // The total price of all products included in the invoice, represented as an integer using the $numberInt type. | ||
"mode_of_payment": "UPI" // The mode of payment used for the invoice. | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Scratch script: list one price per product item from the invoice collection.
import os
import urllib
import urllib.parse

from pymongo import MongoClient

# Credentials are read from the environment so that no secret is stored in
# the repository. A missing variable raises KeyError immediately.
# NOTE(review): the password that used to be committed on this line should be
# treated as compromised and rotated.
username = os.environ["MONGODB_USERNAME"]
pwd = os.environ["MONGODB_PASSWORD"]
# quote_plus also escapes "/" (plain quote() leaves it untouched), so any
# character in the credentials is safe inside the connection URI.
client = MongoClient(
    "mongodb+srv://"
    + urllib.parse.quote_plus(username)
    + ":"
    + urllib.parse.quote_plus(pwd)
    + "@cluster0.lymvb.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
)
db = client["invoice"]
collection = db["invoice"]

# The price is addressed as "$product.single_unit_price" without a
# ".$numberInt" suffix. This matches the application's query path, which
# strips that suffix before running model-generated pipelines.
result = collection.aggregate(
    [
        {"$unwind": "$product"},
        {
            "$group": {
                "_id": "$product.item",
                "price": {"$first": "$product.single_unit_price"},
            }
        },
        {"$project": {"_id": 0, "product_name": "$_id", "price": 1}},
    ]
)
for doc in result:
    print(doc)
Binary file not shown.