Skip to content

Commit

Permalink
tutorial64
Browse files Browse the repository at this point in the history
  • Loading branch information
ronidas39 committed May 15, 2024
1 parent 02a0be9 commit 4af8c26
Show file tree
Hide file tree
Showing 11 changed files with 310 additions and 1 deletion.
2 changes: 1 addition & 1 deletion tutorial2/single_url.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from langchain.document_loaders import youtube
import io

loader=youtube.YoutubeLoader.from_youtube_url("https://youtu.be/jYbcuZP40p8")
loader=youtube.YoutubeLoader.from_youtube_url("https://youtu.be/eWT9lAJOcmA")
docs=loader.load()
print(docs)
with io.open("transcript.txt","w",encoding="utf-8")as f1:
Expand Down
Binary file not shown.
83 changes: 83 additions & 0 deletions tutorial64/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@

from langchain_openai import ChatOpenAI
from langchain.schema.messages import HumanMessage,AIMessage
import streamlit as st
import base64
from PIL import Image
from pymongo import MongoClient
import urllib,json,io
from response_gen import genresponse
import os
import urllib.parse

# Deterministic (temperature 0) GPT-4o chat model used to read invoice images.
chain = ChatOpenAI(model="gpt-4o", temperature=0.0)

# mongo client
# Credentials come from the environment so that no secret is committed to
# version control: export MONGODB_USERNAME and MONGODB_PASSWORD before running.
# A missing variable raises KeyError at import time, which is preferable to
# silently connecting with a stale, leaked password.
username = os.environ["MONGODB_USERNAME"]
pwd = os.environ["MONGODB_PASSWORD"]
# safe="" percent-encodes every reserved character (including "/"), which is
# required for user-info embedded in a MongoDB connection string.
client = MongoClient(
    "mongodb+srv://"
    + urllib.parse.quote(username, safe="")
    + ":"
    + urllib.parse.quote(pwd, safe="")
    + "@cluster0.lymvb.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
)
db = client["invoice"]
collection = db["invoice"]

def encode_image(upload_file):
    """Return the uploaded file's bytes as a base64 string.

    ``upload_file`` must expose ``getvalue()`` returning the raw bytes; the
    result is the base64 encoding of those bytes decoded as UTF-8 text.
    """
    return str(base64.b64encode(upload_file.getvalue()), "utf-8")

def get_response(b64image):
    """Ask the chat model to summarise one invoice image as JSON.

    Args:
        b64image: Base64-encoded JPEG bytes of the invoice image (as produced
            by ``encode_image``).

    Returns:
        The ``content`` of the model's reply. The prompt asks for JSON only,
        but the caller should still be prepared for a Markdown code fence.
    """
    # Imported here because the module-level import only brings in
    # HumanMessage and AIMessage from this package.
    from langchain.schema.messages import SystemMessage

    extraction_request = """summarise the invoice into json with key value pair of the following keys:
invoice_number
invoice_date
customer_name
product will be list of maps with brand,item,unit and single_unit_price,all_unit_price as keys
total_price
mode_of_payment
and return it as output
make sure to remove "$" from price related column and
make sure values in all price related columns must be stored as number not a string
output will be only json nothing else this is very strict must follow"""

    msg = chain.invoke(
        [
            # The role description is an instruction to the model, so it is
            # sent as a system message rather than as a previous AI turn.
            SystemMessage(
                content="you are an useful asnd intelligent boty who is very good at image reading ocr taskto get insights from images of invoces"
            ),
            HumanMessage(
                content=[
                    {"type": "text", "text": extraction_request},
                    {
                        "type": "image_url",
                        "image_url": {
                            # "image/jpeg" is the registered media type for JPEG.
                            "url": "data:image/jpeg;base64," + b64image,
                            "detail": "auto",
                        },
                    },
                ]
            ),
        ]
    )
    return msg.content

def _parse_invoice_json(response):
    """Parse the model's reply into a dict, tolerating a Markdown code fence.

    Only a leading/trailing backtick fence and a leading ``json`` language tag
    are removed, so the word "json" or a backtick inside a value is preserved.

    Raises:
        ValueError: if the reply is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError`` subclass) or is not a JSON object.
    """
    text = response.strip().strip("`").strip()
    if text[:4].lower() == "json":
        text = text[4:]
    data = json.loads(text)
    if not isinstance(data, dict):
        raise ValueError("expected a JSON object describing one invoice")
    return data


def main():
    """Streamlit entry point: ingest invoice images, then answer questions.

    Each uploaded JPEG is shown, summarised to JSON by ``get_response`` and
    stored in the MongoDB ``collection``. Once the collection is non-empty the
    user can ask a question, which ``genresponse`` turns into an aggregation
    query whose result documents are written to the page.
    """
    import hashlib

    st.title("INVOICE ANALYSIS APP")
    upload_files = st.file_uploader(
        "upload your file", type=["jpg"], accept_multiple_files=True
    )

    # Streamlit re-runs this function on every widget interaction while the
    # uploader keeps its files, so remember which images were already stored
    # to avoid repeated model calls and duplicate documents.
    if "processed_invoices" not in st.session_state:
        st.session_state["processed_invoices"] = set()
    processed = st.session_state["processed_invoices"]

    for upload_file in upload_files or []:
        image = Image.open(upload_file)
        st.image(image, caption="your invoice", use_column_width=True)
        st.success("image uploaded successfully")

        digest = hashlib.sha256(upload_file.getvalue()).hexdigest()
        if digest in processed:
            continue

        response = get_response(encode_image(upload_file))
        try:
            data = _parse_invoice_json(response)
        except ValueError:
            # Do not crash the whole app on one unreadable reply.
            st.error("could not read invoice data from the model response")
            continue
        collection.insert_one(data)
        processed.add(digest)

    count = collection.count_documents({})
    if count > 0:
        st.success("documents are uploaded successfully")
        qsn = st.text_area("ask your question")
        btn = st.button("submit")
        # text_area returns "" when empty, so test for real content instead of
        # comparing with None.
        if btn and qsn and qsn.strip():
            response = genresponse(qsn)
            for result in response:
                st.write(result)


if __name__ == "__main__":
    main()

11 changes: 11 additions & 0 deletions tutorial64/mainprompt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"type":"text","text":"""summarise the invoice into json with key value pairof the following keys:
invoice_number
invoice_date
customer_name
product will be list of maps with brand,item,unit and single_unit_price,all_unit_price as keys
total_price
mode_of_payment
and return it as output
make sure to remove "$" from price related column and
make sure values in all price related columns must be stored as number not a string
output will be only json nothing else this is very strict must follow"""
37 changes: 37 additions & 0 deletions tutorial64/prompt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
You are a very intelligent AI assistant who is an expert in identifying relevant questions from the user
and converting them into NoSQL MongoDB aggregation pipeline queries.
Note: You have to return only the query, ready to use in an aggregation pipeline, and nothing else. Don't return anything other than the query.
Please use the schema below to write the MongoDB queries; don't use any other queries.
schema:
The MongoDB collection mentioned here contains invoices for a company. The schema for this document represents the structure of the data, describing various pieces of information related to the _id, invoice_number, invoice_date, customer_name, product.brand, product.item, product.unit, product.single_unit_price, product.all_unit_price, total_price, mode_of_payment
your job is to get python code for the user question
Here’s a breakdown of its schema with descriptions for each field:


1. _id: Unique identifier for the document, represented as an ObjectId.
2. invoice_number: A unique number assigned to the invoice.
3. invoice_date: The date when the invoice was issued.
4. customer_name: The name of the customer to whom the invoice is issued.
5. product: A list of products included in the invoice.
- brand: The brand of the product.
- item: The name or model of the product.
- unit: The number of units of the product being invoiced, represented as an integer.
- single_unit_price: The price of a single unit of the product, represented as an integer.
- all_unit_price: The total price for all units of this product, represented as an integer.Will be used for brand or product specific revenue
6. total_price: The total price of all products included in the invoice, represented as an integer.
7. mode_of_payment: The mode of payment used for the invoice.
Here is the example provided with explanations:
example:{sample1}
This schema provides a comprehensive view of the data structure for multiple invoices in MongoDB,
use the below sample_examples to generate your queries perfectly
sample_example:

Below are several sample user questions related to the MongoDB document provided,
and the corresponding MongoDB aggregation pipeline queries that can be used to fetch the desired data.
Use them wisely.

sample_question: {sample}
As an expert you must use them whenever required.
Note: You have to just return the query nothing else. Don't return any additional text with the query.Please follow this strictly
input:{input}
output:
10 changes: 10 additions & 0 deletions tutorial64/qsn.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
write all brand names
write all brand names and their corresponding product count
write all product names for Samsung
write all unique product names for Microsoft
write all unique product names for Microsoft and their price
write all unique product names for Microsoft and their price
which brand generated maximum sales
which product ordered maximum times
which product ordered maximum number of units in a single invoice?
for which user maximum number of invoices generated? write all the invoice numbers
82 changes: 82 additions & 0 deletions tutorial64/response_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import streamlit as st
from pymongo import MongoClient
import urllib,io,json
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

import os
import urllib.parse

# Deterministic (temperature 0) GPT-4o chat model used to write the queries.
llm = ChatOpenAI(model="gpt-4o", temperature=0.0)

# mongo client
# Credentials come from the environment so that no secret is committed to
# version control: export MONGODB_USERNAME and MONGODB_PASSWORD before running.
username = os.environ["MONGODB_USERNAME"]
pwd = os.environ["MONGODB_PASSWORD"]
# safe="" percent-encodes every reserved character (including "/"), which is
# required for user-info embedded in a MongoDB connection string.
client = MongoClient(
    "mongodb+srv://"
    + urllib.parse.quote(username, safe="")
    + ":"
    + urllib.parse.quote(pwd, safe="")
    + "@cluster0.lymvb.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
)
db = client["invoice"]
collection = db["invoice"]


# Few-shot material injected into the prompt: sample.txt holds example
# question/pipeline pairs, sample1.txt holds one annotated example document.
# The with-statement closes each file, so no explicit close() is needed.
with io.open("sample.txt", "r", encoding="utf-8") as f1:
    sample = f1.read()
with io.open("sample1.txt", "r", encoding="utf-8") as f1:
    sample1 = f1.read()

# Prompt template that asks the model to turn a natural-language question into
# a MongoDB aggregation pipeline for the invoice collection.  genresponse()
# fills the three placeholders: {sample1} (example document), {sample}
# (example question/query pairs) and {input} (the user's question).
# NOTE(review): the wording below contains typos; it is left untouched here
# because the text is sent to the model verbatim and is therefore behaviour.
prompt="""
you are a very intelligent AI assitasnt who is expert in identifying relevant questions from user
and converting into nosql mongodb agggregation pipeline query.
Note: You have to just return the query as to use in agggregation pipeline nothing else. Don't return any other thing
Please use the below schema to write the mongodb queries , dont use any other queries.
schema:
the mentioned mogbodb collection talks about invoivces for a company. The schema for this document represents the structure of the data, describing various informations related to the _id, invoice_number, invoice_date, customer_name, product.brand, product.item, product.unit, product.single_unit_price, product.all_unit_price, total_price, mode_of_payment
your job is to get python code for the user question
Here’s a breakdown of its schema with descriptions for each field:
1. _id: Unique identifier for the document, represented as an ObjectId.
2. invoice_number: A unique number assigned to the invoice.
3. invoice_date: The date when the invoice was issued.
4. customer_name: The name of the customer to whom the invoice is issued.
5. product: A list of products included in the invoice.
- brand: The brand of the product.
- item: The name or model of the product.
- unit: The number of units of the product being invoiced, represented as an integer.
- single_unit_price: The price of a single unit of the product, represented as an integer.
- all_unit_price: The total price for all units of this product, represented as an integer.Will be used for brand or product specific revenue
6. total_price: The total price of all products included in the invoice, represented as an integer.
7. mode_of_payment: The mode of payment used for the invoice.
Here is the example provided with explanations:
example:{sample1}
This schema provides a comprehensive view of the data structure for multiple invoices in MongoDB,
use the below sample_examples to generate your queries perfectly
sample_example:
Below are several sample user questions related to the MongoDB document provided,
and the corresponding MongoDB aggregation pipeline queries that can be used to fetch the desired data.
Use them wisely.
sample_question: {sample}
As an expert you must use them whenever required.
Note: You have to just return the query nothing else. Don't return any additional text with the query.Please follow this strictly
input:{input}
output:
"""
# Aggregation stages that write to the database; a question-answering query
# generated by the model must never contain them.
_WRITE_STAGES = frozenset({"$out", "$merge"})

# Built lazily on first use and then reused, instead of rebuilding the
# template and chain on every question.
_query_chain = None


def _parse_pipeline(text):
    """Turn the model's reply into a validated, read-only pipeline.

    Only a surrounding backtick fence and a leading ``json`` tag are removed,
    so the word "json" inside a field name or value is preserved.  The
    ``.$numberInt`` suffix copied from the Extended-JSON examples is dropped
    because it is not part of the stored field path.

    Raises:
        ValueError: if the reply is not valid JSON, is not a list of stage
            documents, or contains a stage that writes to the database.
    """
    cleaned = text.strip().strip("`").strip()
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:]
    cleaned = cleaned.replace(".$numberInt", "")
    pipeline = json.loads(cleaned)
    if not isinstance(pipeline, list) or not all(
        isinstance(stage, dict) for stage in pipeline
    ):
        raise ValueError("model did not return a list of aggregation stages")
    for stage in pipeline:
        if _WRITE_STAGES & stage.keys():
            raise ValueError("refusing to run a pipeline that writes data")
    return pipeline


def genresponse(input):
    """Answer a natural-language question with a MongoDB aggregation.

    Args:
        input: The user's question. (The name shadows the builtin but is kept
            so existing keyword callers keep working.)

    Returns:
        The cursor returned by ``collection.aggregate`` for the generated
        pipeline; iterate it to obtain the result documents.

    Raises:
        ValueError: if the model's reply cannot be used as a read-only
            aggregation pipeline.
    """
    global _query_chain
    if _query_chain is None:
        query_with_prompt = PromptTemplate(
            template=prompt,
            input_variables=["input", "sample", "sample1"],
        )
        _query_chain = LLMChain(llm=llm, prompt=query_with_prompt, verbose=True)

    response = _query_chain.invoke(
        {
            "input": input,
            "sample": sample,
            "sample1": sample1,
        }
    )
    query = _parse_pipeline(response["text"])
    results = collection.aggregate(query)
    return results

51 changes: 51 additions & 0 deletions tutorial64/sample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
Question 1: What is the total price of invoices issued to "Susan Davis"?

Query:
json
[
{ "$match": { "customer_name": "Susan Davis" } },
{ "$group": { "_id": "$customer_name", "total_price": { "$sum": "$total_price.$numberInt" } } },
{ "$project": { "_id": 0, "total_price": 1 } }
]


Question 2: List all products bought by "Susan Davis" along with their brands and item names.

Query:
json
[
{ "$match": { "customer_name": "Susan Davis" } },
{ "$unwind": "$product" },
{ "$project": { "_id": 0, "brand": "$product.brand", "item": "$product.item" } }
]


Question 3: How many units of each product were bought in total?

Query:
json
[
{ "$unwind": "$product" },
{ "$group": { "_id": { "brand": "$product.brand", "item": "$product.item" }, "total_units": { "$sum": { "$toInt": "$product.unit.$numberInt" } } } },
{ "$project": { "_id": 0, "brand": "$_id.brand", "item": "$_id.item", "total_units": 1 } }
]


Question 4: Find the total revenue from each brand of products.

Query:
json
[
{ "$unwind": "$product" },
{ "$group": { "_id": "$product.brand", "total_revenue": { "$sum": { "$toInt": "$product.all_unit_price.$numberInt" } } } },
{ "$project": { "_id": 0, "brand": "$_id", "total_revenue": 1 } }
]


Question 5: List all invoices with their invoice numbers and total prices.

Query:
json
[
{ "$project": { "_id": 0, "invoice_number": 1, "total_price": { "$toInt": "$total_price.$numberInt" } } }
]
24 changes: 24 additions & 0 deletions tutorial64/sample1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"_id": {"$oid": "664495848b7d3c4a165be667"}, // Unique identifier for the document, represented as an ObjectId.
"invoice_number": "2024-020", // A unique number assigned to the invoice.
"invoice_date": "2024-12-02", // The date when the invoice was issued.
"customer_name": "James Brown", // The name of the customer to whom the invoice is issued.
"product": [ // A list of products included in the invoice.
{
"brand": "HP", // The brand of the product.
"item": "Envy 15", // The name or model of the product.
"unit": {"$numberInt": "3"}, // The number of units of the product being invoiced, represented as an integer using the $numberInt type.
"single_unit_price": {"$numberInt": "1534"}, // The price of a single unit of the product, represented as an integer using the $numberInt type.
"all_unit_price": {"$numberInt": "4602"} // The total price for all units of this product, represented as an integer using the $numberInt type.
},
{
"brand": "Microsoft", // The brand of the product.
"item": "Surface Laptop 4", // The name or model of the product.
"unit": {"$numberInt": "3"}, // The number of units of the product being invoiced, represented as an integer using the $numberInt type.
"single_unit_price": {"$numberInt": "1241"}, // The price of a single unit of the product, represented as an integer using the $numberInt type.
"all_unit_price": {"$numberInt": "3723"} // The total price for all units of this product, represented as an integer using the $numberInt type.
}
],
"total_price": {"$numberInt": "8325"}, // The total price of all products included in the invoice, represented as an integer using the $numberInt type.
"mode_of_payment": "UPI" // The mode of payment used for the invoice.
}
11 changes: 11 additions & 0 deletions tutorial64/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Manual smoke test: print one price per product item from the invoices."""
import os
import urllib.parse

from pymongo import MongoClient

# Credentials come from the environment so that no secret is committed to
# version control: export MONGODB_USERNAME and MONGODB_PASSWORD before running.
username = os.environ["MONGODB_USERNAME"]
pwd = os.environ["MONGODB_PASSWORD"]
# safe="" percent-encodes every reserved character (including "/"), which is
# required for user-info embedded in a MongoDB connection string.
client = MongoClient(
    "mongodb+srv://"
    + urllib.parse.quote(username, safe="")
    + ":"
    + urllib.parse.quote(pwd, safe="")
    + "@cluster0.lymvb.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
)
db = client["invoice"]
collection = db["invoice"]

# "$numberInt" only appears in the Extended-JSON rendering of a document; it
# is not a stored sub-field, and a field-path component may not start with
# "$", so the price is read from "$product.single_unit_price" directly.
result = collection.aggregate(
    [
        {"$unwind": "$product"},
        {
            "$group": {
                "_id": "$product.item",
                "price": {"$first": "$product.single_unit_price"},
            }
        },
        {"$project": {"_id": 0, "product_name": "$_id", "price": 1}},
    ]
)
for doc in result:
    print(doc)
Binary file added tutorial64/tutorial64.pptx
Binary file not shown.

0 comments on commit 4af8c26

Please sign in to comment.