# Import Required Libraries
__import__("pysqlite3")
import sys
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
import os
import streamlit as st
from streamlit_chat import message
from langchain.document_loaders import OnlinePDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.embeddings import CohereEmbeddings
from langchain.prompts import PromptTemplate
from langchain.llms import Cohere
from datetime import datetime
# Setting Up API Tokens
# Create a .streamlit folder in the root directory
# Create a file named secrets.toml inside it
# TOML format:
# cohere_apikey="Enter your Key"
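# Example .streamlit/secrets.toml (the key name must match st.secrets["cohere_apikey"] used below;
# the value is a placeholder, not a real key):
#
#   cohere_apikey = "YOUR_COHERE_API_KEY"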
# Setting Up Streamlit Page
st.set_page_config(page_title="Chat With PDF", page_icon=":smile:")
# Creating Temp Folder
if not os.path.exists("./tempfolder"):
    os.makedirs("./tempfolder")
# tabs
tab1, tab2 = st.tabs(["📈 Chat Here", "🗃 Relevant Chunks"])
tab1.markdown(
    """
    <h1 style='text-align: center;'>Chat With PDF</h1>
    <h4 style='text-align: center;'>Powered by Cohere</h4>
    <p style='text-align: center;'>For uninterrupted usage, visit the <a href='https://huggingface.co/spaces/eswardivi/ChatwithPdf' target='_blank'>HuggingFace Space</a></p>
    """,
    unsafe_allow_html=True,
)
# Saving uploaded file to tempfolder
def save_uploadedfile(uploadedfile):
    with open(
        os.path.join("tempfolder", uploadedfile.name),
        "wb",
    ) as f:
        f.write(uploadedfile.getbuffer())
    return st.sidebar.success("Saved File")
# Creating Sidebar for Utilities
with st.sidebar:
    st.title("Upload PDF")
    st.write("For any queries, please feel free to contact")
    st.write("Email: [[email protected]](mailto:[email protected])")
    st.write("GitHub: [github.com/EswarDivi](https://github.com/EswarDivi)")
    uploaded_file = st.file_uploader("Choose a file", type=["pdf"])
    temp_r = st.slider("Temperature", 0.1, 0.9, 0.45, 0.1)
    chunksize = st.slider("Chunk Size for Splitting Document", 256, 1024, 400, 10)
    clear_button = st.button("Clear Conversation", key="clear")
# Initializing Text Splitter
text_splitter = CharacterTextSplitter(chunk_size=chunksize, chunk_overlap=10)
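# CharacterTextSplitter breaks the loaded document into chunks of roughly `chunksize` characters
# (measured with len) with a 10-character overlap, so text spanning a chunk boundary keeps some context.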
# Initializing Cohere Embeddings
embeddings = CohereEmbeddings(model="large", cohere_api_key=st.secrets["cohere_apikey"])
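# These embeddings vectorize each chunk for the Chroma index built below; "large" is the legacy
# Cohere embedding-model name accepted by this LangChain wrapper.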
def PDF_loader(document):
    loader = OnlinePDFLoader(document)
    documents = loader.load()
    prompt_template = """
    System Prompt:
    You are an AI chatbot that helps users chat with PDF documents. How may I help you today?
    {context}
    {question}
    """
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    chain_type_kwargs = {"prompt": PROMPT}
    texts = text_splitter.split_documents(documents)
    global db
    db = Chroma.from_documents(texts, embeddings)
    retriever = db.as_retriever()
    global qa
    qa = RetrievalQA.from_chain_type(
        llm=Cohere(
            model="command-xlarge-nightly",
            temperature=temp_r,
            cohere_api_key=st.secrets["cohere_apikey"],
        ),
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs=chain_type_kwargs,
    )
    return "Ready"
if uploaded_file is not None:
    save_uploadedfile(uploaded_file)
    file_size = os.path.getsize(f"tempfolder/{uploaded_file.name}") / (
        1024 * 1024
    )  # Size in MB
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{current_time}] Uploaded PDF: {file_size} MB")
    PDF_loader("tempfolder/" + uploaded_file.name)
    tab1.markdown(
        "<h3 style='text-align: center;'>Now You Are Chatting With "
        + uploaded_file.name
        + "</h3>",
        unsafe_allow_html=True,
    )
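# Note: Streamlit reruns this script on every interaction, and uploaded_file stays set while the
# uploader widget holds a file, so PDF_loader (including re-embedding) runs again on each rerun.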
# Session State
if "chat_history" not in st.session_state:
st.session_state["chat_history"] = []
if "generated" not in st.session_state:
st.session_state["generated"] = []
if "past" not in st.session_state:
st.session_state["past"] = []
# Generating Response
def generate_response(query):
    result = qa({"query": query, "chat_history": st.session_state["chat_history"]})
    tab2.markdown(
        "<h3 style='text-align: center;'>Relevant Documents Metadata</h3>",
        unsafe_allow_html=True,
    )
    tab2.write(result["source_documents"])
    return result["result"]
# Creating Containers
response_container = tab1.container()
container = tab1.container()
with container:
    with st.form(key="my_form", clear_on_submit=True):
        user_input = st.text_input("You:", key="input")
        submit_button = st.form_submit_button(label="Send")

    if user_input and submit_button:
        if uploaded_file is not None:
            output = generate_response(user_input)
            print(output)
            st.session_state["past"].append(user_input)
            st.session_state["generated"].append(output)
            st.session_state["chat_history"] = [(user_input, output)]
        else:
            st.session_state["past"].append(user_input)
            st.session_state["generated"].append(
                "Please upload a PDF from the sidebar first."
            )
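# Note: st.session_state["chat_history"] is overwritten each turn (a single-element list), so only
# the most recent exchange is retained for the chain.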
if st.session_state["generated"]:
with response_container:
for i in range(len(st.session_state["generated"])):
message(
st.session_state["past"][i],
is_user=True,
key=str(i) + "_user",
avatar_style="adventurer",
seed=123,
)
message(st.session_state["generated"][i], key=str(i))
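# streamlit_chat's message() renders each entry as a chat bubble; avatar_style and seed keep the
# user avatar consistent across turns, and the distinct keys keep each widget unique on rerun.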
# Enabling Clear button
if clear_button:
    st.session_state["generated"] = []
    st.session_state["past"] = []
    st.session_state["chat_history"] = []