forked from DonTizi/ReMind
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathswift.py
170 lines (141 loc) · 6.96 KB
/
swift.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import logging
from pathlib import Path
from flask import Flask, request, jsonify
from flask_socketio import SocketIO
from flask_cors import CORS
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA
from langchain.schema import Document
from datetime import datetime, timedelta
import json
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
app = Flask(__name__)
CORS(app)
socketio = SocketIO(app, cors_allowed_origins="*")
base_dir = Path.home() / 'Library' / 'Application Support' / 'RemindEnchanted'
persist_directory = base_dir / 'vectoreDB'
persist_directory.mkdir(parents=True, exist_ok=True)
embedding_model = OllamaEmbeddings(model='nomic-embed-text')
llm = Ollama(model="llama3.1")
vectorstore = None
retriever = None
qa_chain = None
def initialize_vectorstore():
global vectorstore, retriever, qa_chain
try:
if persist_directory.exists():
logging.info("Loading existing vector store")
vectorstore = Chroma(persist_directory=str(persist_directory), embedding_function=embedding_model)
logging.info("Existing vector store loaded successfully")
else:
logging.info("Creating new vector store")
vectorstore = Chroma(embedding_function=embedding_model, persist_directory=str(persist_directory))
logging.info("New vector store created successfully")
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
logging.info("Retriever and QA chain initialized successfully")
except Exception as e:
logging.error(f"Error initializing vector store: {e}", exc_info=True)
def add_new_document(text, metadata):
global vectorstore
try:
doc = Document(page_content=text, metadata=metadata)
vectorstore.add_documents([doc])
vectorstore.persist()
logging.info("New document added to the vector store")
except Exception as e:
logging.error(f"Error adding new document: {e}", exc_info=True)
def classify_question(question):
prompt = f"""Determine if the following question requires searching a knowledge base about the user's personal information and activities, or if it can be answered with general knowledge.
Question: {question}
If the question is about the user's personal information, activities, or anything that would require searching a personal knowledge base, respond with "SEARCH_REQUIRED".
If the question can be answered with general knowledge or does not require personal information, respond with "GENERAL_KNOWLEDGE".
Response:"""
response = llm(prompt)
return "SEARCH_REQUIRED" in response
def determine_time_range(question):
prompt = f"""Analyze the following question and determine the time range it refers to.
Question: {question}
Respond with one of the following:
- TODAY: if the question refers to today or the current day
- YESTERDAY: if the question refers to yesterday
- WEEK: if the question refers to the current week or the past 7 days
- MONTH: if the question refers to the current month or the past 30 days
- ALL: if no specific time range is mentioned or if it refers to a longer period
Response:"""
response = llm(prompt).strip().upper()
return response if response in ["TODAY", "YESTERDAY", "WEEK", "MONTH"] else "ALL"
def get_date_range(time_range):
today = datetime.now().date()
if time_range == "TODAY":
return today, today
elif time_range == "YESTERDAY":
yesterday = today - timedelta(days=1)
return yesterday, yesterday
elif time_range == "WEEK":
start_of_week = today - timedelta(days=today.weekday())
return start_of_week, today
elif time_range == "MONTH":
start_of_month = today.replace(day=1)
return start_of_month, today
else:
return None, None # For "ALL" or undefined ranges
def filter_documents_by_date(docs, start_date, end_date):
if start_date is None or end_date is None:
return docs
return [
doc for doc in docs
if start_date <= datetime.strptime(doc.metadata.get('date', '1970-01-01'), '%Y-%m-%d').date() <= end_date
]
@app.route('/')
def index():
return "Chat application is running!"
@app.route('/add', methods=['POST'])
def add_document():
data = request.json
text = data.get('text')
metadata = data.get('metadata', {})
if not text:
return jsonify({"status": "error", "message": "No text provided"}), 400
# Ensure the metadata includes the current date
metadata['date'] = datetime.now().strftime('%Y-%m-%d')
add_new_document(text, metadata)
return jsonify({"status": "success", "message": "Document added successfully"}), 200
@app.route('/query', methods=['POST'])
def query_endpoint():
if not qa_chain or not llm:
return jsonify({"status": "error", "message": "QA chain or LLM not initialized. Check server logs for details."}), 500
query_text = request.json.get('query')
try:
if classify_question(query_text):
# Question requires searching the knowledge base
time_range = determine_time_range(query_text)
start_date, end_date = get_date_range(time_range)
relevant_docs = retriever.invoke(query_text)
filtered_docs = filter_documents_by_date(relevant_docs, start_date, end_date)
if filtered_docs:
# Generate a response that interprets what you might have been doing
context = "\n".join([doc.page_content for doc in filtered_docs])
ai_response = qa_chain.run(f"Given the following context: \n\n{context}\n\n Answer the question: '{query_text}' by interpreting what the user was likely doing with this information. Answer specifically the question about the specific subject with the context you have and nothing else.")
ai_response = ai_response
else:
ai_response = f"I couldn't find any relevant information for the specified time range ({time_range.lower()} - {start_date} to {end_date}). Could you please rephrase your question or specify a different time range?"
context = [doc.page_content for doc in filtered_docs]
else:
# Question can be answered with general knowledge
ai_response = llm(query_text)
context = []
return jsonify({
"status": "success",
"results": [ai_response],
"context": context
}), 200
except Exception as e:
logging.error(f"Error processing query: {e}")
return jsonify({"status": "error", "message": "An error occurred while processing the query."}), 500
if __name__ == '__main__':
initialize_vectorstore()
logging.info("Server is starting...")
socketio.run(app, host='0.0.0.0', port=8005, debug=False)