-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
92 lines (71 loc) · 2.47 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from src.funcs import create_model, create_vectordb, create_conv_chain
from src.funcs import StopGenerationCriteria
from transformers import pipeline
from transformers import AutoTokenizer
from transformers import StoppingCriteriaList
# Tunable parameters
NUM_SAVED_MESSAGES = 6
MAX_LENGTH = 2048

# Checkpoint to load and the directory used as its local cache
model_src = "tiiuae/falcon-7b-instruct"
cache_dir = "models/"

# The model and its tokenizer are resolved from the same source and cache
model_4bit = create_model(model_src, cache_dir)
tokenizer = AutoTokenizer.from_pretrained(model_src, cache_dir=cache_dir)

# Stopping criteria meant to keep the model from rambling or inventing
# further conversation turns; each entry is one stop sequence.
stop_tokens = [["Human", ":"], ["AI", ":"], ["User", ":"]]
stop_criterion = StopGenerationCriteria(stop_tokens, tokenizer, model_4bit.device)
stopping_criteria = StoppingCriteriaList([stop_criterion])

# Custom prompt. {history} and {input} stay as literal placeholders here;
# presumably the conversation chain fills them in -- confirm in src.funcs.
template = """
You are a friendly AI assistant currently nicknamed "Falcon" who is helping the user accomplish his tasks,
and answers his questions informatively.
Current conversation:
{history}
Human: {input}
AI:""".strip()
# Text-generation pipeline built around the already-loaded model.
# The end-of-sequence id doubles as the padding id.
eos_id = tokenizer.eos_token_id
pipeline_options = {
    "use_cache": True,
    "device_map": "auto",
    "stopping_criteria": stopping_criteria,  # Criteria
    "max_length": MAX_LENGTH,
    "do_sample": True,
    "top_k": 10,
    "num_return_sequences": 1,
    "eos_token_id": eos_id,
    "pad_token_id": eos_id,
}
pipe = pipeline(
    "text-generation",
    model=model_4bit,
    tokenizer=tokenizer,
    **pipeline_options,
)

# Conversation chain: prompt template + number of remembered messages + pipeline
chain = create_conv_chain(
    template=template, num_saved_mes=NUM_SAVED_MESSAGES, pipe=pipe
)

# Vector database queried for every user message in the chat loop
vectordb = create_vectordb()

print("\nConversation started with Falcon. Type 'quit' to stop conversation.\n")
# Entering chat with Falcon
#
# Reads user messages until "quit" (or end of input), shows the best vector
# database match for each message, prints the chain's answer, and finally
# writes the conversation history to conversation_history.txt.

# Speaker markers that can be left at the end of a response when generation
# is cut off by a stop sequence. "\nUser:" is listed before "\nUser" so the
# colon is removed together with the word instead of being left dangling.
STOP_MARKERS = ("\nUser:", "\nUser", "\nHuman:", "\nAI:")

while True:
    try:
        user_input = input("\nYou: ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C at the prompt ends the session like "quit",
        # so the history below is still saved instead of being lost.
        break

    command = user_input.strip()
    if command.lower() == "quit":
        break
    if not command:
        # Nothing to search for or to answer; ask again.
        continue

    db_search = vectordb.similarity_search_with_relevance_scores(user_input)
    if db_search:
        # Each hit is a (document, relevance score) pair; show the first one.
        search, score = db_search[0]
        print(f"This is the doc: {search}\n This is the relevance score {score}")
    else:
        # An empty result list would otherwise raise IndexError.
        print("No matching document found in the vector database.")

    response = chain(user_input)["response"]

    # Processing response to remove stop words
    for marker in STOP_MARKERS:
        response = response.replace(marker, "")
    print("Falcon: ", response)

# Extract the conversation history from the chain object
conversation_history = chain.memory.buffer
if not isinstance(conversation_history, str):
    # NOTE(review): the buffer is presumably a plain string; if the memory is
    # configured to return message objects it is a list -- confirm against
    # create_conv_chain. Render one entry per line in that case.
    conversation_history = "\n".join(str(entry) for entry in conversation_history)

# Save the conversation history to a text file. write() is used because
# writelines() on a string iterates it character by character; the encoding
# is pinned so non-ASCII model output does not depend on the platform default.
with open("conversation_history.txt", "w", encoding="utf-8") as file:
    file.write(conversation_history)

print("\nSession ended")