From 46877183c6c01909c3e56cc02e41b3768c36e22f Mon Sep 17 00:00:00 2001 From: Viraj Malia <96500051+virajmalia@users.noreply.github.com> Date: Wed, 5 Jun 2024 20:04:21 -0700 Subject: [PATCH] Enable search prompt using ddg (#16) * Enable search prompt using ddg * update readme and project deps --- README.md | 30 +++++++++++++++--------------- pyproject.toml | 1 - src/llama4u.py | 9 +++++++++ src/sources/search.py | 37 ------------------------------------- 4 files changed, 24 insertions(+), 53 deletions(-) delete mode 100755 src/sources/search.py diff --git a/README.md b/README.md index dec937d..8e0c9b5 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,26 @@ # The Llama4U Project [![Python application](https://github.com/virajmalia/llama4u/actions/workflows/CI.yml/badge.svg)](https://github.com/virajmalia/llama4u/actions/workflows/CI.yml) -## Vision -Develop a free and open source, fully-featured AI solution with agents. +Llama4U is a privacy-focused AI assistant developed using [Ollama][1], [LangChain][2] and [Llama3][3]. A completely free AI solution that can be hosted locally, while providing online capabilities in a responsible and user-controllable way. + +#### *APIs that have usage limitations or require keys to be registered with an online account won't be added to this project.* + +## Steps to run +1. Host `llama3` model from [Ollama][1] on your computer. +2. Clone this repository. +3. `pip install -e .` +4. `llama4u` + +`llama4u --help` for full CLI. + +## List of chat commands + +- `/search`: Perform online search using DuckDuckGo ## Current motivations for feature-set - Perplexity AI - ChatGPT/GPT4o -## Rule -- APIs that have usage limitations or require keys to be registered with an online account won't be added to this project. - ## System requirements - Powerful CPU or Nvidia GPU (>=8G VRAM) - Ubuntu 22.04 @@ -32,16 +42,6 @@ fi echo $CUDACXX && $CUDACXX --version ``` -## Steps to run -1. Host `llama3` model from [Ollama][1] on your computer -2. `pip install -e .` -3. `llama4u` - -`llama4u --help` for full CLI - -## Description -Llama4U is an AI assistant developed using [Ollama][1], [LangChain][2] and [Llama3][3]. A completely free AI solution that can be hosted locally, while providing online capabilities in a responsible and user-controllable way. - ## Credits - Meta, for the open source Llama models - Ollama diff --git a/pyproject.toml b/pyproject.toml index 8c7ee45..c53f7f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,6 @@ dependencies = [ "langchain-core", "langchain-community", "langchain-chroma", - "duckduckgo_search", "termcolor" ] diff --git a/src/llama4u.py b/src/llama4u.py index c4218cf..0efda92 100755 --- a/src/llama4u.py +++ b/src/llama4u.py @@ -3,6 +3,7 @@ from termcolor import colored from langchain_community.chat_models.ollama import ChatOllama from langchain_community.chat_message_histories.in_memory import ChatMessageHistory +from langchain_community.tools.ddg_search.tool import DuckDuckGoSearchRun from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder from langchain_core.runnables.history import RunnableWithMessageHistory from input.input import parse_arguments @@ -46,9 +47,17 @@ def get_session_history(self, session_id): async def chat_session(self): """ Chat session with history """ while True: + # Get input print(colored('>>> ', 'yellow'), end="") user_prompt = input() + # Redirect search queries + if user_prompt.startswith("/search"): + search_results = DuckDuckGoSearchRun().run(user_prompt.replace("/search", "")) + user_prompt = \ + f"Summarize the following search results as if you are answering:{search_results}" + + # Invoke chain response = self.with_msg_history.invoke( {"input": user_prompt}, config={"configurable": {"session_id": "abc123"}}, diff --git a/src/sources/search.py b/src/sources/search.py deleted file mode 100755 index 37cc2b6..0000000 --- a/src/sources/search.py +++ /dev/null @@ -1,37 +0,0 @@ -""" search.py for Llama4U """ -from duckduckgo_search import DDGS -from langchain_chroma import Chroma -from langchain_community.document_loaders import WebBaseLoader -from langchain_community.embeddings import HuggingFaceEmbeddings -from langchain_text_splitters import RecursiveCharacterTextSplitter - -class Search(): - """ Search class to perform online search using DuckDuckGo """ - def __init__(self, query_str): - self.query = query_str - self.embedding = HuggingFaceEmbeddings(model_name='multi-qa-MiniLM-L6-cos-v1') - # Split the output to keep small sized chunks - self.splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0) - - # Get DuckDuckGo search results - results = DDGS().text(self.query, max_results=3) - urls = [] - for res in results: - urls.append(res.get('href')) - - # Create an instance of WebBaseLoader - loader = WebBaseLoader(urls) - self.data = loader.load() - self.data_split = self.splitter.split_documents(self.data) - - # Create a VectorDB from the DDG search results - self.vectordb = Chroma.from_documents(documents=self.data_split, embedding=self.embedding) - - def retrieve(self, db_query): - """ Retrieve results of the search operation from the vectordb """ - # Use the vectorDB as a retriever - retriever = self.vectordb.as_retriever() - - # Query the vectorDB - docs = retriever.invoke(db_query) - return docs[0].page_content