Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add additional output fields to the project data query #7

Merged
merged 6 commits into from
Feb 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ attrs==25.1.0
autopep8==2.3.2
beautifulsoup4==4.13.3
black==25.1.0
blinker==1.9.0
certifi==2025.1.31
charset-normalizer==3.4.1
click==8.1.8
dataclasses-json==0.6.7
dill==0.3.9
distro==1.9.0
filelock==3.17.0
Flask==3.1.0
frozenlist==1.5.0
fsspec==2025.2.0
h11==0.14.0
Expand All @@ -24,16 +26,20 @@ httpx-sse==0.4.0
huggingface-hub==0.28.1
idna==3.10
isort==6.0.0
itsdangerous==2.2.0
Jinja2==3.1.5
jiter==0.8.2
jsonpatch==1.33
jsonpointer==3.0.0
langchain==0.3.18
langchain-community==0.3.17
langchain-core==0.3.34
langchain-experimental==0.3.4
langchain-neo4j==0.3.0
langchain-openai==0.3.4
langchain-text-splitters==0.3.6
langsmith==0.3.5
MarkupSafe==3.0.2
marshmallow==3.26.1
mccabe==0.7.0
multidict==6.1.0
Expand Down Expand Up @@ -72,5 +78,6 @@ typing-inspect==0.9.0
typing_extensions==4.12.2
urllib3==2.3.0
uuid==1.30
Werkzeug==3.1.3
yarl==1.18.3
zstandard==0.23.0
4 changes: 4 additions & 0 deletions src/chunking.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ def generate_chunk_uuid(chunk_text):


def chunk_text(content, chunk_size=512, chunk_overlap=50):
    """Split a text into overlapping chunks for embedding.

    Args:
        content: Text to split; any falsy value (None, "") yields [].
        chunk_size: Maximum number of characters per chunk.
        chunk_overlap: Number of characters shared by consecutive chunks.

    Returns:
        A list of chunk strings, empty when there is nothing to split.
    """
    # Guard clause: nothing to split, so avoid constructing a splitter.
    if not content:
        return []

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(content)


Expand Down
46 changes: 30 additions & 16 deletions src/cypher_query.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from utils.openai import generate_embedding, openai_client
from neo4j_utils import get_neo4j_driver
import json
Expand All @@ -21,9 +22,12 @@ def check_if_embedding_needed(request, schema_hint):
END OF THE QUERY

By looking at the query, I want you to infer whether any semantic search is needed or not.
If needed, please provide a message that can be used to generate an embedding.
In other words, should I search for project chunks with similar meaning to the query or not.
If it's needed, please provide a message that can be used to generate an embedding.
For example, if the query asks "provide me 2 projects related to climate change impact on renewable energy", you must provide a message like "climate change impact on renewable energy" as the embedding message.
But if the query is like "provide me 2 projects", then no embedding is needed.
But if the intention is random projects or random donations, then no embedding is needed, and you should return {{"embedding_needed": "False"}}.



Now, please tell me does the query need an embedding? Respond strictly in this JSON format:
{{
Expand All @@ -38,6 +42,8 @@ def check_if_embedding_needed(request, schema_hint):
temperature=0.3,
)
result = response.choices[0].text.strip()
result = re.sub(r'("embedding_needed": )false', r"\1False", result)
result = re.sub(r'("embedding_needed": )true', r"\1True", result)
print(f"Embedding Check Result: {result}")
return eval(result) # Convert the JSON-like string to a dictionary

Expand Down Expand Up @@ -81,7 +87,8 @@ def generate_neo4j_query(request, schema_hint, embedding_message=None, embedding
# Replace the similarity function
cypher_query = response.choices[0].text.strip()
cypher_query = cypher_query.replace(
"gds.alpha.similarity.cosine", "gds.similarity.cosine")
"gds.alpha.similarity.cosine", "gds.similarity.cosine"
)
cypher_query = cypher_query.replace("gds.alpha.pageRank", "gds.pageRank")
return cypher_query

Expand All @@ -99,19 +106,21 @@ def process_user_request(request, schema_hint):
embedding_message = embedding_check["embedding_message"]
# Your embedding generation function
embedding = generate_embedding(embedding_message)
print(
f"Generated Embedding for '{embedding_message}': {embedding[:5]}...")
print(f"Generated Embedding for '{embedding_message}': {embedding[:5]}...")
else:
embedding_message = None
embedding = None

# Step 3: Generate the Cypher query
cypher_query = generate_neo4j_query(
request, schema_hint, embedding_message=embedding_message, embedding=embedding)
request, schema_hint, embedding_message=embedding_message, embedding=embedding
)
print(f"Generated Cypher Query: {cypher_query}")

if embedding:
parameters = {"queryVector": embedding, } # Pass query embedding
parameters = {
"queryVector": embedding,
} # Pass query embedding
else:
parameters = {}

Expand All @@ -130,20 +139,25 @@ def execute_cypher_query(cypher_query, parameters):

schema_hint = """
Neo4j Schema:
Node labels: Project, Chunk
Relationships: Project -> Chunk (:HAS_CHUNK)
Project properties: id, title, raised_amount, giv_power, listed
Node labels: Project, Chunk, Donation
Relationships: Project -> Chunk (:HAS_CHUNK), Project -> Donation (:HAS_DONATION)
Project properties: id, title, raised_amount, giv_power, given_power_rank, givbacks_eligible, in_active_qf_round, unique_donors, owner_wallet, ethereum_address, polygon_address, optimism_address, celo_address, base_address, arbitrum_address, gnosis_address, zkevm_address, ethereum_classic_address, stellar_address, solana_address, x, facebook, instagram, youtube, linkedin, reddit, discord, farcaster, lens, website, telegram, github, listed
Chunk properties: id, text, embedding, created_at
Donation properties: id, tx_hash, chain_id, project_title, created_at, amount, value_usd
Chunks are generated by splitting the description of a project.
"""
user_request = {
'query': "I want to hear about projects impact kids health",
'output_format': "{project_id, project_title, raised_amount, giv_power, related_chunks: [text]}"
"query": "I want to hear about projects impact kids health",
"output_format": "{project_id, project_title, raised_amount, giv_power, giv_power_rank, givbacks_eligible, in_active_qf_round, unique_donors, owner_wallet, ethereum_address, polygon_address, optimism_address, celo_address, base_address, arbitrum_address, gnosis_address, zkevm_address, ethereum_classic_address, stellar_address, solana_address, x, facebook, instagram, youtube, linkedin, reddit, discord, farcaster, lens, website, telegram, github, related_chunks: [text] (array)}",
}
# results = process_user_request(schema_hint=schema_hint, request=user_request)
print('#######################')

# print(json.dumps(results, indent=4))
# user_request = {
# "query": "5 random donations with value more than 100$",
# "output_format": "{tx_hash, chain_id, project_title(project.title), created_at, amount, value_usd}",
# }
results = process_user_request(schema_hint=schema_hint, request=user_request)
print("#######################")

print(json.dumps(results, indent=4))

# from langchain_neo4j import GraphCypherQAChain, Neo4jGraph
# from langchain_openai import ChatOpenAI
Expand Down
Loading