diff --git a/psl_proof/__main__.py b/psl_proof/__main__.py
index 52e56a5..ba09677 100644
--- a/psl_proof/__main__.py
+++ b/psl_proof/__main__.py
@@ -16,9 +16,10 @@ def load_config() -> Dict[str, Any]:
     """Load proof configuration from environment variables."""
     config = {
-        'dlp_id': 1234, # Set your own DLP ID here
+        'dlp_id': 16, # MOKSHA
         'input_dir': INPUT_DIR,
-        'salt': 'replace-this-salt' # TODO: replace this so that we can salt in a better way
+        'salt': '5EkntCWI',
+        'validator_base_api_url': 'https://api.vana.genesis.dfusion.ai'
     }
     logging.info(f"Using config: {json.dumps(config, indent=2)}")
     return config
@@ -39,7 +40,7 @@ def run() -> None:
     output_path = os.path.join(OUTPUT_DIR, "results.json")
     with open(output_path, 'w') as f:
         json.dump(proof_response.dict(), f, indent=2)
-    logging.info(f"Proof generation complete: {proof_response}")
+    #logging.info(f"Proof generation complete: {proof_response}")


 def extract_input() -> None:
diff --git a/psl_proof/models/cargo_data.py b/psl_proof/models/cargo_data.py
index 29c147e..8f9442f 100644
--- a/psl_proof/models/cargo_data.py
+++ b/psl_proof/models/cargo_data.py
@@ -10,7 +10,7 @@

 # Enum for DataSource
 class DataSource(Enum):
-    telegram = 1
+    telegram = 0

 # Source Chat Data
 @dataclass
@@ -24,31 +24,36 @@ class SourceChatData:
     chat_start_on: datetime = None
     chat_ended_on: datetime = None

+    def chat_id_as_key(self) -> str:
+        return str(self.chat_id)
+
     def timeliness_value(self) -> float:
         if self.total_content_length == 0:
             return 0
         # tav = (𝛴 litsi) / (𝛴 li)
-        time_avg = self.total_content_value / self.total_content_length
+        time_avg = float(self.total_content_value) / float(self.total_content_length)
         # a = ln(2) / thl
-        half_life = 60 # 60 minutes
+        half_life = 600.0 # 600 minutes
         time_decay = math.log(2) / half_life
         # t = exp(-atav)
         return math.exp(- time_decay * time_avg) # range 0 to 1

     def thoughtfulness_of_conversation(self) -> float:
         n = len(self.participants) # n: number of participants
-        u = 2 # 𝜇: optimal number of participants
-        d = 1 # 𝜎: standard deviation of the curve
-        # Formula: p = exp(-(n-𝜇) / (2𝜎^2))
-        return math.exp(-(n - u) / (2 * d ** 2)) # range 0 to 1
+        if n == 1:
+            return 0.0
+        u = 3.0 # 𝜇: optimal number of participants
+        d = 5.0 # 𝜎: standard deviation of the curve
+        # Gaussian formula: p = exp(-((n-𝜇)^2) / (2𝜎^2)); the exponent must be
+        # squared, otherwise the result exceeds 1 for n < 𝜇
+        return math.exp(-((n - u) ** 2) / (2 * d ** 2)) # range 0 to 1

     def contextualness_of_conversation(self) -> float:
         c = self.total_content_length #total token length, c, of the text data
-        m = 2 #midpoint
-        k = 1 #key parameters.
+        m = 2.0 #midpoint
+        k = 1.0 #key parameters.
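+        # m is the content length at which the logistic score crosses 0.5 and
+        # k sets the curve's steepness; these look like placeholder values.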
         # l=1/(1+exp(-k(c-c0)))
-        return 1/(1 + math.exp(-k*(c-m)))
+        return 1.0/(1.0 + math.exp(-k*(c-m)))

     def quality_score(self) -> float :
         a = 1 # factor
@@ -83,13 +88,13 @@ def add_content(
         time_in_minutes = int(time_in_seconds // 60)

         if (self.chat_start_on):
-            if (self.chat_start_on < chat_timestamp):
+            if (self.chat_start_on > chat_timestamp):
                 self.chat_start_on = chat_timestamp
         else :
             self.chat_start_on = chat_timestamp

         if (self.chat_ended_on):
-            if (self.chat_ended_on > chat_timestamp):
+            if (self.chat_ended_on < chat_timestamp):
                 self.chat_ended_on = chat_timestamp
         else :
             self.chat_ended_on = chat_timestamp
@@ -113,29 +118,35 @@ def to_dict(self) -> dict:
         }

     def to_submission_json(self) -> dict:
+        chat_start_on = self.chat_start_on if self.chat_start_on is not None else datetime.now()
+        chat_ended_on = self.chat_ended_on if self.chat_ended_on is not None else datetime.now()
         return {
-            "SourceChatId": self.chat_id,
+            "SourceChatId": self.chat_id_as_key(),
             "ParticipantCount": len(self.participants),
             "ChatCount": self.chat_count,
             "ChatLength": self.total_content_length,
-            "ChatStartOn": self.chat_start_on.isoformat() if isinstance(self.chat_start_on, datetime) else str(self.chat_start_on),
-            "ChatEndedOn": self.chat_ended_on.isoformat() if isinstance(self.chat_ended_on, datetime) else str(self.chat_ended_on),
+            "ChatStartOn": chat_start_on.isoformat(),
+            "ChatEndedOn": chat_ended_on.isoformat()
         }
+
+

 # SourceData with enum and chat data
 @dataclass
 class SourceData:
     source: DataSource # "telegram"
     user: str
+    submission_token: str
     submission_id: str
     submission_by: str
     submission_date: datetime
     source_chats: List[SourceChatData] # List of SourceChatData instances

-    def __init__(self, source, submission_id, submission_by, submission_date, user, source_chats=None):
+    def __init__(self, source, submission_token, submission_id, submission_by, submission_date, user, source_chats=None):
         self.source = source
         self.user = user
+        self.submission_token = submission_token
         self.submission_id = submission_id
         self.submission_by = submission_by
         self.submission_date = submission_date
@@ -152,14 +163,23 @@ def to_dict(self):
         }

     def to_submission_json(self) :
-        return {
-            "DataSource": self.source.name, # Use .name to convert enum to string
+        payload = {  # named 'payload' so it does not shadow the json module
+            "DataSource": self.source.value, # Use .value to send the enum's numeric id
             "SourceId": self.submission_id,
+            "SubmissionToken": self.submission_token,
             "SubmittedBy": self.submission_by,
-            "SubmittedOn": self.submission_date.isoformat() if isinstance(self.submission_date, datetime) else str(self.submission_date),
+            "SubmittedOn": self.submission_date.isoformat(),
             "Chats": [source_chat.to_submission_json() for source_chat in self.source_chats]
         }
+        #print(f"Submission json:{payload}")
+        return payload
+
+    def to_verification_json(self) -> dict:
+        return {
+            "VerificationType": 0, # VerificationToken.
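+            # 0 is assumed to mean token-based verification on the validator
+            # API; no other verification types are defined in this change.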
+ "Token": self.submission_token, + "Reference": self.submission_id + } # ChatData for Source (final destination data structure) @dataclass @@ -168,16 +188,14 @@ class ChatData: chat_length: int sentiment: Dict[str, Any] = field(default_factory=dict) - keywords_keybert: Dict[str, Any] = field(default_factory=dict) - #keywords_lda: Dict[str, Any] = field(default_factory=dict) + keywords: Dict[str, Any] = field(default_factory=dict) def to_dict(self): return { "chat_id": self.chat_id, "chat_length": self.chat_length, - "sentiment": self.sentiment, # No need to call .to_dict() for dicts - "keywords_keybert": self.keywords_keybert, # Same for other dict fields - #"keywords_lda": self.keywords_lda # Same for other dict fields + "sentiment": self.sentiment, # No need to call .to_dict() for dicts + "keywords": self.keywords, # Same for other dict fields } # CargoData for Source @@ -221,4 +239,4 @@ def to_dict(self): return { "source_id": self.source_id, "dlp_id": self.dlp_id - } \ No newline at end of file + } diff --git a/psl_proof/proof.py b/psl_proof/proof.py index 74cd66a..30b5856 100644 --- a/psl_proof/proof.py +++ b/psl_proof/proof.py @@ -9,19 +9,20 @@ from psl_proof.utils.hashing_utils import salted_data, serialize_bloom_filter_base64, deserialize_bloom_filter_base64 from psl_proof.models.cargo_data import SourceChatData, CargoData, SourceData, DataSource, MetaData, DataSource from psl_proof.utils.validate_data import validate_data - +from psl_proof.utils.submission import submit_data +from psl_proof.utils.verification import verify_token, VerifyTokenResult class Proof: def __init__(self, config: Dict[str, Any]): self.config = config self.proof_response = ProofResponse(dlp_id=config['dlp_id']) - #RL: Proof Data... + def generate(self) -> ProofResponse: """Generate proofs for all input files.""" logging.info("Starting proof data") - zktls_proof = None + data_revision = "01.01" source_data = None for input_filename in os.listdir(self.config['input_dir']): @@ -29,13 +30,7 @@ def generate(self) -> ProofResponse: if os.path.splitext(input_file)[1].lower() == '.json': with open(input_file, 'r') as f: input_data = json.load(f) - #print(f"Input Data: {input_data}") - - if input_filename == 'zktls_proof.json': - zktls_proof = input_data.get('zktls_proof', None) - continue - - elif input_filename == 'chats.json': + if input_filename == 'chats.json': source_data = get_source_data( input_data ) @@ -47,10 +42,17 @@ def generate(self) -> ProofResponse: salt ) source_data.submission_by = source_user_hash_64 - is_data_authentic = get_is_data_authentic( - source_data, - zktls_proof + proof_failed_reason = "" + verify_result = verify_token( + self.config, + source_data ) + is_data_authentic = verify_result + if is_data_authentic: + print(f"verify_result: {verify_result}") + is_data_authentic = verify_result.is_valid + proof_failed_reason = verify_result.error_text + cargo_data = CargoData( source_data = source_data, source_id = source_user_hash_64 @@ -64,17 +66,21 @@ def generate(self) -> ProofResponse: self.proof_response.ownership = 1.0 if is_data_authentic else 0.0 self.proof_response.authenticity = 1.0 if is_data_authentic else 0.0 + current_datetime = datetime.now().isoformat() if not is_data_authentic: #short circuit so we don't waste analysis + print(f"Validation proof failed: {proof_failed_reason}") self.proof_response.score = 0.0 self.proof_response.uniqueness = 0.0 self.proof_response.quality = 0.0 self.proof_response.valid = False self.proof_response.attributes = { 'proof_valid': False, + 
+                'proof_failed_reason': proof_failed_reason,
                 'did_score_content': False,
-                'source': source_data.Source.name,
-                'submit_on': current_datetime,
+                'source': source_data.source.name,
+                'revision': data_revision,
+                'submitted_on': current_datetime,
                 'chat_data': None
             }
             self.proof_response.metadata = metadata
@@ -93,26 +99,29 @@ def generate(self) -> ProofResponse:
             and self.proof_response.quality >= score_threshold
             and self.proof_response.uniqueness >= score_threshold
         )
-        self.proof_response.score = (
+        total_score = (
             self.proof_response.authenticity * 0.25
             + self.proof_response.ownership * 0.25
             + self.proof_response.quality * 0.25
             + self.proof_response.uniqueness * 0.25
         )
-
+        self.proof_response.score = round(total_score, 2)
         self.proof_response.attributes = {
             'proof_valid': is_data_authentic,
             'did_score_content': True,
             'source': source_data.source.name,
-            'submit_on': current_datetime,
+            'revision': data_revision,
+            'submitted_on': current_datetime,
             'chat_data': cargo_data.get_chat_list_data()
         }
         self.proof_response.metadata = metadata
-        #RL Validate data & obtain unquiness from server
-        # response = submit_data(source_data)...
-        #RL Todo...
-
+        # Submit source data to the validator server
+        submit_data(
+            self.config,
+            source_data
+        )
+        print(f"proof data: {self.proof_response}")
         return self.proof_response

 def get_telegram_data(
@@ -124,7 +133,7 @@ def get_telegram_data(
     if chat_type == "message":
         # Extract user ID
         chat_user_id = input_content.get("sender_id", {}).get("user_id", "")
-        print(f"chat_user_id: {chat_user_id}")
+        #print(f"chat_user_id: {chat_user_id}")
         source_chat_data.add_participant(chat_user_id)

         message_date = submission_timestamp
@@ -132,6 +141,7 @@ def get_telegram_data(
         date_value = input_content.get("date", None)
         if date_value:
             message_date = datetime.utcfromtimestamp(date_value) # Convert Unix timestamp to datetime
+            #print(f"message_date: {message_date}")

         # Extract the message content
         message = input_content.get('content', {})
@@ -147,16 +157,12 @@ def get_telegram_data(

 def get_source_data(input_data: Dict[str, Any]) -> SourceData:

-    revision = input_data.get('revision', '').upper()
+    revision = input_data.get('revision', '')
     if (revision and revision != "01.01"):
-        print(f"Invalid Revision: {revision}")
-
+        raise RuntimeError(f"Invalid Revision: {revision}")

-    submission_date = datetime.now().timestamp()
-    # Extract and convert the Unix timestamp to a datetime object
-    date_value = input_data.get("submission_date", None)
-    if date_value:
-        submission_date = datetime.utcfromtimestamp(date_value) # Convert Unix timestamp to datetime
+    submission_date = datetime.now()
+    #print(f"submission_date: {submission_date}")

     input_source_value = input_data.get('source', '').upper()
     input_source = None
@@ -164,9 +170,12 @@ def get_source_data(input_data: Dict[str, Any]) -> SourceData:
     if input_source_value == 'TELEGRAM':
         input_source = DataSource.telegram
     else:
-        print(f"Unmapped data source: {input_source_value}")
+        raise RuntimeError(f"Unmapped data source: {input_source_value}")

-    submission_id = input_data.get('submission_id', '').upper()
+    submission_token = input_data.get('submission_token', '')
+    #print(f"submission_token: {submission_token}")
+
+    submission_id = input_data.get('submission_id', '')

     input_user = input_data.get('user')
     #print(f"input_user: {input_user}")

     source_data = SourceData(
         source=input_source,
         user=input_user,
+        submission_token = submission_token,
         submission_id = submission_id,
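+        # the token and id are echoed back to the validator via to_verification_json()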
         submission_by = input_user,
         submission_date = submission_date
     )
@@ -198,19 +208,8 @@ def get_source_data(input_data: Dict[str, Any]) -> SourceData:
                 source_chat
             )
         else:
-            print(f"Unhandled data source: {input_source}")
+            raise RuntimeError(f"Unhandled data source: {input_source}")

         source_chats.append(
             source_chat
         )

     return source_data
-
-
-def get_is_data_authentic(content, zktls_proof) -> bool:
-    """Determine if the submitted data is authentic by checking the content against a zkTLS proof"""
-    return 1.0
-
-def get_user_submission_freshness(source, user) -> float:
-    """Compute User Submission freshness"""
-    #TODO: Get the IPFS data and check the attributes for timestamp of last submission
-    #TODO: Implement cool-down logic so that there is a cool down for one particular social media account. I.E. someone who just submitted will get a very low number
-    return 1.0
\ No newline at end of file
diff --git a/psl_proof/utils/feature_extraction.py b/psl_proof/utils/feature_extraction.py
index 4bc66a9..5300a1d 100644
--- a/psl_proof/utils/feature_extraction.py
+++ b/psl_proof/utils/feature_extraction.py
@@ -34,7 +34,10 @@ def get_keywords_keybert(chats):
 #     return keywords

 def get_sentiment_data(chats):
-    sentiment_analyzer = pipeline("sentiment-analysis", model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+    #Patrick_ToCheck: this model does not work...
+    #sentiment_analyzer = pipeline("sentiment-analysis", model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+    sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment-multilingual")
+
     messages = chats.split(">") #TODO use real way to split out different messages
     #TODO: make sure no single message is too long for classification, can break it up if length too long
     sentiments = sentiment_analyzer(messages)
@@ -47,4 +50,4 @@ def get_sentiment_data(chats):
     # Normalize scores by dividing by the total number of messages
     total_messages = len(messages)
     normalized_scores = {key: (category_scores[key] / total_messages) for key in category_scores}
-    return normalized_scores
\ No newline at end of file
+    return normalized_scores
diff --git a/psl_proof/utils/submission.py b/psl_proof/utils/submission.py
new file mode 100644
index 0000000..da6fbfe
--- /dev/null
+++ b/psl_proof/utils/submission.py
@@ -0,0 +1,96 @@
+from typing import Optional, List, Dict, Any
+import requests
+import json
+from dataclasses import dataclass, field
+from datetime import datetime
+
+from psl_proof.models.cargo_data import SourceData, DataSource
+from psl_proof.utils.validation_api import get_validation_api_url
+
+
+@dataclass
+class SubmissionChat:
+    participant_count: int
+    chat_count: int
+    chat_length: int
+    chat_start_on: datetime
+    chat_ended_on: datetime
+
+@dataclass
+class ChatHistory:
+    source_chat_id: str
+    chat_list: List[SubmissionChat] = field(default_factory=list)
+
+
+def get_historical_chats(
+    config: Dict[str, Any],
+    source_data: SourceData
+    ) -> Optional[List[ChatHistory]]:
+    try:
+        url = get_validation_api_url(
+            config,
+            "api/submissions/historical-chats"
+        )
+        headers = {"Content-Type": "application/json"}
+        payload = source_data.to_submission_json()
+
+        response = requests.post(url, json=payload, headers=headers)
+
+        if response.status_code == 200:
+            try:
+                chat_histories_json = response.json()
+
+                # Map JSON response to ChatHistory objects
+                chat_histories = []
+                for chat_history_data in chat_histories_json:
+                    #print(f"chat_history_data:{chat_history_data}")
+                    chat_list = [
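+                        # assumes the validator returns camelCase keys and ISO-8601 timestamps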
+                        SubmissionChat(
+                            participant_count=chat.get("participantCount", 0),
+                            chat_count=chat.get("chatCount", 0),
+                            chat_length=chat.get("chatLength", 0),
+                            chat_start_on=datetime.fromisoformat(chat["chatStartOn"]),
+                            chat_ended_on=datetime.fromisoformat(chat["chatEndedOn"])
+                        )
+                        for chat in chat_history_data.get("chats", [])
+                    ]
+
+                    chat_history = ChatHistory(
+                        source_chat_id=chat_history_data.get("sourceChatId", ""),
+                        chat_list=chat_list
+                    )
+                    chat_histories.append(chat_history)
+
+                return chat_histories
+            except ValueError as e:
+                print("Error parsing JSON response:", e)
+                return None
+        else:
+            print(f"Validation failed. Status code: {response.status_code}, Response: {response.text}")
+            return None
+
+    except requests.exceptions.RequestException as e:
+        print("get_historical_chats:", e)
+        return None
+
+
+def submit_data(
+    config: Dict[str, Any],
+    source_data: SourceData
+):
+    try:
+        url = get_validation_api_url(
+            config,
+            "api/submissions/submit-data"
+        )
+        headers = {"Content-Type": "application/json"}
+        payload = source_data.to_submission_json()
+
+        response = requests.post(url, json=payload, headers=headers)
+
+        if response.status_code != 200:
+            raise RuntimeError(f"Submission failed. Status code: {response.status_code}, Response: {response.text}")
+
+    except requests.exceptions.RequestException as e:
+        raise RuntimeError(f"submit_data: {e}")
diff --git a/psl_proof/utils/submit_data.py b/psl_proof/utils/submit_data.py
deleted file mode 100644
index 8f808a8..0000000
--- a/psl_proof/utils/submit_data.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from typing import List, Dict, Optional
-import requests
-from psl_proof.models.cargo_data import SourceData
-
-class ValidationResponse:
-    def __init__(self, uniqueness=0):
-        self.uniqueness = uniqueness
-
-    @classmethod
-    def from_json(cls, json_data):
-        if isinstance(json_data, str):
-            data = json.loads(json_data)
-        elif isinstance(json_data, dict):
-            data = json_data
-        else:
-            raise ValueError("Invalid JSON data type. Must be a string or dictionary.")
-
-        return cls(uniqueness=data.get('Uniqueness', 0))
-
-
-# Define the URL of the web service
-topics_url = "https://6f6f-169-0-170-105.ngrok-free.app" # Replace with your API endpoint
-
-
-def validate_data(source_data: SourceData) -> Optional[ValidationResponse]:
-    try:
-        url = f"{topics_url}/api/validations/validate"
-        headers = {"Content-Type": "application/json"}
-        payload = source_data.get_submission_json()
-        response = requests.post(url, data=payload, headers=headers)
-
-        if response.status_code == 200:
-            jsondata = response.json()
-            result = ValidationResponse.from_json(jsondata)
-            print("Validate data successfully:", result)
-            return result
-        else:
-            print(f"Failed to Validate Data. Status code: {response.status_code}")
-            return None
-
-    except requests.exceptions.RequestException as e:
-        print("An error occurred:", e)
-        return None
-
-def submit_data(source_data: SourceData):
-    try:
-        url = f"{topics_url}/api/validations/submit"
-        headers = {"Content-Type": "application/json"}
-        payload = source_data.get_submission_json()
-        response = requests.post(url, data=payload, headers=headers)
-
-        if response.status_code == 200:
-            jsondata = response.json()
-            result = ValidationResponse.from_json(jsondata)
-            print("Submission Data successfully:", result)
-            return result
-        else:
-            print(f"Failed to Submission Data. Status code: {response.status_code}")
Status code: {response.status_code}") - return None - - except requests.exceptions.RequestException as e: - print("An error occurred:", e) - return None \ No newline at end of file diff --git a/psl_proof/utils/validate_data.py b/psl_proof/utils/validate_data.py index c357c7e..988c622 100644 --- a/psl_proof/utils/validate_data.py +++ b/psl_proof/utils/validate_data.py @@ -2,37 +2,41 @@ from psl_proof.models.proof_response import ProofResponse from typing import List, Dict, Any from psl_proof.utils.feature_extraction import get_sentiment_data, get_keywords_keybert #, get_keywords_lda -#from utils.submit_data import validate_data, submit_data +from psl_proof.utils.submission import get_historical_chats, ChatHistory, SubmissionChat -def get_user_submited_chat_data( - config: Dict[str, Any], - cargo_data: CargoData - ) -> List[ChatData]: - # Fetch old data from IPFS... - # Patrick: Need to found out from Vana Team... +def get_uniqueness_score( + source_chat: SourceChatData, + chat_histories: List[ChatHistory] +) -> float: + # Requirement 1: If chat_histories is empty, return 1 + if not chat_histories: + return 1.0 - return [] + chat_ended_on = ( + source_chat.chat_ended_on if source_chat.chat_ended_on else datetime.now() + ) + # Loop through chat_histories to find a match by source_chat_id + for history in chat_histories: + if history.source_chat_id == source_chat.chat_id_as_key(): + # Loop through chat_list to find a match by chat_ended_On date + for historical_chat in history.chat_list: + historical_chat_ended_on = historical_chat.chat_ended_on -def score_uniqueness(previous_chat_list: List[ChatData], chat_id: int, content_length: int) -> float: - if content_length == 0 : - return 0 + if historical_chat_ended_on.tzinfo is not None: + # Remove timezone info + historical_chat_ended_on = historical_chat_ended_on.replace(tzinfo=None) - total_score = 0 - entry_count = 0 - for chat_data in previous_chat_list: - matched = chat_data.chat_id == chat_id - if matched: - entry_count += 1 - entry_len = chat_data.chat_length - # Calculate score if content_length is greater than zero - score = (content_length - entry_len) / content_length - total_score += score + if chat_ended_on.tzinfo is not None: + chat_ended_on = chat_ended_on.replace(tzinfo=None) - if entry_count > 0: - return total_score / entry_count + time_in_seconds = (chat_ended_on - historical_chat_ended_on).total_seconds() + time_in_hours = int(time_in_seconds // 3600) + if time_in_hours < 12: # within 12 Hours.. + return 0.0 - return 1 + # If no matching source_chat_id is found, return 1 + return 1.0 def validate_data( config: Dict[str, Any], @@ -42,23 +46,18 @@ def validate_data( source_data : SourceChatData = cargo_data.source_data source_chats = source_data.source_chats + #Patrick_ToCheck score_threshold should be 0.5 score_threshold = 0.5 - number_of_keywords = 10 - - total_uniqueness = 0.00 total_quality = 0.00 + total_uniqueness = 0.00 chat_count = 0 - previous_chat_list = get_user_submited_chat_data( + #Validate source data via valiator.api & obtain unquiness + chat_histories = get_historical_chats( config, - cargo_data + source_data ) - submission_json = source_data.to_submission_json() - print(f"submission_json: {submission_json}") - #RL Validate data & obtain unquiness from server - # response = validate_data(source_data) - # proof_data.uniqueness = response.uniqueness - #RL Todo... 
+ #print(f"chat_histories: {chat_histories}") # Loop through the chat_data_list for source_chat in source_chats: @@ -69,53 +68,52 @@ def validate_data( contents_length = 0 if source_chat.contents: # Ensure chat_contents is not None source_contents = source_chat.content_as_text() - print(f"source_contents: {source_contents}") + #print(f"source_contents: {source_contents}") contents_length = len(source_contents) - chat_id = source_chat.chat_id - uniqueness = score_uniqueness( - previous_chat_list, - chat_id, - contents_length - ) - print(f"Chat({chat_id}) - uniqueness: {uniqueness}") - total_uniqueness += uniqueness - - quality = source_chat.quality_score() - print(f"Chat({chat_id}) - quality: {quality}") - total_quality += quality - - # if chat data has meaningful data... - if uniqueness > score_threshold: - # content is unique... - chat_sentiment = get_sentiment_data( - source_contents - ) - chat_keywords_keybert = get_keywords_keybert( - source_contents, - number_of_keywords - ) - # chat_keywords_lda = get_keywords_lda( - # source_contents, - # number_of_keywords - # ) - - # Create a ChatData instance and add it to the list - chat_data = ChatData( - chat_id=source_chat.chat_id, - chat_length=contents_length, - sentiment=chat_sentiment, - keywords_keybert=chat_keywords_keybert, - # keywords_lda=chat_keywords_lda - ) - #print(f"chat_data: {chat_data}") - cargo_data.chat_list.append( - chat_data + if (contents_length > 0): + + chat_id = source_chat.chat_id + + quality = source_chat.quality_score() + print(f"Chat({chat_id}) - quality: {quality}") + + total_quality += quality + + uniqueness = get_uniqueness_score( + source_chat, + chat_histories ) + print(f"Chat({chat_id}) - uniqueness: {uniqueness}") + total_uniqueness += uniqueness + + #print(f"source_contents: {source_contents}") + # if chat data has meaningful data... 
+            if quality > score_threshold and uniqueness > score_threshold:
+                chat_sentiment = get_sentiment_data(
+                    source_contents
+                )
+                chat_keywords = get_keywords_keybert(
+                    source_contents
+                )
+                # Create a ChatData instance and add it to the list
+                chat_data = ChatData(
+                    chat_id=source_chat.chat_id,
+                    chat_length=contents_length,
+                    sentiment=chat_sentiment,
+                    keywords=chat_keywords
+                )
+                #print(f"chat_data: {chat_data}")
+                cargo_data.chat_list.append(
+                    chat_data
+                )
+            else:
+                print(f"Extract data skipped - values are below threshold ({score_threshold})")

     # Calculate uniqueness if there are chats
     if chat_count > 0:
-        proof_data.uniqueness = round(total_uniqueness / chat_count, 2)
-        print(f"proof_data.uniqueness: {proof_data.uniqueness}")
         proof_data.quality = round(total_quality / chat_count, 2)
-        print(f"proof_data.quality: {proof_data.quality}")
\ No newline at end of file
+        print(f"proof_data.quality: {proof_data.quality}")
+
+        proof_data.uniqueness = round(total_uniqueness / chat_count, 2)
+        print(f"proof_data.uniqueness: {proof_data.uniqueness}")
\ No newline at end of file
diff --git a/psl_proof/utils/validation_api.py b/psl_proof/utils/validation_api.py
new file mode 100644
index 0000000..284a5a4
--- /dev/null
+++ b/psl_proof/utils/validation_api.py
@@ -0,0 +1,12 @@
+from typing import Optional, List, Dict, Any
+
+def get_validation_api_url(
+    config: Dict[str, Any],
+    api_path: str
+    ) -> str:
+    base_url = config.get('validator_base_api_url')
+    if not base_url:
+        raise RuntimeError("validator_base_api_url is not specified.")
+    url = f"{base_url}/{api_path}"
+    print(f"Validator API URL: {url}")
+    return url
diff --git a/psl_proof/utils/verification.py b/psl_proof/utils/verification.py
new file mode 100644
index 0000000..a875d6b
--- /dev/null
+++ b/psl_proof/utils/verification.py
@@ -0,0 +1,40 @@
+from typing import Optional, Dict, Any
+import requests
+from dataclasses import dataclass
+from psl_proof.models.cargo_data import SourceData
+from psl_proof.utils.validation_api import get_validation_api_url
+
+
+@dataclass
+class VerifyTokenResult:
+    is_valid: bool
+    error_text: str
+
+
+def verify_token(config: Dict[str, Any], source_data: SourceData) -> Optional[VerifyTokenResult]:
+    # Returns None on any failure; the caller treats None as unverified data.
+    try:
+        url = get_validation_api_url(config, "api/verifications/verify-token")
+        headers = {"Content-Type": "application/json"}
+        payload = source_data.to_verification_json()
+
+        response = requests.post(url, json=payload, headers=headers)
+
+        if response.status_code == 200:
+            try:
+                result_json = response.json()
+
+                result = VerifyTokenResult(
+                    is_valid=result_json.get("isValid", False),
+                    error_text=result_json.get("errorText", ""),
+                )
+                return result
+            except ValueError as e:
+                print("Error parsing JSON response:", e) # Replace with logging in production
+                return None
+        else:
+            print(f"verify_token failed. Status code: {response.status_code}, Response: {response.text}") # Replace with logging
+            return None
+
+    except requests.exceptions.RequestException as e:
+        print("verify_token:", e) # Replace with logging
+        return None