Rl 20241210 #1
Merged: 11 commits, Dec 14, 2024
7 changes: 4 additions & 3 deletions psl_proof/__main__.py
@@ -16,9 +16,10 @@
def load_config() -> Dict[str, Any]:
"""Load proof configuration from environment variables."""
config = {
'dlp_id': 1234, # Set your own DLP ID here
'dlp_id': 16, # MOKSHA
'input_dir': INPUT_DIR,
'salt': 'replace-this-salt' # TODO: replace this so that we can salt in a better way
'salt': '5EkntCWI',
'validator_base_api_url': 'https://api.vana.genesis.dfusion.ai'
}
logging.info(f"Using config: {json.dumps(config, indent=2)}")
return config
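
Note: the docstring still says the configuration is loaded from environment variables, while all four values above are hardcoded. A minimal sketch of what env-driven overrides could look like; the variable names here are hypothetical, not part of this PR:

```python
import os

# Hypothetical env-driven variant of load_config(); the env var names
# and the '/input' default are assumptions, not part of this change.
def load_config_from_env() -> dict:
    return {
        'dlp_id': int(os.environ.get('DLP_ID', '16')),  # 16 = MOKSHA
        'input_dir': os.environ.get('INPUT_DIR', '/input'),
        'salt': os.environ.get('PROOF_SALT', '5EkntCWI'),
        'validator_base_api_url': os.environ.get(
            'VALIDATOR_BASE_API_URL',
            'https://api.vana.genesis.dfusion.ai'
        ),
    }
```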
@@ -39,7 +40,7 @@ def run() -> None:
output_path = os.path.join(OUTPUT_DIR, "results.json")
with open(output_path, 'w') as f:
json.dump(proof_response.dict(), f, indent=2)
logging.info(f"Proof generation complete: {proof_response}")
#logging.info(f"Proof generation complete: {proof_response}")


def extract_input() -> None:
64 changes: 41 additions & 23 deletions psl_proof/models/cargo_data.py
@@ -10,7 +10,7 @@

# Enum for DataSource
class DataSource(Enum):
telegram = 1
telegram = 0

# Source Chat Data
@dataclass
@@ -24,31 +24,36 @@ class SourceChatData:
chat_start_on: datetime = None
chat_ended_on: datetime = None

def chat_id_as_key(self) -> str:
return str(self.chat_id)

def timeliness_value(self) -> float:
if self.total_content_length == 0:
return 0
# tav = (𝛴 litsi) / (𝛴 li)
time_avg = self.total_content_value / self.total_content_length
time_avg = float(self.total_content_value) / float(self.total_content_length)
# a = ln(2) / thl
half_life = 60 # 60 minutes
half_life = 600.0 # 600 minutes
time_decay = math.log(2) / half_life
# t = exp(-atav)
return math.exp(- time_decay * time_avg) # range 0 to 1

def thoughtfulness_of_conversation(self) -> float:
n = len(self.participants) # n: number of participants
u = 2 # 𝜇: optimal number of participants
d = 1 # 𝜎: standard deviation of the curve
if n == 1:
return 0.0
u = 3.0 # 𝜇: optimal number of participants
d = 5.0 # 𝜎: standard deviation of the curve

# Formula: p = exp(-(n-𝜇) / (2𝜎^2))
return math.exp(-(n - u) / (2 * d ** 2)) # range 0 to 1

def contextualness_of_conversation(self) -> float:
c = self.total_content_length #total token length, c, of the text data
m = 2 #midpoint
k = 1 #key parameters.
m = 2.0 #midpoint
k = 1.0 #key parameters.
# l=1/(1+exp(-k(c-c0)))
return 1/(1 + math.exp(-k*(c-m)))
return 1.0/(1.0 + math.exp(-k*(c-m)))

def quality_score(self) -> float:
a = 1 # factor
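
Taken together, the three component scores changed in this file now behave as below; a self-contained sketch mirroring the formulas above, with made-up sample values:

```python
import math

def timeliness(total_content_value: float, total_content_length: int) -> float:
    # tav = (Σ l_i·t_i) / (Σ l_i), then t = exp(-a·tav) with a = ln(2)/600
    # (600-minute half-life), as in timeliness_value() above
    if total_content_length == 0:
        return 0.0
    time_avg = total_content_value / total_content_length
    return math.exp(-(math.log(2) / 600.0) * time_avg)

def thoughtfulness(n: int) -> float:
    # p = exp(-(n-μ) / (2σ²)) with μ = 3, σ = 5; solo chats score 0
    if n == 1:
        return 0.0
    return math.exp(-(n - 3.0) / (2 * 5.0 ** 2))

def contextualness(c: int) -> float:
    # logistic curve l = 1 / (1 + exp(-k(c-m))) with k = 1, m = 2
    return 1.0 / (1.0 + math.exp(-1.0 * (c - 2.0)))

# e.g. 40 tokens with a 120-minute length-weighted average age, 3 participants:
# timeliness(40 * 120, 40) ≈ 0.87, thoughtfulness(3) = 1.0, contextualness(40) ≈ 1.0
```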
@@ -83,13 +88,13 @@ def add_content(
time_in_minutes = int(time_in_seconds // 60)

if (self.chat_start_on):
if (self.chat_start_on < chat_timestamp):
if (self.chat_start_on > chat_timestamp):
self.chat_start_on = chat_timestamp
else :
self.chat_start_on = chat_timestamp

if (self.chat_ended_on):
if (self.chat_ended_on > chat_timestamp):
if (self.chat_ended_on < chat_timestamp):
self.chat_ended_on = chat_timestamp
else :
self.chat_ended_on = chat_timestamp
@@ -113,29 +118,35 @@ def to_dict(self) -> dict:
}

def to_submission_json(self) -> dict:
chat_start_on = self.chat_start_on if self.chat_start_on is not None else datetime.now()
chat_ended_on = self.chat_ended_on if self.chat_ended_on is not None else datetime.now()
return {
"SourceChatId": self.chat_id,
"SourceChatId": self.chat_id_as_key(),
"ParticipantCount": len(self.participants),
"ChatCount": self.chat_count,
"ChatLength": self.total_content_length,
"ChatStartOn": self.chat_start_on.isoformat() if isinstance(self.chat_start_on, datetime) else str(self.chat_start_on),
"ChatEndedOn": self.chat_ended_on.isoformat() if isinstance(self.chat_ended_on, datetime) else str(self.chat_ended_on),
"ChatStartOn": chat_start_on.isoformat(),
"ChatEndedOn": chat_ended_on.isoformat()
}
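
For reference, a per-chat entry serialized by to_submission_json() now looks roughly like this (all values illustrative):

```python
# Illustrative SourceChatData.to_submission_json() output; values are made up.
{
    "SourceChatId": "123456789",
    "ParticipantCount": 3,
    "ChatCount": 42,
    "ChatLength": 1337,
    "ChatStartOn": "2024-12-10T08:15:00",
    "ChatEndedOn": "2024-12-10T09:45:00"
}
```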




# SourceData with enum and chat data
@dataclass
class SourceData:
source: DataSource # "telegram"
user: str
submission_token: str
submission_id: str
submission_by: str
submission_date: datetime
source_chats: List[SourceChatData] # List of SourceChatData instances

def __init__(self, source, submission_id, submission_by, submission_date, user, source_chats=None):
def __init__(self, source, submission_token, submission_id, submission_by, submission_date, user, source_chats=None):
self.source = source
self.user = user
self.submission_token = submission_token
self.submission_id = submission_id
self.submission_by = submission_by
self.submission_date = submission_date
@@ -152,14 +163,23 @@ def to_dict(self):
}

def to_submission_json(self) :
return {
"DataSource": self.source.name, # Use .name to convert enum to string
payload = {
"DataSource": self.source.value, # Use .name to convert enum to string
"SourceId": self.submission_id,
"SubmissionToken": self.submission_token,
"SubmittedBy": self.submission_by,
"SubmittedOn": self.submission_date.isoformat() if isinstance(self.submission_date, datetime) else str(self.submission_date),
"SubmittedOn": self.submission_date.isoformat(),
"Chats": [source_chat.to_submission_json() for source_chat in self.source_chats]
}
#print(f"Submission json:{json}")
return json

def to_verification_json(self) -> dict:
return {
"VerificationType": 0, # VerificationToken.
"Token": self.submission_token,
"Reference": self.submission_id
}
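
The new to_verification_json() pairs the submission token with its submission id; an illustrative payload (placeholders, not real values):

```python
# Illustrative SourceData.to_verification_json() output.
{
    "VerificationType": 0,  # token-based verification
    "Token": "<submission_token from chats.json>",
    "Reference": "<submission_id from chats.json>"
}
```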

# ChatData for Source (final destination data structure)
@dataclass
@@ -168,16 +188,14 @@ class ChatData:
chat_length: int

sentiment: Dict[str, Any] = field(default_factory=dict)
keywords_keybert: Dict[str, Any] = field(default_factory=dict)
#keywords_lda: Dict[str, Any] = field(default_factory=dict)
keywords: Dict[str, Any] = field(default_factory=dict)

def to_dict(self):
return {
"chat_id": self.chat_id,
"chat_length": self.chat_length,
"sentiment": self.sentiment, # No need to call .to_dict() for dicts
"keywords_keybert": self.keywords_keybert, # Same for other dict fields
#"keywords_lda": self.keywords_lda # Same for other dict fields
"sentiment": self.sentiment, # No need to call .to_dict() for dicts
"keywords": self.keywords, # Same for other dict fields
}

# CargoData for Source
@@ -221,4 +239,4 @@ def to_dict(self):
return {
"source_id": self.source_id,
"dlp_id": self.dlp_id
}
}
89 changes: 44 additions & 45 deletions psl_proof/proof.py
@@ -9,33 +9,28 @@
from psl_proof.utils.hashing_utils import salted_data, serialize_bloom_filter_base64, deserialize_bloom_filter_base64
from psl_proof.models.cargo_data import SourceChatData, CargoData, SourceData, DataSource, MetaData, DataSource
from psl_proof.utils.validate_data import validate_data

from psl_proof.utils.submission import submit_data
from psl_proof.utils.verification import verify_token, VerifyTokenResult

class Proof:
def __init__(self, config: Dict[str, Any]):
self.config = config
self.proof_response = ProofResponse(dlp_id=config['dlp_id'])

#RL: Proof Data...

def generate(self) -> ProofResponse:
"""Generate proofs for all input files."""
logging.info("Starting proof data")

zktls_proof = None
data_revision = "01.01"
source_data = None

for input_filename in os.listdir(self.config['input_dir']):
input_file = os.path.join(self.config['input_dir'], input_filename)
if os.path.splitext(input_file)[1].lower() == '.json':
with open(input_file, 'r') as f:
input_data = json.load(f)
#print(f"Input Data: {input_data}")

if input_filename == 'zktls_proof.json':
zktls_proof = input_data.get('zktls_proof', None)
continue

elif input_filename == 'chats.json':
if input_filename == 'chats.json':
source_data = get_source_data(
input_data
)
@@ -47,10 +42,17 @@ def generate(self) -> ProofResponse:
salt
)
source_data.submission_by = source_user_hash_64
is_data_authentic = get_is_data_authentic(
source_data,
zktls_proof
proof_failed_reason = ""
verify_result = verify_token(
self.config,
source_data
)
is_data_authentic = verify_result
if is_data_authentic:
print(f"verify_result: {verify_result}")
is_data_authentic = verify_result.is_valid
proof_failed_reason = verify_result.error_text

cargo_data = CargoData(
source_data = source_data,
source_id = source_user_hash_64
@@ -64,17 +66,21 @@
self.proof_response.ownership = 1.0 if is_data_authentic else 0.0
self.proof_response.authenticity = 1.0 if is_data_authentic else 0.0


current_datetime = datetime.now().isoformat()
if not is_data_authentic: #short circuit so we don't waste analysis
print(f"Validation proof failed: {proof_failed_reason}")
self.proof_response.score = 0.0
self.proof_response.uniqueness = 0.0
self.proof_response.quality = 0.0
self.proof_response.valid = False
self.proof_response.attributes = {
'proof_valid': False,
'proof_failed_reason': proof_failed_reason,
'did_score_content': False,
'source': source_data.Source.name,
'submit_on': current_datetime,
'source': source_data.source.name,
'revision': data_revision,
'submitted_on': current_datetime,
'chat_data': None
}
self.proof_response.metadata = metadata
@@ -93,26 +99,29 @@
and self.proof_response.quality >= score_threshold
and self.proof_response.uniqueness >= score_threshold
)
self.proof_response.score = (
total_score = (
self.proof_response.authenticity * 0.25
+ self.proof_response.ownership * 0.25
+ self.proof_response.quality * 0.25
+ self.proof_response.uniqueness * 0.25
)

self.proof_response.score = round(total_score, 2)
self.proof_response.attributes = {
'proof_valid': is_data_authentic,
'did_score_content': True,
'source': source_data.source.name,
'submit_on': current_datetime,
'revision': data_revision,
'submitted_on': current_datetime,
'chat_data': cargo_data.get_chat_list_data()
}
self.proof_response.metadata = metadata

#RL Validate data & obtain uniqueness from server
# response = submit_data(source_data)...
#RL Todo...

#Submit Source data to server
submit_data(
self.config,
source_data
)
print(f"proof data: {self.proof_response}")
return self.proof_response
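
A worked example of the equal-weight scoring above, with hypothetical component values (the score_threshold constant is defined outside this hunk, so 0.5 here is an assumption):

```python
authenticity, ownership, quality, uniqueness = 1.0, 1.0, 0.8, 0.6
total_score = (authenticity * 0.25 + ownership * 0.25
               + quality * 0.25 + uniqueness * 0.25)  # = 0.85
score = round(total_score, 2)                          # 0.85
# with an assumed score_threshold of 0.5, quality and uniqueness both clear it,
# so proof_response.valid would be True
```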

def get_telegram_data(
@@ -124,14 +133,15 @@ def get_telegram_data(
if chat_type == "message":
# Extract user ID
chat_user_id = input_content.get("sender_id", {}).get("user_id", "")
print(f"chat_user_id: {chat_user_id}")
#print(f"chat_user_id: {chat_user_id}")
source_chat_data.add_participant(chat_user_id)

message_date = submission_timestamp
# Extract and convert the Unix timestamp to a datetime object
date_value = input_content.get("date", None)
if date_value:
message_date = datetime.utcfromtimestamp(date_value) # Convert Unix timestamp to datetime
#print(f"message_date: {message_date}")

# Extract the message content
message = input_content.get('content', {})
@@ -147,33 +157,33 @@

def get_source_data(input_data: Dict[str, Any]) -> SourceData:

revision = input_data.get('revision', '').upper()
revision = input_data.get('revision', '')
if (revision and revision != "01.01"):
print(f"Invalid Revision: {revision}")

raise RuntimeError(f"Invalid Revision: {revision}")

submission_date = datetime.now().timestamp()
# Extract and convert the Unix timestamp to a datetime object
date_value = input_data.get("submission_date", None)
if date_value:
submission_date = datetime.utcfromtimestamp(date_value) # Convert Unix timestamp to datetime
submission_date = datetime.now()
#print(f"submission_date: {submission_date}")

input_source_value = input_data.get('source', '').upper()
input_source = None

if input_source_value == 'TELEGRAM':
input_source = DataSource.telegram
else:
print(f"Unmapped data source: {input_source_value}")
raise RuntimeError(f"Unmapped data source: {input_source_value}")

submission_id = input_data.get('submission_id', '').upper()
submission_token = input_data.get('submission_token', '')
#print("submission_token: {submission_token}")

submission_id = input_data.get('submission_id', '')

input_user = input_data.get('user')
#print(f"input_user: {input_user}")

source_data = SourceData(
source=input_source,
user=input_user,
submission_token = submission_token,
submission_id = submission_id,
submission_by = input_user,
submission_date = submission_date
@@ -198,19 +208,8 @@ def get_source_data(input_data: Dict[str, Any]) -> SourceData:
source_chat
)
else:
print(f"Unhandled data source: {input_source}")
raise RuntimeError(f"Unhandled data source: {input_source}")
source_chats.append(
source_chat
)
return source_data
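
Pieced together from the keys this parser reads, an input chats.json now looks roughly like the sketch below. The per-chat message parsing happens in code elided from this diff, so the "chats" key and its shape are assumptions:

```python
# Hypothetical chats.json structure; only the top-level keys are visible in this diff.
{
    "revision": "01.01",       # anything else now raises RuntimeError
    "source": "telegram",      # matched case-insensitively; unmapped sources raise
    "submission_token": "<token issued to the submitter>",
    "submission_id": "<submission reference id>",
    "user": "<social-media user id>",
    "chats": []                # assumed key; parsed in code elided from this hunk
}
```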


def get_is_data_authentic(content, zktls_proof) -> bool:
"""Determine if the submitted data is authentic by checking the content against a zkTLS proof"""
return 1.0

def get_user_submission_freshness(source, user) -> float:
"""Compute User Submission freshness"""
#TODO: Get the IPFS data and check the attributes for timestamp of last submission
#TODO: Implement cool-down logic so that there is a cool down for one particular social media account. I.E. someone who just submitted will get a very low number
return 1.0