Rl 20241210 #1
Merged: 11 commits, Dec 14, 2024
7 changes: 4 additions & 3 deletions psl_proof/__main__.py
@@ -16,9 +16,10 @@
def load_config() -> Dict[str, Any]:
"""Load proof configuration from environment variables."""
config = {
'dlp_id': 1234, # Set your own DLP ID here
'dlp_id': 16, # MOKSHA
'input_dir': INPUT_DIR,
'salt': 'replace-this-salt' # TODO: replace this so that we can salt in a better way
'salt': '5EkntCWI',
'validator_base_api_url': 'https://api.vana.genesis.dfusion.ai'
}
logging.info(f"Using config: {json.dumps(config, indent=2)}")
return config
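
Note: the docstring still says the configuration is loaded from environment variables, while all four values above are hardcoded. A minimal sketch of what env-driven overrides could look like; the variable names here are hypothetical, not part of this PR:

```python
import os

# Hypothetical env-driven variant of load_config(); the env var names
# and the '/input' default are assumptions, not part of this change.
def load_config_from_env() -> dict:
    return {
        'dlp_id': int(os.environ.get('DLP_ID', '16')),  # 16 = MOKSHA
        'input_dir': os.environ.get('INPUT_DIR', '/input'),
        'salt': os.environ.get('PROOF_SALT', '5EkntCWI'),
        'validator_base_api_url': os.environ.get(
            'VALIDATOR_BASE_API_URL',
            'https://api.vana.genesis.dfusion.ai'
        ),
    }
```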
@@ -39,7 +40,7 @@ def run() -> None:
output_path = os.path.join(OUTPUT_DIR, "results.json")
with open(output_path, 'w') as f:
json.dump(proof_response.dict(), f, indent=2)
logging.info(f"Proof generation complete: {proof_response}")
#logging.info(f"Proof generation complete: {proof_response}")


def extract_input() -> None:
64 changes: 41 additions & 23 deletions psl_proof/models/cargo_data.py
@@ -10,7 +10,7 @@

# Enum for DataSource
class DataSource(Enum):
telegram = 1
telegram = 0

# Source Chat Data
@dataclass
@@ -24,31 +24,36 @@ class SourceChatData:
chat_start_on: datetime = None
chat_ended_on: datetime = None

def chat_id_as_key(self) -> str:
return str(self.chat_id)

def timeliness_value(self) -> float:
if self.total_content_length == 0:
return 0
# tav = (𝛴 litsi) / (𝛴 li)
time_avg = self.total_content_value / self.total_content_length
time_avg = float(self.total_content_value) / float(self.total_content_length)
# a = ln(2) / thl
half_life = 60 # 60 minutes
half_life = 600.0 # 600 minutes
time_decay = math.log(2) / half_life
# t = exp(-atav)
return math.exp(- time_decay * time_avg) # range 0 to 1

def thoughtfulness_of_conversation(self) -> float:
n = len(self.participants) # n: number of participants
u = 2 # 𝜇: optimal number of participants
d = 1 # 𝜎: standard deviation of the curve
if n == 1:
return 0.0
u = 3.0 # 𝜇: optimal number of participants
d = 5.0 # 𝜎: standard deviation of the curve

# Formula: p = exp(-(n-𝜇) / (2𝜎^2))
return math.exp(-(n - u) / (2 * d ** 2)) # range 0 to 1

def contextualness_of_conversation(self) -> float:
c = self.total_content_length #total token length, c, of the text data
m = 2 #midpoint
k = 1 #key parameters.
m = 2.0 #midpoint
k = 1.0 #key parameters.
# l=1/(1+exp(-k(c-c0)))
return 1/(1 + math.exp(-k*(c-m)))
return 1.0/(1.0 + math.exp(-k*(c-m)))

def quality_score(self) -> float:
a = 1 # factor
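
Taken together, the three component scores changed in this file now behave as below; a self-contained sketch mirroring the formulas above, with made-up sample values:

```python
import math

def timeliness(total_content_value: float, total_content_length: int) -> float:
    # tav = (Σ l_i·t_i) / (Σ l_i), then t = exp(-a·tav) with a = ln(2)/600
    # (600-minute half-life), as in timeliness_value() above
    if total_content_length == 0:
        return 0.0
    time_avg = total_content_value / total_content_length
    return math.exp(-(math.log(2) / 600.0) * time_avg)

def thoughtfulness(n: int) -> float:
    # p = exp(-(n-μ) / (2σ²)) with μ = 3, σ = 5; solo chats score 0
    if n == 1:
        return 0.0
    return math.exp(-(n - 3.0) / (2 * 5.0 ** 2))

def contextualness(c: int) -> float:
    # logistic curve l = 1 / (1 + exp(-k(c-m))) with k = 1, m = 2
    return 1.0 / (1.0 + math.exp(-1.0 * (c - 2.0)))

# e.g. 40 tokens with a 120-minute length-weighted average age, 3 participants:
# timeliness(40 * 120, 40) ≈ 0.87, thoughtfulness(3) = 1.0, contextualness(40) ≈ 1.0
```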
@@ -83,13 +88,13 @@ def add_content(
time_in_minutes = int(time_in_seconds // 60)

if (self.chat_start_on):
if (self.chat_start_on < chat_timestamp):
if (self.chat_start_on > chat_timestamp):
self.chat_start_on = chat_timestamp
else :
self.chat_start_on = chat_timestamp

if (self.chat_ended_on):
if (self.chat_ended_on > chat_timestamp):
if (self.chat_ended_on < chat_timestamp):
self.chat_ended_on = chat_timestamp
else :
self.chat_ended_on = chat_timestamp
@@ -113,29 +118,35 @@ def to_dict(self) -> dict:
}

def to_submission_json(self) -> dict:
chat_start_on = self.chat_start_on if self.chat_start_on is not None else datetime.now()
chat_ended_on = self.chat_ended_on if self.chat_ended_on is not None else datetime.now()
return {
"SourceChatId": self.chat_id,
"SourceChatId": self.chat_id_as_key(),
"ParticipantCount": len(self.participants),
"ChatCount": self.chat_count,
"ChatLength": self.total_content_length,
"ChatStartOn": self.chat_start_on.isoformat() if isinstance(self.chat_start_on, datetime) else str(self.chat_start_on),
"ChatEndedOn": self.chat_ended_on.isoformat() if isinstance(self.chat_ended_on, datetime) else str(self.chat_ended_on),
"ChatStartOn": chat_start_on.isoformat(),
"ChatEndedOn": chat_ended_on.isoformat()
}
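
For reference, a per-chat entry serialized by to_submission_json() now looks roughly like this (all values illustrative):

```python
# Illustrative SourceChatData.to_submission_json() output; values are made up.
{
    "SourceChatId": "123456789",
    "ParticipantCount": 3,
    "ChatCount": 42,
    "ChatLength": 1337,
    "ChatStartOn": "2024-12-10T08:15:00",
    "ChatEndedOn": "2024-12-10T09:45:00"
}
```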




# SourceData with enum and chat data
@dataclass
class SourceData:
source: DataSource # "telegram"
user: str
submission_token: str
submission_id: str
submission_by: str
submission_date: datetime
source_chats: List[SourceChatData] # List of SourceChatData instances

def __init__(self, source, submission_id, submission_by, submission_date, user, source_chats=None):
def __init__(self, source, submission_token, submission_id, submission_by, submission_date, user, source_chats=None):
self.source = source
self.user = user
self.submission_token = submission_token
self.submission_id = submission_id
self.submission_by = submission_by
self.submission_date = submission_date
@@ -152,14 +163,23 @@ def to_dict(self):
}

def to_submission_json(self) :
return {
"DataSource": self.source.name, # Use .name to convert enum to string
payload = {
"DataSource": self.source.value, # Use .name to convert enum to string
"SourceId": self.submission_id,
"SubmissionToken": self.submission_token,
"SubmittedBy": self.submission_by,
"SubmittedOn": self.submission_date.isoformat() if isinstance(self.submission_date, datetime) else str(self.submission_date),
"SubmittedOn": self.submission_date.isoformat(),
"Chats": [source_chat.to_submission_json() for source_chat in self.source_chats]
}
#print(f"Submission json:{json}")
return json

def to_verification_json(self) -> dict:
return {
"VerificationType": 0, # VerificationToken.
"Token": self.submission_token,
"Reference": self.submission_id
}
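
The new to_verification_json() pairs the submission token with its submission id; an illustrative payload (placeholders, not real values):

```python
# Illustrative SourceData.to_verification_json() output.
{
    "VerificationType": 0,  # token-based verification
    "Token": "<submission_token from chats.json>",
    "Reference": "<submission_id from chats.json>"
}
```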

# ChatData for Source (final destination data structure)
@dataclass
@@ -168,16 +188,14 @@ class ChatData:
chat_length: int

sentiment: Dict[str, Any] = field(default_factory=dict)
keywords_keybert: Dict[str, Any] = field(default_factory=dict)
#keywords_lda: Dict[str, Any] = field(default_factory=dict)
keywords: Dict[str, Any] = field(default_factory=dict)

def to_dict(self):
return {
"chat_id": self.chat_id,
"chat_length": self.chat_length,
"sentiment": self.sentiment, # No need to call .to_dict() for dicts
"keywords_keybert": self.keywords_keybert, # Same for other dict fields
#"keywords_lda": self.keywords_lda # Same for other dict fields
"sentiment": self.sentiment, # No need to call .to_dict() for dicts
"keywords": self.keywords, # Same for other dict fields
}

# CargoData for Source
@@ -221,4 +239,4 @@ def to_dict(self):
return {
"source_id": self.source_id,
"dlp_id": self.dlp_id
}
}
89 changes: 44 additions & 45 deletions psl_proof/proof.py
@@ -9,33 +9,28 @@
from psl_proof.utils.hashing_utils import salted_data, serialize_bloom_filter_base64, deserialize_bloom_filter_base64
from psl_proof.models.cargo_data import SourceChatData, CargoData, SourceData, DataSource, MetaData, DataSource
from psl_proof.utils.validate_data import validate_data

from psl_proof.utils.submission import submit_data
from psl_proof.utils.verification import verify_token, VerifyTokenResult

class Proof:
def __init__(self, config: Dict[str, Any]):
self.config = config
self.proof_response = ProofResponse(dlp_id=config['dlp_id'])

#RL: Proof Data...

def generate(self) -> ProofResponse:
"""Generate proofs for all input files."""
logging.info("Starting proof data")

zktls_proof = None
data_revision = "01.01"
source_data = None

for input_filename in os.listdir(self.config['input_dir']):
input_file = os.path.join(self.config['input_dir'], input_filename)
if os.path.splitext(input_file)[1].lower() == '.json':
with open(input_file, 'r') as f:
input_data = json.load(f)
#print(f"Input Data: {input_data}")

if input_filename == 'zktls_proof.json':
zktls_proof = input_data.get('zktls_proof', None)
continue

elif input_filename == 'chats.json':
if input_filename == 'chats.json':
source_data = get_source_data(
input_data
)
@@ -47,10 +42,17 @@ def generate(self) -> ProofResponse:
salt
)
source_data.submission_by = source_user_hash_64
is_data_authentic = get_is_data_authentic(
source_data,
zktls_proof
proof_failed_reason = ""
verify_result = verify_token(
self.config,
source_data
)
is_data_authentic = verify_result
if is_data_authentic:
print(f"verify_result: {verify_result}")
is_data_authentic = verify_result.is_valid
proof_failed_reason = verify_result.error_text

cargo_data = CargoData(
source_data = source_data,
source_id = source_user_hash_64
@@ -64,17 +66,21 @@
self.proof_response.ownership = 1.0 if is_data_authentic else 0.0
self.proof_response.authenticity = 1.0 if is_data_authentic else 0.0


current_datetime = datetime.now().isoformat()
if not is_data_authentic: #short circuit so we don't waste analysis
print(f"Validation proof failed: {proof_failed_reason}")
self.proof_response.score = 0.0
self.proof_response.uniqueness = 0.0
self.proof_response.quality = 0.0
self.proof_response.valid = False
self.proof_response.attributes = {
'proof_valid': False,
'proof_failed_reason': proof_failed_reason,
'did_score_content': False,
'source': source_data.Source.name,
'submit_on': current_datetime,
'source': source_data.source.name,
'revision': data_revision,
'submitted_on': current_datetime,
'chat_data': None
}
self.proof_response.metadata = metadata
@@ -93,26 +99,29 @@
and self.proof_response.quality >= score_threshold
and self.proof_response.uniqueness >= score_threshold
)
self.proof_response.score = (
total_score = (
self.proof_response.authenticity * 0.25
+ self.proof_response.ownership * 0.25
+ self.proof_response.quality * 0.25
+ self.proof_response.uniqueness * 0.25
)

self.proof_response.score = round(total_score, 2)
self.proof_response.attributes = {
'proof_valid': is_data_authentic,
'did_score_content': True,
'source': source_data.source.name,
'submit_on': current_datetime,
'revision': data_revision,
'submitted_on': current_datetime,
'chat_data': cargo_data.get_chat_list_data()
}
self.proof_response.metadata = metadata

#RL Validate data & obtain uniqueness from server
# response = submit_data(source_data)...
#RL Todo...

#Submit Source data to server
submit_data(
self.config,
source_data
)
print(f"proof data: {self.proof_response}")
return self.proof_response
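
A worked example of the equal-weight scoring above, with hypothetical component values (the score_threshold constant is defined outside this hunk, so 0.5 here is an assumption):

```python
authenticity, ownership, quality, uniqueness = 1.0, 1.0, 0.8, 0.6
total_score = (authenticity * 0.25 + ownership * 0.25
               + quality * 0.25 + uniqueness * 0.25)  # = 0.85
score = round(total_score, 2)                          # 0.85
# with an assumed score_threshold of 0.5, quality and uniqueness both clear it,
# so proof_response.valid would be True
```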

def get_telegram_data(
@@ -124,14 +133,15 @@ def get_telegram_data(
if chat_type == "message":
# Extract user ID
chat_user_id = input_content.get("sender_id", {}).get("user_id", "")
print(f"chat_user_id: {chat_user_id}")
#print(f"chat_user_id: {chat_user_id}")
source_chat_data.add_participant(chat_user_id)

message_date = submission_timestamp
# Extract and convert the Unix timestamp to a datetime object
date_value = input_content.get("date", None)
if date_value:
message_date = datetime.utcfromtimestamp(date_value) # Convert Unix timestamp to datetime
#print(f"message_date: {message_date}")

# Extract the message content
message = input_content.get('content', {})
@@ -147,33 +157,33 @@

def get_source_data(input_data: Dict[str, Any]) -> SourceData:

revision = input_data.get('revision', '').upper()
revision = input_data.get('revision', '')
if (revision and revision != "01.01"):
print(f"Invalid Revision: {revision}")

raise RuntimeError(f"Invalid Revision: {revision}")

submission_date = datetime.now().timestamp()
# Extract and convert the Unix timestamp to a datetime object
date_value = input_data.get("submission_date", None)
if date_value:
submission_date = datetime.utcfromtimestamp(date_value) # Convert Unix timestamp to datetime
submission_date = datetime.now()
#print(f"submission_date: {submission_date}")

input_source_value = input_data.get('source', '').upper()
input_source = None

if input_source_value == 'TELEGRAM':
input_source = DataSource.telegram
else:
print(f"Unmapped data source: {input_source_value}")
raise RuntimeError(f"Unmapped data source: {input_source_value}")

submission_id = input_data.get('submission_id', '').upper()
submission_token = input_data.get('submission_token', '')
#print("submission_token: {submission_token}")

submission_id = input_data.get('submission_id', '')

input_user = input_data.get('user')
#print(f"input_user: {input_user}")

source_data = SourceData(
source=input_source,
user=input_user,
submission_token = submission_token,
submission_id = submission_id,
submission_by = input_user,
submission_date = submission_date
@@ -198,19 +208,8 @@ def get_source_data(input_data: Dict[str, Any]) -> SourceData:
source_chat
)
else:
print(f"Unhandled data source: {input_source}")
raise RuntimeError(f"Unhandled data source: {input_source}")
source_chats.append(
source_chat
)
return source_data
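
Pieced together from the keys this parser reads, an input chats.json now looks roughly like the sketch below. The per-chat message parsing happens in code elided from this diff, so the "chats" key and its shape are assumptions:

```python
# Hypothetical chats.json structure; only the top-level keys are visible in this diff.
{
    "revision": "01.01",       # anything else now raises RuntimeError
    "source": "telegram",      # matched case-insensitively; unmapped sources raise
    "submission_token": "<token issued to the submitter>",
    "submission_id": "<submission reference id>",
    "user": "<social-media user id>",
    "chats": []                # assumed key; parsed in code elided from this hunk
}
```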


def get_is_data_authentic(content, zktls_proof) -> bool:
"""Determine if the submitted data is authentic by checking the content against a zkTLS proof"""
return 1.0

def get_user_submission_freshness(source, user) -> float:
"""Compute User Submission freshness"""
#TODO: Get the IPFS data and check the attributes for timestamp of last submission
#TODO: Implement cool-down logic so that there is a cool down for one particular social media account. I.E. someone who just submitted will get a very low number
return 1.0