Skip to content
This repository has been archived by the owner on Oct 16, 2023. It is now read-only.

Commit

Permalink
Merge pull request #104 from Daethyra/v1.5.2
Browse files Browse the repository at this point in the history
V1.5.2
  • Loading branch information
Daethyra authored Aug 30, 2023
2 parents 81d844e + 238977d commit afce039
Show file tree
Hide file tree
Showing 31 changed files with 237 additions and 331 deletions.
24 changes: 24 additions & 0 deletions .github/dependabot-misc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Enable version updates for npm
- package-ecosystem: "npm"
# Look for `package.json` and lock files in the web-ui module directory
directory: "/project/modules/web-ui" # MUST BE UPDATED UPON TYPESCRIPT PROGRAMMING
# Check the npm registry for updates every day (weekdays)
schedule:
interval: "daily"
# Check for npm updates at 9am Pacific time (see `timezone` below)
time: "09:00"
timezone: "America/Los_Angeles"
target-branch: "master"
versioning-strategy: auto

# Enable version updates for Docker
- package-ecosystem: "docker"
# Look for a `Dockerfile` in the orchestration module directory
directory: "/project/modules/orchestration"
# Check for updates every day (weekdays)
schedule:
interval: "daily"
# Check for Docker updates at 9am Pacific time (see `timezone` below)
time: "09:00"
timezone: "America/Los_Angeles"
target-branch: "master"
24 changes: 24 additions & 0 deletions .github/dependabot-python.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
# Enable version updates for multiple branches
- package-ecosystem: "pip"
directory: "project/modules/orchestration"
schedule:
interval: "daily"
# Check for pip updates at 9am Pacific time (see `timezone` below)
time: "09:00"
timezone: "America/Los_Angeles"
target-branch: "master"
versioning-strategy: auto

- package-ecosystem: "pip"
directory: "project/modules/orchestration"
schedule:
interval: "daily"
# Check for pip updates at 9am Pacific time (see `timezone` below)
time: "09:00"
timezone: "America/Los_Angeles"
target-branch: "v1.5*"
versioning-strategy: auto
60 changes: 0 additions & 60 deletions .github/dependabot.yml

This file was deleted.

4 changes: 0 additions & 4 deletions .github/workflows/.gitignore

This file was deleted.

4 changes: 2 additions & 2 deletions .github/workflows/greetings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ jobs:
- uses: actions/first-interaction@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
issue-message: "Message that will be displayed on users' first issue"
pr-message: "Message that will be displayed on users' first pull request"
issue-message: "Thanks for chippin' in, choom. We'll have this looked at right away."
pr-message: "OK! Big legend! Nice job, can't wait to see your work."
35 changes: 0 additions & 35 deletions documents/todo.txt

This file was deleted.

File renamed without changes.
2 changes: 2 additions & 0 deletions project/documents/supplementary-information/explanation.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
These subdirectories contain contextual information for the project.
- The AI may make use of everything inside 'supplementary-information/'
16 changes: 15 additions & 1 deletion project/modules/CyberSentinel/.env.template
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
TRAINING_DATA_PATH=
# Create a copy of this file and name it '.env'

# Model Training Configuration
TRAINING_DATA_PATH=project/modules/CyberSentinel/training-data/
LEARNING_RATE=0.001
BATCH_SIZE=32
EPOCHS=10
L2_REG=0.01

# Preprocessor Configuration (Use '.' for current working directory)
INPUT_FILE_PATH=
PREPROCESSED_DATA_FILE_PATH=project/modules/CyberSentinel/training-data/Processed-Data/

# Temporary file paths for DataLabeler
TEMP_PDF_FILE_PATH=temp_pdf_data.csv
TEMP_TXT_FILE_PATH=temp_txt_data.csv

# Path to save labeled data
LABELED_DATA_FILE_PATH=project/modules/CyberSentinel/preprocess/
Empty file.
26 changes: 17 additions & 9 deletions project/modules/CyberSentinel/preprocess/data_labeler.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
from dotenv import load_dotenv
import os
import csv
from typing import List, Tuple

# Point to the location of the .env file relative to the script's location
env_path = os.path.join(os.path.dirname(__file__), '../../../.env')

# Load the .env file
load_dotenv(dotenv_path=env_path)

class DataLabeler:
def __init__(self, temp_pdf_file_path: str = "temp_pdf_data.csv", temp_txt_file_path: str = "temp_txt_data.csv"):
self.temp_pdf_file_path = temp_pdf_file_path
self.temp_txt_file_path = temp_txt_file_path
def __init__(self):
default_temp_path = os.path.dirname(__file__)
self.temp_pdf_file_path = os.getenv('TEMP_PDF_FILE_PATH', os.path.join(default_temp_path, 'temp_pdf_data.csv'))
self.temp_txt_file_path = os.getenv('TEMP_TXT_FILE_PATH', os.path.join(default_temp_path, 'temp_txt_data.csv'))
self.output_file_path = os.getenv('LABELED_DATA_FILE_PATH')
self.labeled_pdf_data = self.load_temp_data(self.temp_pdf_file_path)
self.labeled_txt_data = self.load_temp_data(self.temp_txt_file_path)

Expand Down Expand Up @@ -36,13 +45,13 @@ def load_temp_data(self, file_path: str) -> List[Tuple[str, bool]]:
def label_data(self, data: List[str]) -> List[Tuple[str, bool]]:
labeled_data = []
for text in data:
print("\nSample:")
print(text)
print(f"\\nSample:{text}")
label = self.get_user_input("Does this text indicate the intention to commit acts of hate-based violence? (True/False): ")
labeled_data.append((text, label))
return labeled_data

def save_labeled_data_to_csv(self, labeled_data: List[Tuple[str, bool]], file_path: str):
def save_labeled_data_to_csv(self, labeled_data: List[Tuple[str, bool]]):
file_path = self.output_file_path or input("Enter the path to save the labeled data: ")
with open(file_path, 'w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
writer.writerow(['text', 'label'])
Expand All @@ -52,12 +61,11 @@ def save_labeled_data_to_csv(self, labeled_data: List[Tuple[str, bool]], file_pa
if __name__ == "__main__":
data_labeler = DataLabeler()
# Load the preprocessed data from the file saved by the Preprocessor
file_path = input("Enter the path to the preprocessed data file: ")
file_path = os.getenv('PREPROCESSED_DATA_FILE_PATH') or input("Enter the path to the preprocessed data file: ")
with open(file_path, 'r', encoding='utf-8') as file:
reader = csv.reader(file)
next(reader) # Skip the header
data = [row[0] for row in reader]

labeled_data = data_labeler.label_data(data)
output_file_path = input("Enter the path to save the labeled data: ")
data_labeler.save_labeled_data_to_csv(labeled_data, output_file_path)
data_labeler.save_labeled_data_to_csv(labeled_data)
Loading

0 comments on commit afce039

Please sign in to comment.