Skip to content

Commit

Permalink
Update cache generator.
Browse files Browse the repository at this point in the history
  • Loading branch information
smeinecke committed Jan 29, 2025
1 parent cebe5f3 commit 3d6493e
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 32 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-page-cache.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
git checkout -b gh-pages
# Add all generated files (if your script generates files to be committed)
git add cache/
git add cache/ --force
# Copy README_pages.md to README.md
cp README_pages.md README.md
Expand Down
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ fabric.properties
.LSOverride

# Icon must end with two \r
Icon
Icon

# Thumbnails
._*
Expand Down Expand Up @@ -412,3 +412,6 @@ $RECYCLE.BIN/
*.lnk

blacklist.txt

# ignore cache files in normal commit
cache/
56 changes: 30 additions & 26 deletions assets/js/lookup.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,34 +80,38 @@ async function processDomain(input) {

return jsonData[sha1Hash];
}

/**
 * Submit handler for the lookup form.
 *
 * Looks the entered domain up via processDomain() and, when the domain is
 * listed, shows the domain together with every source that lists it.
 * Sources hosted on raw.githubusercontent.com are rewritten to the matching
 * github.com "blob" page so the link opens the repository view.
 */
document.getElementById('lookup-form').addEventListener('submit', async (event) => {
    event.preventDefault(); // Prevent the browser's default form submission

    const domainInput = document.getElementById('domain').value;

    const data = await processDomain(domainInput);
    if (!data) {
        return;
    }

    // Tolerate cache entries without a source list instead of throwing.
    const entries = Array.isArray(data.src) ? data.src : [];

    const sources = entries.map(entry => {
        const external = entry.ext;
        // `url` and `is_github` are reassigned below, so they must not be `const`.
        let url = entry.url,
            is_github = false;

        if (url.startsWith('https://raw.githubusercontent.com/')) {
            // Raw URL layout: https://raw.githubusercontent.com/<user>/<repo>/<branch>/<filepath>
            // Splitting on '/' yields ['https:', '', host, user, repo, branch, ...filepath].
            const parts = url.split('/'),
                user = parts[3],
                repo = parts[4],
                branch = parts[5],
                // String.prototype.trim() takes no argument; strip slashes explicitly.
                file = parts.slice(6).join('/').trim().replace(/^\/+|\/+$/g, '');
            is_github = true;
            url = `https://github.com/${user}/${repo}/blob/${branch}/${file}`;
        }

        return {
            url,
            external,
            is_github,
        };
    });

    const items = sources
        .map(source => `<li><a href="${source.url}" target="_blank">${source.url}</a>${source.external ? ' (external)' : ''}</li>`)
        .join('');
    const msg = `<h1>Domain ${data.domain} is listed!</h1><p><h2>Sources:</h2><ul>${items}</ul></p>`;

    showMessage(msg, 'info');
});
2 changes: 1 addition & 1 deletion disposable
23 changes: 20 additions & 3 deletions generate-cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
import json
import os
import logging
import sys
sys.path.append('./disposable/')
from disposable import disposableHostGenerator

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')


def create_cache():
"""Create a hash-based cache of domains and their sources.
Expand All @@ -15,6 +19,12 @@ def create_cache():
Each file contains a JSON object with the domain hash as keys
and the domain and its source as values.
"""

# Collect the 'src' value of every generator source flagged as external.
# A set is used because the collection is only consulted for membership
# tests ("source_url in external_sources") once per cached line.
# NOTE(review): assumes every source['src'] is hashable (presumably a URL
# string, since it is compared against source_url) - confirm.
external_sources = {
    source['src']
    for source in disposableHostGenerator.sources
    if source.get('external')
}

domain_cache = {}

if not os.path.exists('cache'):
Expand Down Expand Up @@ -43,17 +53,23 @@ def create_cache():
if line.startswith('#') or line == '' or ':' not in line:
continue

source, domain = line.rsplit(':', 1)
source_url, domain = line.rsplit(':', 1)
domain_hash = hashlib.sha1(domain.encode('utf8')).hexdigest()

hash_prefix = domain_hash[:2]
if domain_hash not in domain_cache.get(hash_prefix, {}):
continue

# Record every source that lists this domain. The entry key is 'src', so
# the list is created on first use and appended to afterwards; testing for
# the old 'source' key would never match and each new source would
# overwrite the previous ones.
domain_cache[hash_prefix][domain_hash].setdefault('src', []).append({
    'url': source_url,
    'ext': source_url in external_sources
})

for hash_prefix, domain_data in domain_cache.items():
with open('cache/' + hash_prefix + '.json', 'w') as f:
Expand All @@ -62,5 +78,6 @@ def create_cache():
except Exception as e:
logging.error(e)


if __name__ == '__main__':
create_cache()

0 comments on commit 3d6493e

Please sign in to comment.