diff --git a/README.md b/README.md
index d7b13c4..eb1b494 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
[![Packaging status](https://repology.org/badge/tiny-repos/bopscrk.svg)](https://repology.org/project/bopscrk/versions)
![[GPL-3.0 License](https://github.com/r3nt0n)](https://img.shields.io/badge/license-GPL%203.0-brightgreen.svg)
![[Python 3.8](https://github.com/r3nt0n)](http://img.shields.io/badge/python-3.8-blue.svg)
-![[Version 2.3.1](https://github.com/r3nt0n)](http://img.shields.io/badge/version-2.3.1-orange.svg)
+![[Version 2.4](https://github.com/r3nt0n)](http://img.shields.io/badge/version-2.4-orange.svg)
@@ -24,8 +24,18 @@ Included in **BlackArch Linux** pentesting
## Requirements
+ **Python 3** (secondary branch keeps Python 2.7 legacy support)
-+ *optional* - to use `lyricpass` module:
- `pip install requirements.txt`
++ requests python module
+
+## Get started
+### Download and install
+```
+git clone --recurse-submodules https://github.com/r3nt0n/bopscrk
+pip install -r requirements.txt
+```
+### Run interactive mode
+```
+python3 bopscrk.py -i
+```
## Usage
```
@@ -39,8 +49,6 @@ Included in **BlackArch Linux** pentesting
-l, --leet enable leet transformations
-n max amount of words to combine each time (default: 2)
-a , --artists artists to search song lyrics (comma-separated)
- -x , --exclude exclude all the words included in other wordlists
- (several wordlists should be comma-separated)
-o , --output output file to save the wordlist (default: tmp.txt)
-C , --config specify config file to use (default: ./bopscrk.cfg)
@@ -100,10 +108,12 @@ It will retrieve all lyrics from all songs which belongs to artists that you pro
`leet_charset=a:4 e:3`
-#### Weighted-words system
-[...] Coming soon [...]
-
## Changelist
++ `2.4 version notes (26/7/2022)`
+ + make the installation process easier
+ + starting to implement better memory management (cached wordlists writing and reading i/o files), not working yet
+ + updating and fixing minor bugs related to dependencies
+ + 'exclude from other wordlists' feature removed
+ `2.3.1 version notes`
+ fixing namespace bug (related to aux.py module, renamed to auxiliars.py) when running on windows systems
+ **unittest** (and simple unitary tests for transforms, excluders and combinators functions) **implemented**.
@@ -134,7 +144,6 @@ It will retrieve all lyrics from all songs which belongs to artists that you pro
## TO-DO list
-+ Implement **weighted-words system**.
+ Create options to **custom case transforms** (e.g.: disable pair/odd transforms).
+ **Lyricpass** integration was upgraded to last version released by initstring, but still needs some tweaks to speed up the search process (I would appreciate any help).
diff --git a/bopscrk.py b/bopscrk.py
index 4e5b359..9f62012 100644
--- a/bopscrk.py
+++ b/bopscrk.py
@@ -9,7 +9,7 @@
name = 'bopscrk.py'
__author__ = 'r3nt0n'
-__version__ = '2.3.1'
+__version__ = '2.4'
__status__ = 'Development'
diff --git a/modules/args.py b/modules/args.py
index 7a8a64c..4072164 100644
--- a/modules/args.py
+++ b/modules/args.py
@@ -41,10 +41,10 @@ def __init__(self):
dest='artists', default=False,
help='artists to search song lyrics (comma-separated)')
- parser.add_argument('-x', '--exclude', action="store", metavar='', type=str,
- dest='exclude', default=False,
- help='exclude all the words included in other wordlists '
- '(several wordlists should be comma-separated)')
+ # parser.add_argument('-x', '--exclude', action="store", metavar='', type=str,
+ # dest='exclude', default=False,
+ # help='exclude all the words included in other wordlists '
+ # '(several wordlists should be comma-separated)')
parser.add_argument('-o', '--output', action="store", metavar='', type=str,
dest='outfile', default=self.DEFAULT_OUTPUT_FILE,
@@ -96,6 +96,12 @@ def set_interactive_options(self):
else:
break
+ self.artists = input(' {}[?]{} Artist names to search song lyrics (comma-separated) >>> '.format(color.BLUE, color.END))
+ if is_empty(self.artists):
+ self.artists = False
+ else:
+ self.artists = self.artists.split(',')
+
others = input(' {}[?]{} Some other relevant words (comma-separated) >>> '.format(color.BLUE, color.END))
leet = input(' {}[?]{} Do yo want to make leet transforms? [y/n] >>> '.format(color.BLUE, color.END))
@@ -122,24 +128,20 @@ def set_interactive_options(self):
except ValueError:
print(' {}[!]{} Should be an integer'.format(color.RED, color.END))
- self.artists = input(' {}[?]{} Artist names to search song lyrics (comma-separated) >>> '.format(color.BLUE, color.END))
- if is_empty(self.artists): self.artists = False
- else: self.artists = self.artists.split(',')
-
- while True:
- exclude = input(' {}[?]{} Exclude words from other wordlists? >>> '.format(color.BLUE, color.END))
- if is_empty(exclude):
- self.exclude_wordlists = False; break
- else:
- exclude = exclude.split(',')
- valid_paths = True
- for wl_path in exclude:
- if not os.path.isfile(wl_path):
- valid_paths = False
- print(' {}[!]{} {} not found'.format(color.RED, color.END, wl_path))
- if valid_paths:
- self.exclude_wordlists = exclude
- break
+ # while True:
+ # exclude = input(' {}[?]{} Exclude words from other wordlists? >>> '.format(color.BLUE, color.END))
+ # if is_empty(exclude):
+ # self.exclude_wordlists = False; break
+ # else:
+ # exclude = exclude.split(',')
+ # valid_paths = True
+ # for wl_path in exclude:
+ # if not os.path.isfile(wl_path):
+ # valid_paths = False
+ # print(' {}[!]{} {} not found'.format(color.RED, color.END, wl_path))
+ # if valid_paths:
+ # self.exclude_wordlists = exclude
+ # break
self.outfile = input(' {}[?]{} Output file [{}] >>> '.format(color.BLUE, color.END, self.DEFAULT_OUTPUT_FILE))
if is_empty(self.outfile): self.outfile = self.DEFAULT_OUTPUT_FILE
@@ -178,12 +180,12 @@ def set_cli_options(self):
self.n_words = self.args.n_words
self.artists = self.args.artists
self.outfile = self.args.outfile
- self.exclude_wordlists = self.args.exclude
- if self.exclude_wordlists:
- self.exclude_wordlists = self.exclude_wordlists.split(',')
- for wl_path in self.exclude_wordlists:
- if not os.path.isfile(wl_path):
- print(' {}[!]{} {} not found'.format(color.RED, color.END, wl_path))
- sys.exit(4)
+ # self.exclude_wordlists = self.args.exclude
+ # if self.exclude_wordlists:
+ # self.exclude_wordlists = self.exclude_wordlists.split(',')
+ # for wl_path in self.exclude_wordlists:
+ # if not os.path.isfile(wl_path):
+ # print(' {}[!]{} {} not found'.format(color.RED, color.END, wl_path))
+ # sys.exit(4)
if self.artists:
self.artists = self.artists.split(',')
diff --git a/modules/auxiliars.py b/modules/auxiliars.py
index 7a4cd1f..9958ad9 100644
--- a/modules/auxiliars.py
+++ b/modules/auxiliars.py
@@ -29,4 +29,33 @@ def is_valid_date(date_str):
datetime.datetime.strptime(date_str, '%d/%m/%Y')
return True
except ValueError:
- return False
\ No newline at end of file
+ return False
+
+def append_wordlist_to_file(filepath, wordlist):
+ """
+ Append every word in wordlist to the file at filepath, one word per line (opened in mode 'a': created if missing, extended otherwise).
+ :param filepath: path to the file to append to
+ :param wordlist: iterable of words; each one is written followed by a newline
+ :return: True if the words were written, False if any exception was raised
+ """
+ try:
+ with open(filepath, 'a') as f:
+ for word in wordlist:
+ f.write(word + '\n')
+ return True
+ except:  # NOTE(review): bare except also hides non-I/O errors (e.g. a non-str word) behind a plain False
+ return False
+
+
+def remove_duplicates_from_file(infile_path, outfile_path="temp.000000000.bopscrk"):  # rewrites infile_path keeping only the first occurrence of each line; outfile_path is the scratch file used meanwhile
+ lines_seen = set() # lines already written to the scratch file
+ outfile = open(outfile_path, "w")  # NOTE(review): neither handle is closed if the loop raises (no with/try-finally)
+ infile = open(infile_path, "r")
+ for line in infile:
+ if line not in lines_seen: # first time this exact line (trailing newline included) shows up
+ outfile.write(line)
+ lines_seen.add(line)
+ outfile.close()
+ infile.close()
+ os.remove(infile_path)  # NOTE(review): needs a module-level `import os`, which this hunk does not show
+ os.rename(outfile_path, infile_path)  # the de-duplicated scratch file takes the original's name
\ No newline at end of file
diff --git a/modules/main.py b/modules/main.py
index a634705..3794a7a 100644
--- a/modules/main.py
+++ b/modules/main.py
@@ -6,10 +6,10 @@
import sys, os, datetime
from bopscrk import name, __version__, __author__, args, Config
-from modules.auxiliars import clear
+from modules.auxiliars import clear, remove_duplicates_from_file
from modules import banners
from modules.color import color
-from modules.transforms import leet_transforms, case_transforms, artist_space_transforms, lyric_space_transforms, multithread_transforms, take_initials
+from modules.transforms import leet_transforms, case_transforms, artist_space_transforms, lyric_space_transforms, multithread_transforms, take_initials, transform_cached_wordlist_and_save
from modules.combinators import combinator, add_common_separators
from modules.excluders import remove_by_lengths, remove_duplicates, multithread_exclude
@@ -59,7 +59,7 @@ def run():
# Search lyrics if it meets dependencies for lyricpass
try:
from modules.lyricpass import lyricpass
- print('\n{} -- Starting lyricpass module (by initstring) --\n'.format(color.GREY))
+ print('\n{} -- Starting lyricpass module --\n'.format(color.GREY))
print(' {}[*]{} Looking for {}\'s lyrics...'.format(color.CYAN, color.END, artist.title()))
lyrics = lyricpass.lyricpass(artist)
#lyrics = [s.decode("utf-8") for s in lyfinder.lyrics]
@@ -73,7 +73,7 @@ def run():
# Add the phrases to BASE wordlist
lyrics = remove_by_lengths(lyrics, args.min_length, args.max_length)
- print(' {}[+]{} Removing by min and max length range ({} phrases remain)...'.format(color.BLUE, color.END,len(lyrics)))
+ print(' {}[+]{} Adding raw phrases filtering by min and max length range ({} phrases remain)...'.format(color.BLUE, color.END,len(lyrics)))
final_wordlist += lyrics
# Take just the initials on each phrase and add as a new word to FINAL wordlist
@@ -121,6 +121,24 @@ def run():
# (!) Check for duplicates (is checked before return in combinator() and add_common_separators())
#final_wordlist = remove_duplicates(final_wordlist)
+
+ # # CASE TRANSFORMS
+ # if args.case:
+ # print(' {}[+]{} Applying case transforms to {} words...'.format(color.BLUE, color.END, len(final_wordlist)))
+ #
+ # # transform_cached_wordlist_and_save(case_transforms, args.outfile) # not working yet, infinite loop ?¿?¿
+ # temp_wordlist = []
+ # temp_wordlist += multithread_transforms(case_transforms, final_wordlist)
+ # final_wordlist += temp_wordlist
+ #
+ # final_wordlist = remove_duplicates(final_wordlist)
+ #
+ # # SAVE WORDLIST TO FILE BEFORE LEET TRANSFORMS
+ # ############################################################################
+ # with open(args.outfile, 'w') as f:
+ # for word in final_wordlist:
+ # f.write(word + '\n')
+
# LEET TRANSFORMS
if args.leet:
if not Config.LEET_CHARSET:
@@ -135,36 +153,44 @@ def run():
' could take several minutes{}\n'.format(color.ORANGE,color.END,args.max_length,color.ORANGE,color.END,len(final_wordlist),color.ORANGE,color.END))
recursive_msg = '{}recursive{} '.format(color.RED,color.END)
print(' {}[+]{} Applying {}leet transforms to {} words...'.format(color.BLUE, color.END, recursive_msg,len(final_wordlist)))
- #print(final_wordlist)
+
+ #transform_cached_wordlist_and_save(leet_transforms, args.outfile)
+ #remove_duplicates_from_file(args.outfile)
+
temp_wordlist = []
temp_wordlist += multithread_transforms(leet_transforms, final_wordlist)
final_wordlist += temp_wordlist
# CASE TRANSFORMS
if args.case:
- print(' {}[+]{} Applying case transforms to {} words...'.format(color.BLUE, color.END, len(final_wordlist)))
+ print(' {}[+]{} Applying case transforms to {} words...'.format(color.BLUE, color.END,len(final_wordlist)))
+
+ # transform_cached_wordlist_and_save(case_transforms, args.outfile) # not working yet, infinite loop ?¿?¿
+
temp_wordlist = []
temp_wordlist += multithread_transforms(case_transforms, final_wordlist)
final_wordlist += temp_wordlist
+ final_wordlist = remove_duplicates(final_wordlist)
+
# EXCLUDE FROM OTHER WORDLISTS
- if args.exclude_wordlists:
+ #if args.exclude_wordlists:
# For each path to wordlist provided
- for wl_path in args.exclude_wordlists:
- print(' {}[+]{} Excluding wordlist {}...'.format(color.BLUE, color.END, os.path.basename(wl_path)))
- # Open the file
- with open(wl_path, 'r') as x_wordlist_file:
- # Read line by line in a loop
- while True:
- word_to_exclude = x_wordlist_file.readline()
- if not word_to_exclude: break # breaks the loop when file ends
- final_wordlist = multithread_exclude(word_to_exclude, final_wordlist)
+ # for wl_path in args.exclude_wordlists:
+ # print(' {}[+]{} Excluding wordlist {}...'.format(color.BLUE, color.END, os.path.basename(wl_path)))
+ # # Open the file
+ # with open(wl_path, 'r') as x_wordlist_file:
+ # # Read line by line in a loop
+ # while True:
+ # word_to_exclude = x_wordlist_file.readline()
+ # if not word_to_exclude: break # breaks the loop when file ends
+ # final_wordlist = multithread_exclude(word_to_exclude, final_wordlist)
# re-check for duplicates
- final_wordlist = remove_duplicates(final_wordlist)
+ #final_wordlist = remove_duplicates(final_wordlist)
# SAVE WORDLIST TO FILE
- ############################################################################
+ ###########################################################################
with open(args.outfile, 'w') as f:
for word in final_wordlist:
f.write(word + '\n')
@@ -178,7 +204,8 @@ def run():
############################################################################
print('\n {}[+]{} Time elapsed:\t{}'.format(color.GREEN, color.END, total_time))
print(' {}[+]{} Output file:\t{}{}{}{}'.format(color.GREEN, color.END, color.BOLD, color.BLUE, args.outfile, color.END))
- print(' {}[+]{} Words generated:\t{}{}{}\n'.format(color.GREEN, color.END, color.RED, len(final_wordlist), color.END))
+ #print(' {}[+]{} Words generated:\t{}{}{}\n'.format(color.GREEN, color.END, color.RED, str(sum(1 for line in open(args.outfile))), color.END))
+ print(' {}[+]{} Words generated:\t{}{}{}\n'.format(color.GREEN, color.END, color.RED,len(final_wordlist), color.END))
sys.exit(0)
except KeyboardInterrupt:
diff --git a/modules/transforms.py b/modules/transforms.py
index 3d97d56..56cdd0a 100644
--- a/modules/transforms.py
+++ b/modules/transforms.py
@@ -7,6 +7,7 @@
from bopscrk import Config
from modules.excluders import remove_duplicates
+from modules.auxiliars import append_wordlist_to_file
def case_transforms(word):
@@ -143,4 +144,40 @@ def multithread_transforms(transform_type, wordlist):
return new_wordlist
+def transform_cached_wordlist_and_save(transform_type, filepath):
+ """Read filepath in batches, pass each batch to multithread_transforms(transform_type, ...) and append the results to that same file."""
+ last_position = 0  # file offset the next batch is read from
+ # NOTE(review): results are appended to the file being read, so later passes read them back; likely related to the "infinite loop" remark at the commented-out call sites
+ while True:
+ # buffers are rebuilt for every batch
+ cached_wordlist = []
+ new_wordlist = []
+
+ with open(filepath, 'r', encoding='utf-8') as f:
+ counter = 0
+ f.seek(last_position) # resume from the offset saved by the previous pass
+ while True:
+ line = f.readline()
+ if counter >= 8000:  # batch limit reached
+ last_position = f.tell() # NOTE(review): offset is taken after the readline() above, so that line never reaches cached_wordlist
+ break
+ if not line:  # readline() returns '' at end of file
+ break
+ if line.strip() not in cached_wordlist:  # list membership test: linear scan per line
+ cached_wordlist.append(line.strip())
+ counter += 1
+ last_position = f.tell() # save last_position
+
+ new_wordlist += multithread_transforms(transform_type, cached_wordlist)
+ #cached_wordlist += new_wordlist
+ append_wordlist_to_file(filepath, new_wordlist)
+
+ # reopen the file and probe one line past last_position to decide whether to stop
+ with open(filepath, 'r', encoding='utf-8') as f:
+ f.seek(last_position) # put point on last position
+ line = f.readline()
+ if not line:
+ break
+
+
diff --git a/tests/bopscrk.cfg b/tests/bopscrk.cfg
index 28485f2..3a97688 100644
--- a/tests/bopscrk.cfg
+++ b/tests/bopscrk.cfg
@@ -27,7 +27,7 @@ separators_strings=123 xXx !!
# LEET REPLACEMENT CHARSET
# characters to replace and correspondent substitute in leet transforms
leet_charset=a:4 e:3 i:1 o:0 s:$ t:7
-# Uncomment the following line to get an extensive charset
+# Uncomment the following line to get an extensive (and expensive) charset
# leet_charset=a:4 a:@ e:3 i:1 i:! i:¡ l:1 o:0 s:$ s:5 b:8 t:7 c:(
# RECURSIVE LEET TRANSFORMS - Enables a recursive call to leet_transforms() function