diff --git a/README.md b/README.md index d7b13c4..eb1b494 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Packaging status](https://repology.org/badge/tiny-repos/bopscrk.svg)](https://repology.org/project/bopscrk/versions) ![[GPL-3.0 License](https://github.com/r3nt0n)](https://img.shields.io/badge/license-GPL%203.0-brightgreen.svg) ![[Python 3.8](https://github.com/r3nt0n)](http://img.shields.io/badge/python-3.8-blue.svg) -![[Version 2.3.1](https://github.com/r3nt0n)](http://img.shields.io/badge/version-2.3.1-orange.svg) +![[Version 2.4](https://github.com/r3nt0n)](http://img.shields.io/badge/version-2.4-orange.svg) @@ -24,8 +24,18 @@ Included in **BlackArch Linux** pentesting ## Requirements + **Python 3** (secondary branch keeps Python 2.7 legacy support) -+ *optional* - to use `lyricpass` module: - `pip install requirements.txt` ++ requests python module + +## Get started +### Download and install +``` +git clone --recurse-submodules https://github.com/r3nt0n/bopscrk +pip install -r requirements.txt +``` +### Run interactive mode +``` +python3 bopscrk.py -i +``` ## Usage ``` @@ -39,8 +49,6 @@ Included in **BlackArch Linux** pentesting -l, --leet enable leet transformations -n max amount of words to combine each time (default: 2) -a , --artists artists to search song lyrics (comma-separated) - -x , --exclude exclude all the words included in other wordlists - (several wordlists should be comma-separated) -o , --output output file to save the wordlist (default: tmp.txt) -C , --config specify config file to use (default: ./bopscrk.cfg) @@ -100,10 +108,12 @@ It will retrieve all lyrics from all songs which belongs to artists that you pro `leet_charset=a:4 e:3` -#### Weighted-words system -[...] Coming soon [...] 
- ## Changelist ++ `2.4 version notes (26/7/2022)` + + make the installation process easier + + starting to implement better memory management (cached wordlists writing and reading i/o files), not working yet + + updating and fixing minor bugs related to dependencies + + 'exclude from other wordlists' feature removed + `2.3.1 version notes` + fixing namespace bug (related to aux.py module, renamed to auxiliars.py) when running on windows systems + **unittest** (and simple unitary tests for transforms, excluders and combinators functions) **implemented**. @@ -134,7 +144,6 @@ It will retrieve all lyrics from all songs which belongs to artists that you pro ## TO-DO list -+ Implement **weighted-words system**. + Create options to **custom case transforms** (e.g.: disable pair/odd transforms). + **Lyricpass** integration was upgraded to last version released by initstring, but still needs some tweaks to speed up the search process (I would appreciate any help). diff --git a/bopscrk.py b/bopscrk.py index 4e5b359..9f62012 100644 --- a/bopscrk.py +++ b/bopscrk.py @@ -9,7 +9,7 @@ name = 'bopscrk.py' __author__ = 'r3nt0n' -__version__ = '2.3.1' +__version__ = '2.4' __status__ = 'Development' diff --git a/modules/args.py b/modules/args.py index 7a8a64c..4072164 100644 --- a/modules/args.py +++ b/modules/args.py @@ -41,10 +41,10 @@ def __init__(self): dest='artists', default=False, help='artists to search song lyrics (comma-separated)') - parser.add_argument('-x', '--exclude', action="store", metavar='', type=str, - dest='exclude', default=False, - help='exclude all the words included in other wordlists ' - '(several wordlists should be comma-separated)') + # parser.add_argument('-x', '--exclude', action="store", metavar='', type=str, + # dest='exclude', default=False, + # help='exclude all the words included in other wordlists ' + # '(several wordlists should be comma-separated)') parser.add_argument('-o', '--output', action="store", metavar='', type=str, dest='outfile', 
default=self.DEFAULT_OUTPUT_FILE, @@ -96,6 +96,12 @@ def set_interactive_options(self): else: break + self.artists = input(' {}[?]{} Artist names to search song lyrics (comma-separated) >>> '.format(color.BLUE, color.END)) + if is_empty(self.artists): + self.artists = False + else: + self.artists = self.artists.split(',') + others = input(' {}[?]{} Some other relevant words (comma-separated) >>> '.format(color.BLUE, color.END)) leet = input(' {}[?]{} Do yo want to make leet transforms? [y/n] >>> '.format(color.BLUE, color.END)) @@ -122,24 +128,20 @@ def set_interactive_options(self): except ValueError: print(' {}[!]{} Should be an integer'.format(color.RED, color.END)) - self.artists = input(' {}[?]{} Artist names to search song lyrics (comma-separated) >>> '.format(color.BLUE, color.END)) - if is_empty(self.artists): self.artists = False - else: self.artists = self.artists.split(',') - - while True: - exclude = input(' {}[?]{} Exclude words from other wordlists? >>> '.format(color.BLUE, color.END)) - if is_empty(exclude): - self.exclude_wordlists = False; break - else: - exclude = exclude.split(',') - valid_paths = True - for wl_path in exclude: - if not os.path.isfile(wl_path): - valid_paths = False - print(' {}[!]{} {} not found'.format(color.RED, color.END, wl_path)) - if valid_paths: - self.exclude_wordlists = exclude - break + # while True: + # exclude = input(' {}[?]{} Exclude words from other wordlists? 
def append_wordlist_to_file(filepath, wordlist):
    """
    Append every word of a wordlist to a file, one word per line.

    The file is created if it does not exist; otherwise the words are added
    after its current content.

    :param filepath: path to file
    :param wordlist: iterable of words (str) to save
    :return: True if every word was written, False if the file could not be
             opened, written or encoded
    """
    try:
        # utf-8 is set explicitly because transform_cached_wordlist_and_save()
        # (modules/transforms.py) reads this same file back as utf-8; the
        # platform default encoding would not round-trip on every system.
        with open(filepath, 'a', encoding='utf-8') as f:
            f.writelines(word + '\n' for word in wordlist)
        return True
    except (OSError, UnicodeError):
        # Only I/O and encoding failures are reported as False. Anything else
        # (KeyboardInterrupt, programming errors) is allowed to propagate
        # instead of being silently swallowed by a bare except.
        return False


def remove_duplicates_from_file(infile_path, outfile_path="temp.000000000.bopscrk"):
    """
    Remove duplicated lines from a file in place, keeping the first occurrence
    of each line and the original order.

    The unique lines are first written to a temporary file which then replaces
    the input file, so the input is left untouched if reading or writing fails.

    :param infile_path: path to the file to clean
    :param outfile_path: path of the temporary file used while filtering (it
                         no longer exists once the function returns)
    :return: None
    """
    # Local import: this helper must not depend on what the rest of the module
    # happens to import.
    import os

    lines_seen = set()  # line contents (without line ending) already written

    # Binary mode keeps the filtering independent of the encoding the wordlist
    # was written with, and the context managers close both files even when an
    # error interrupts the loop.
    with open(infile_path, 'rb') as infile, open(outfile_path, 'wb') as outfile:
        for line in infile:
            # Compare without the line ending, otherwise a last line lacking a
            # trailing newline would never match its earlier duplicate.
            word = line.rstrip(b'\r\n')
            if word in lines_seen:
                continue
            lines_seen.add(word)
            outfile.write(line)

    # os.replace() swaps the files in a single step (and overwrites on
    # Windows too), so the wordlist never disappears between a remove and a
    # rename.
    os.replace(outfile_path, infile_path)
[s.decode("utf-8") for s in lyfinder.lyrics] @@ -73,7 +73,7 @@ def run(): # Add the phrases to BASE wordlist lyrics = remove_by_lengths(lyrics, args.min_length, args.max_length) - print(' {}[+]{} Removing by min and max length range ({} phrases remain)...'.format(color.BLUE, color.END,len(lyrics))) + print(' {}[+]{} Adding raw phrases filtering by min and max length range ({} phrases remain)...'.format(color.BLUE, color.END,len(lyrics))) final_wordlist += lyrics # Take just the initials on each phrase and add as a new word to FINAL wordlist @@ -121,6 +121,24 @@ def run(): # (!) Check for duplicates (is checked before return in combinator() and add_common_separators()) #final_wordlist = remove_duplicates(final_wordlist) + + # # CASE TRANSFORMS + # if args.case: + # print(' {}[+]{} Applying case transforms to {} words...'.format(color.BLUE, color.END, len(final_wordlist))) + # + # # transform_cached_wordlist_and_save(case_transforms, args.outfile) # not working yet, infinite loop ?¿?¿ + # temp_wordlist = [] + # temp_wordlist += multithread_transforms(case_transforms, final_wordlist) + # final_wordlist += temp_wordlist + # + # final_wordlist = remove_duplicates(final_wordlist) + # + # # SAVE WORDLIST TO FILE BEFORE LEET TRANSFORMS + # ############################################################################ + # with open(args.outfile, 'w') as f: + # for word in final_wordlist: + # f.write(word + '\n') + # LEET TRANSFORMS if args.leet: if not Config.LEET_CHARSET: @@ -135,36 +153,44 @@ def run(): ' could take several minutes{}\n'.format(color.ORANGE,color.END,args.max_length,color.ORANGE,color.END,len(final_wordlist),color.ORANGE,color.END)) recursive_msg = '{}recursive{} '.format(color.RED,color.END) print(' {}[+]{} Applying {}leet transforms to {} words...'.format(color.BLUE, color.END, recursive_msg,len(final_wordlist))) - #print(final_wordlist) + + #transform_cached_wordlist_and_save(leet_transforms, args.outfile) + #remove_duplicates_from_file(args.outfile) + 
temp_wordlist = [] temp_wordlist += multithread_transforms(leet_transforms, final_wordlist) final_wordlist += temp_wordlist # CASE TRANSFORMS if args.case: - print(' {}[+]{} Applying case transforms to {} words...'.format(color.BLUE, color.END, len(final_wordlist))) + print(' {}[+]{} Applying case transforms to {} words...'.format(color.BLUE, color.END,len(final_wordlist))) + + # transform_cached_wordlist_and_save(case_transforms, args.outfile) # not working yet, infinite loop ?¿?¿ + temp_wordlist = [] temp_wordlist += multithread_transforms(case_transforms, final_wordlist) final_wordlist += temp_wordlist + final_wordlist = remove_duplicates(final_wordlist) + # EXCLUDE FROM OTHER WORDLISTS - if args.exclude_wordlists: + #if args.exclude_wordlists: # For each path to wordlist provided - for wl_path in args.exclude_wordlists: - print(' {}[+]{} Excluding wordlist {}...'.format(color.BLUE, color.END, os.path.basename(wl_path))) - # Open the file - with open(wl_path, 'r') as x_wordlist_file: - # Read line by line in a loop - while True: - word_to_exclude = x_wordlist_file.readline() - if not word_to_exclude: break # breaks the loop when file ends - final_wordlist = multithread_exclude(word_to_exclude, final_wordlist) + # for wl_path in args.exclude_wordlists: + # print(' {}[+]{} Excluding wordlist {}...'.format(color.BLUE, color.END, os.path.basename(wl_path))) + # # Open the file + # with open(wl_path, 'r') as x_wordlist_file: + # # Read line by line in a loop + # while True: + # word_to_exclude = x_wordlist_file.readline() + # if not word_to_exclude: break # breaks the loop when file ends + # final_wordlist = multithread_exclude(word_to_exclude, final_wordlist) # re-check for duplicates - final_wordlist = remove_duplicates(final_wordlist) + #final_wordlist = remove_duplicates(final_wordlist) # SAVE WORDLIST TO FILE - ############################################################################ + 
def transform_cached_wordlist_and_save(transform_type, filepath, chunk_size=8000):
    """
    Apply a transform to every word stored in a file and append the resulting
    words to that same file, loading at most chunk_size words at a time.

    Only the words present in the file when the function is called are
    transformed. The words appended by this function are never read back,
    which is what guarantees that the loop terminates.

    :param transform_type: transform function handed to multithread_transforms()
    :param filepath: path to the utf-8 wordlist file (one word per line)
    :param chunk_size: max amount of unique words loaded in memory on each
                       pass (default: 8000)
    :return: None
    :raises ValueError: if chunk_size is lower than 1
    """
    # Local import: this helper must not depend on what the rest of the module
    # happens to import.
    import os

    if chunk_size < 1:
        # A chunk of zero words would never advance through the file.
        raise ValueError('chunk_size should be a positive integer')

    # Snapshot of where the original content ends. Everything this function
    # appends lands after this offset and is deliberately left unprocessed.
    end_position = os.path.getsize(filepath)
    last_position = 0

    while last_position < end_position:
        cached_wordlist = []
        words_seen = set()  # O(1) duplicate check inside the current chunk

        # Binary mode makes tell()/seek() plain byte offsets that can be
        # compared against the file size taken above.
        with open(filepath, 'rb') as f:
            f.seek(last_position)
            while len(cached_wordlist) < chunk_size:
                remaining = end_position - f.tell()
                if remaining <= 0:
                    break
                # Never read past the snapshot, even if the original content
                # did not end with a newline and new words were glued to it.
                raw_line = f.readline(remaining)
                if not raw_line:
                    break
                word = raw_line.decode('utf-8').strip()
                # Blank lines are not words; repeated words inside a chunk are
                # transformed only once.
                if word and word not in words_seen:
                    words_seen.add(word)
                    cached_wordlist.append(word)
            # The chunk limit is checked before reading, so no line is ever
            # consumed and then dropped at a chunk boundary.
            last_position = f.tell()

        if cached_wordlist:
            new_wordlist = list(multithread_transforms(transform_type, cached_wordlist))
            append_wordlist_to_file(filepath, new_wordlist)