Skip to content

Commit

Permalink
libcdb-cli: add --offline-only, refactor unstrip and add fetch pars…
Browse files Browse the repository at this point in the history
…er for download libc-database (#2478)

* Add `return_raw` for `search_by_symbol_offsets`

* Add `--offline-only` and unstrip libc as default behavior

* Add short arg name for `--download-libc`

* Fix bugs

* Add fetch parser

* Fix bugs

* `file` parser: do not unstrip by default

* Update function docs

* Update CHANGELOG

* Edit dest of `--no-unstrip`

* Update CHANGELOG

* Unstrip before `return cache`

* Revert `--unstrip` help

* Improve `libcdb fetch` default behavior

* Perf fetch command

* Fix fetch command `-u`

---------

Co-authored-by: peace-maker <[email protected]>
  • Loading branch information
the-soloist and peace-maker authored Oct 24, 2024
1 parent d225311 commit fa14663
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 74 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ The table below shows which release corresponds to each branch, and what date th
- [#2479][2479] Support extracting libraries from Docker image in `pwn template`
- [#2483][2483] Only print `checksec` output of `ELF.libc` when it was printed for the `ELF` already
- [#2482][2482] Throw error when using `sni` and setting `server_hostname` manually in `remote`
- [#2478][2478] libcdb-cli: add `--offline-only`, refactor unstrip and add a `fetch` parser for downloading libc-database

[2471]: https://github.com/Gallopsled/pwntools/pull/2471
[2358]: https://github.com/Gallopsled/pwntools/pull/2358
Expand All @@ -92,6 +93,7 @@ The table below shows which release corresponds to each branch, and what date th
[2479]: https://github.com/Gallopsled/pwntools/pull/2479
[2483]: https://github.com/Gallopsled/pwntools/pull/2483
[2482]: https://github.com/Gallopsled/pwntools/pull/2482
[2478]: https://github.com/Gallopsled/pwntools/pull/2478

## 4.14.0 (`beta`)

Expand Down
180 changes: 110 additions & 70 deletions pwnlib/commandline/libcdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,26 +37,27 @@
)

lookup_parser.add_argument(
'--download-libc',
'-d', '--download-libc',
action = 'store_true',
default = False,
help = 'Attempt to download the matching libc.so'
)

lookup_parser.add_argument(
'--unstrip',
action = 'store_true',
default = True,
help = 'Attempt to unstrip the libc binary with debug symbols from a debuginfod server'
)

lookup_parser.add_argument(
'--no-unstrip',
action = 'store_false',
dest = 'unstrip',
help = 'Do NOT attempt to unstrip the libc binary with debug symbols from a debuginfod server'
)

lookup_parser.add_argument(
'--offline-only',
action = 'store_true',
default = False,
dest = 'offline_only',
help = 'Attempt to searching with offline only mode'
)

hash_parser = libc_commands.add_parser(
'hash',
help = 'Display information of a libc version given an unique hash',
Expand All @@ -80,26 +81,27 @@
)

hash_parser.add_argument(
'--download-libc',
'-d', '--download-libc',
action = 'store_true',
default = False,
help = 'Attempt to download the matching libc.so'
)

hash_parser.add_argument(
'--unstrip',
action = 'store_true',
default = True,
help = 'Attempt to unstrip the libc binary with debug symbols from a debuginfod server'
)

hash_parser.add_argument(
'--no-unstrip',
action = 'store_false',
dest = 'unstrip',
help = 'Do NOT attempt to unstrip the libc binary with debug symbols from a debuginfod server'
)

hash_parser.add_argument(
'--offline-only',
action = 'store_true',
default = False,
dest = 'offline_only',
help = 'Attempt to searching with offline only mode'
)

file_parser = libc_commands.add_parser(
'file',
help = 'Dump information about a libc binary',
Expand Down Expand Up @@ -130,25 +132,34 @@
file_parser.add_argument(
'--unstrip',
action = 'store_true',
default = False,
dest = 'unstrip',
help = 'Attempt to unstrip the libc binary inplace with debug symbols from a debuginfod server'
)

common_symbols = ['dup2', 'printf', 'puts', 'read', 'system', 'write']
fetch_parser = libc_commands.add_parser(
'fetch',
help = 'Fetch libc database',
description = 'Fetch libc database. If no argument passed, it will init and upgrade libc-database repository',
)

def find_libc(params):
import requests
url = "https://libc.rip/api/find"
result = requests.post(url, json=params, timeout=20)
log.debug('Request: %s', params)
log.debug('Result: %s', result.json())
if result.status_code != 200 or len(result.json()) == 0:
log.failure("Could not find libc for %s on libc.rip", params)
return []
fetch_parser.add_argument(
'path',
nargs = '?',
default = context.local_libcdb,
help = 'Set libc-database path, If it is empty, the default path will be `context.local_libcdb` (%s)' % context.local_libcdb
)

fetch_parser.add_argument(
'-u', '--update',
metavar = 'update',
nargs = '+',
choices = ['all', 'ubuntu', 'debian', 'rpm', 'centos', 'arch', 'alpine', 'kali', 'parrotsec', 'launchpad'],
help = 'Fetch the desired libc categories'
)

return result.json()
common_symbols = ['dup2', 'printf', 'puts', 'read', 'system', 'write']

def print_libc(libc):
def print_libc_info(libc):
log.info('%s', text.red(libc['id']))
log.indented('\t%-20s %s', text.green('BuildID:'), libc['buildid'])
log.indented('\t%-20s %s', text.green('MD5:'), libc['md5'])
Expand All @@ -158,14 +169,39 @@ def print_libc(libc):
for symbol in libc['symbols'].items():
log.indented('\t%25s = %s', symbol[0], symbol[1])

def handle_remote_libc(args, libc):
    """Print one matched libc entry and, if requested, copy it locally.

    Arguments:
        args: Parsed command-line options; ``download_libc`` and
            ``unstrip`` are read here.
        libc(dict): A matched libc entry; the ``buildid`` and ``id``
            keys are read here.
    """
    print_libc(libc)

    # Nothing more to do unless the user asked for the binary itself.
    if not args.download_libc:
        return

    local_copy = libcdb.search_by_build_id(libc['buildid'], args.unstrip)
    if not local_copy:
        return

    if args.unstrip:
        libcdb.unstrip_libc(local_copy)

    # The copy lands in the current directory, named after the entry id.
    shutil.copy(local_copy, './{}.so'.format(libc['id']))
def print_libc_elf(exe):
    """Log a summary of a libc ELF: name, version, build id, hashes, symbols.

    Arguments:
        exe: ELF-like object; its ``path``, ``data``, ``buildid`` and
            ``symbols`` attributes are read here.
    """
    import hashlib

    log.info('%s', text.red(os.path.basename(exe.path)))

    version = get_libc_version(exe)
    if version:
        log.indented('%-20s %s', text.green('Version:'), version)

    if exe.buildid:
        log.indented('%-20s %s', text.green('BuildID:'), enhex(exe.buildid))

    # Digests are computed over the raw file contents.
    digests = (
        ('MD5:', hashlib.md5),
        ('SHA1:', hashlib.sha1),
        ('SHA256:', hashlib.sha256),
    )
    for label, hasher in digests:
        log.indented('%-20s %s', text.green(label), hasher(exe.data).hexdigest())

    # The basic list of common symbols is always dumped.
    log.indented('%s', text.green('Symbols:'))
    synthetic = collect_synthetic_symbols(exe)

    # NOTE(review): `args` is not a parameter of this function, so it must
    # resolve from an enclosing/module scope -- confirm it is the parsed
    # command-line namespace (with `symbols` and `offset`) for every caller.
    wanted = sorted(common_symbols + (args.symbols or []) + synthetic)
    for name in wanted:
        if name in exe.symbols:
            log.indented('%25s = %#x', name, translate_offset(exe.symbols[name], args, exe))
        else:
            log.indented('%25s = %s', name, text.red('not found'))

def get_libc_version(exe):
    """Return the ``major.minor`` libc version found in ``exe.data``.

    The raw bytes are searched for ``libc`` followed by a space or a dash
    and a ``<digits>.<digits>`` version; the first hit is returned as a
    ``str``. Returns :const:`None` when no such marker is present.
    """
    match = re.search(br'libc[ -](\d+\.\d+)', exe.data)
    return match.group(1).decode() if match else None

def translate_offset(offs, args, exe):
if args.offset:
Expand All @@ -182,7 +218,7 @@ def collect_synthetic_symbols(exe):
available_symbols.append('str_bin_sh')
except StopIteration:
pass

libc_start_main_return = exe.libc_start_main_return
if libc_start_main_return > 0:
exe.symbols['__libc_start_main_ret'] = libc_start_main_return
Expand All @@ -200,52 +236,56 @@ def main(args):
if len(pairs) % 2 != 0:
log.failure('Uneven number of arguments. Please provide "symbol offset" pairs')
return

symbols = {pairs[i]:pairs[i+1] for i in range(0, len(pairs), 2)}
matched_libcs = find_libc({'symbols': symbols})
matched_libcs = libcdb.search_by_symbol_offsets(symbols, offline_only=args.offline_only, return_raw=True)

for libc in matched_libcs:
handle_remote_libc(args, libc)
print_libc_info(libc)
if args.download_libc:
path = libcdb.search_by_build_id(libc['buildid'], args.unstrip)
if path:
shutil.copy(path, './{}.so'.format(libc['id']))

elif args.libc_command == 'hash':
inverted_map = {v: k for k, v in libcdb.MAP_TYPES.items()}
hash_type = inverted_map.get(args.hash_type, args.hash_type)

for hash_value in args.hash_value:
matched_libcs = find_libc({args.hash_type: hash_value})
for libc in matched_libcs:
handle_remote_libc(args, libc)
path = libcdb.search_by_hash(hash_value, hash_type, unstrip=args.unstrip, offline_only=args.offline_only)
exe = ELF(path, checksec=False)
print_libc_elf(exe)

if args.download_libc:
# if we cannot get actual libc version then copy with cache name
shutil.copy(path, './libc-{}.so'.format(get_libc_version(exe) or Path(path).stem))

elif args.libc_command == 'file':
from hashlib import md5, sha1, sha256
for file in args.files:
if not os.path.exists(file) or not os.path.isfile(file):
log.failure('File does not exist %s', args.file)
continue

if args.unstrip:
libcdb.unstrip_libc(file)

exe = ELF(file, checksec=False)
log.info('%s', text.red(os.path.basename(file)))

libc_version = re.search(br'libc[ -](\d+\.\d+)', exe.data)
if libc_version:
log.indented('%-20s %s', text.green('Version:'), libc_version.group(1).decode())

if exe.buildid:
log.indented('%-20s %s', text.green('BuildID:'), enhex(exe.buildid))
log.indented('%-20s %s', text.green('MD5:'), md5(exe.data).hexdigest())
log.indented('%-20s %s', text.green('SHA1:'), sha1(exe.data).hexdigest())
log.indented('%-20s %s', text.green('SHA256:'), sha256(exe.data).hexdigest())

# Always dump the basic list of common symbols
log.indented('%s', text.green('Symbols:'))
synthetic_symbols = collect_synthetic_symbols(exe)

symbols = common_symbols + (args.symbols or []) + synthetic_symbols
symbols.sort()
for symbol in symbols:
if symbol not in exe.symbols:
log.indented('%25s = %s', symbol, text.red('not found'))
else:
log.indented('%25s = %#x', symbol, translate_offset(exe.symbols[symbol], args, exe))
print_libc_elf(ELF(file, checksec=False))

elif args.libc_command == 'fetch':

if args.update:
subprocess.check_call(['./get'] + args.update, cwd=args.path)

else:
if not Path(args.path).exists():
if yesno("Would you like to initialize the libc-database repository? "
"If the path already exists, this prompt will not display, and automatically upgrade repository."):
log.waitfor("init libc-database repository")
subprocess.check_call(['git', 'clone', 'https://github.com/niklasb/libc-database/', args.path])
else:
log.waitfor("upgrade libc-database repository")
subprocess.check_call(['git', 'pull'], cwd=args.path)


if __name__ == '__main__':
pwnlib.commandline.common.main(__file__, main)
15 changes: 11 additions & 4 deletions pwnlib/libcdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,8 @@ def search_by_hash(search_target, search_type='build_id', unstrip=True, offline_
# Ensure that the libcdb cache directory exists
cache, cache_valid = _check_elf_cache('libcdb', search_target, search_type)
if cache_valid:
if unstrip:
unstrip_libc(cache)
return cache

# We searched for this buildid before, but didn't find anything.
Expand Down Expand Up @@ -653,7 +655,7 @@ def _handle_multiple_matching_libcs(matching_libcs):
selected_index = options("Select the libc version to use:", [libc['id'] for libc in matching_libcs])
return matching_libcs[selected_index]

def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False, offline_only=False, search_type='build_id'):
def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, offline_only=False, search_type='build_id', return_as_list=False, return_raw=False):
"""
Lookup possible matching libc versions based on leaked function addresses.
Expand All @@ -672,14 +674,16 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as
The libc to select if there are multiple matches (starting at 1).
unstrip(bool):
Try to fetch debug info for the libc and apply it to the downloaded file.
return_as_list(bool):
Return a list of build ids of all matching libc versions
instead of a path to a downloaded file.
offline_only(bool):
When pass `offline_only=True`, restricts search mode to offline sources only,
disable online lookup. Defaults to `False`, and enable both offline and online providers.
search_type(str):
An option to select searched hash.
return_as_list(bool):
Return a list of build ids of all matching libc versions
instead of a path to a downloaded file.
return_raw(bool):
Return raw list of matched libc.
Returns:
Path to the downloaded library on disk, or :const:`None`.
Expand Down Expand Up @@ -735,6 +739,9 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as
if return_as_list:
return [libc['buildid'] for libc in matching_list]

if return_raw:
return matching_list

mapped_type = MAP_TYPES.get(search_type, search_type)

# If there's only one match, return it directly
Expand Down

0 comments on commit fa14663

Please sign in to comment.