-
Notifications
You must be signed in to change notification settings - Fork 54
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add scripts to modernize to Python 3
These scripts were developed to modernize https://github.com/pantsbuild/pants as it drops Python 2 and can start to use Python 3-only features. - remove_builtins.py unravels part of the future library by removing all from builtins imports, which no-op in Py3. Its main innovations are the CLI interface and removing BUILD entries if possible. - update_headers.py removes from __future__ imports and # coding=utf-8 lines, which both no-op on Python 3. - update_decode_encode.py uses the default utf-8 encoding to simplify string calls to encode() and decode(). - modernize_classes.py simplifies calls to super() and removes the unnecessary object base class, as all classes are new style in Python 3. (sapling split of f017c31bc73d2c8345333cc7062de2b3c196a9cc)
- Loading branch information
1 parent
735f8f3
commit 332e7a2
Showing
4 changed files
with
261 additions
and
0 deletions.
There are no files selected for viewing
52 changes: 52 additions & 0 deletions
52
scripts/fsqio/python3-port-utils/pants/modernize_classes.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import re | ||
from pathlib import Path | ||
|
||
from typing import Sequence, Set | ||
|
||
# Matches the Python 2 style two-argument call, e.g. `super(Foo, self)`.
# The class name may be any identifier (leading underscore, digits and
# underscores included), not just a run of ASCII letters.
SUPER_REGEX = r"super\([A-Za-z_][A-Za-z0-9_]*, [a-z]+\)"
# Matches a class statement whose only base is `object`. The class name is
# captured as the `className` group so the replacement can re-emit it.
OBJECT_REGEX = r"class (?P<className>[A-Za-z_][A-Za-z0-9_]*)\(object\):"
|
||
|
||
def main() -> None:
    """Apply both class-modernizing rewrites to every relevant file.

    The folders come from the command line. For each file returned by
    `get_relevant_files`, the `super(...)` rewrite runs first, then the
    `(object)` base-class rewrite.
    """
    args = create_parser().parse_args()
    rewrites = (
        (SUPER_REGEX, "super()"),
        (OBJECT_REGEX, r"class \g<className>:"),
    )
    for source_file in get_relevant_files(args.folders):
        for pattern, substitute in rewrites:
            simplify(file_path=source_file, regex=pattern, replacement=substitute)
|
||
|
||
def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script.

    Returns:
        A parser accepting zero or more positional `folders`.
    """
    # The description previously described a different script (the
    # `from builtins` remover); it now states what this tool rewrites.
    parser = argparse.ArgumentParser(
        description='Simplify `super(Class, self)` calls to `super()` and remove the '
                    'unnecessary `object` base class.')
    parser.add_argument('folders', nargs='*')
    return parser
|
||
|
||
def get_relevant_files(folders: Sequence[str]) -> Set[Path]:
    """Collect the `*.py` files under `folders` that contain a rewritable construct.

    A file qualifies when at least one of its lines matches `SUPER_REGEX` or
    `OBJECT_REGEX`. Folders are searched recursively.
    """

    def needs_rewrite(candidate: Path) -> bool:
        # Stops at the first line that matches either pattern.
        return any(
            re.search(SUPER_REGEX, text) or re.search(OBJECT_REGEX, text)
            for text in candidate.read_text().splitlines()
        )

    matches: Set[Path] = set()
    for folder in folders:
        for candidate in Path(folder).rglob("*.py"):
            if needs_rewrite(candidate):
                matches.add(candidate)
    return matches
|
||
|
||
def simplify(*, file_path: Path, regex: str, replacement: str) -> None:
    """Replace every match of `regex` in `file_path` with `replacement`, in place.

    The substitution is applied one line at a time. Text that does not match is
    left byte-for-byte intact: line terminators are kept as read, no trailing
    newline is added, and the file is only written when something changed.

    Args:
        file_path: File to rewrite.
        regex: Pattern to search for on each line.
        replacement: Replacement template passed to `re.sub`.
    """
    pattern = re.compile(regex)
    original = file_path.read_text()
    # keepends=True keeps each line's own terminator. str.splitlines() also
    # splits on form feeds and similar separators, so dropping the terminators
    # and re-joining with "\n" would silently rewrite those characters.
    updated = "".join(
        pattern.sub(replacement, line) for line in original.splitlines(keepends=True)
    )
    if updated != original:
        file_path.write_text(updated)
|
||
|
||
# Script entry point: run main() only when executed directly.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Exit quietly, without a traceback, when the user interrupts the run.
        pass
103 changes: 103 additions & 0 deletions
103
scripts/fsqio/python3-port-utils/pants/remove_builtins.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import subprocess | ||
from pathlib import Path | ||
from textwrap import dedent | ||
|
||
from typing import List, Sequence, Set | ||
|
||
|
||
def main() -> None:
    """Strip `from builtins` imports and, where safe, the `future` BUILD dependency.

    For each file returned by `get_files_with_import`, the import line is
    removed first. The BUILD file is only touched when
    `safe_to_remove_future_from_build` reports that the file no longer needs
    the `future` library.
    """
    requested_folders = create_parser().parse_args().folders
    for source_file in get_files_with_import(requested_folders):
        remove_builtins(file_path=source_file)
        if not safe_to_remove_future_from_build(file_path=source_file):
            continue
        update_build_dependencies(
            file_path=source_file,
            pants_target_name=determine_pants_target_name(file_path=source_file),
        )
|
||
|
||
def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser: zero or more positional `folders`."""
    summary = 'Remove `from builtins import x`, and possibly the BUILD entry for `future`.'
    cli = argparse.ArgumentParser(description=summary)
    cli.add_argument('folders', nargs='*')
    return cli
|
||
|
||
def get_files_with_import(folders: Sequence[str]) -> Set[Path]:
    """Find the `*.py` files under `folders` that contain `from builtins import`.

    Folders are searched recursively. Files whose name ends with `__init__.py`
    are skipped without being read.
    """
    selected: Set[Path] = set()
    for folder in folders:
        for candidate in Path(folder).rglob("*.py"):
            if candidate.name.endswith("__init__.py"):
                continue
            if "from builtins import" in candidate.read_text():
                selected.add(candidate)
    return selected
|
||
|
||
def determine_pants_target_name(file_path: Path) -> str:
    """Ask `./pants filemap` which target in the file's directory owns `file_path`.

    The first output line containing the file's name is used. That line is
    split on a space and its second field is split on `:`; the part after the
    colon is returned as the target name.

    Raises:
        SystemExit: if no filemap line mentions the file's name.
    """
    completed = subprocess.run(
        ['./pants', 'filemap', f'{file_path.parent}:'],
        stdout=subprocess.PIPE,
        encoding="utf-8",
    )
    for entry in completed.stdout.strip().split('\n'):
        if file_path.name in entry:
            target_address = entry.split(' ')[1]
            return target_address.split(':')[1]
    raise SystemExit(dedent(f"""\n
        ERROR: File '{file_path}' invalid. Not found anywhere in {file_path.parent}/BUILD."""))
|
||
|
||
def remove_builtins(*, file_path: Path) -> None:
    """Delete the first line of `file_path` that contains `from builtins`.

    The file is rewritten with its lines joined by `\\n` and a single trailing
    newline, whether or not a line was removed.

    Args:
        file_path: Python source file to edit in place.
    """
    lines = file_path.read_text().splitlines()
    builtins_line_index = next(
        (i for i, line in enumerate(lines) if "from builtins" in line), None
    )
    # Compare against None explicitly: index 0 is a valid hit but is falsy, so
    # a truthiness check would leave an import on the first line untouched.
    if builtins_line_index is not None:
        lines.pop(builtins_line_index)
    file_path.write_text("\n".join(lines) + "\n")
|
||
|
||
def safe_to_remove_future_from_build(*, file_path: Path) -> bool:
    """Report whether `file_path` is free of `future.utils` / `future.moves` imports.

    Returns:
        False as soon as a line containing `from future.utils` or
        `from future.moves` is found, True otherwise.
    """
    source_lines = file_path.read_text().splitlines()
    for text in source_lines:
        if "from future.utils" in text or "from future.moves" in text:
            return False
    return True
|
||
|
||
def _find_target_index_in_build( | ||
*, build_lines: List[str], pants_target_name: str, file_name: str | ||
) -> int: | ||
index = next((i for i, line in enumerate(build_lines) | ||
if f"name = '{pants_target_name}'" in line | ||
or f"name='{pants_target_name}'" in line), | ||
None) | ||
if index is None: # mono-target | ||
index = next((i for i, line in enumerate(build_lines) if file_name in line), None) | ||
if index is None: # only one target block in file, and sources aren't specified | ||
index = next(i for i, line in enumerate(build_lines) if 'python_' in line and '(' in line) | ||
return index | ||
|
||
|
||
def update_build_dependencies(*, file_path: Path, pants_target_name: str) -> None:
    """Drop the `3rdparty/python:future` line from the BUILD file next to `file_path`.

    The search starts at the anchor line found by `_find_target_index_in_build`
    and removes the first later line mentioning the dependency. The BUILD file
    is always rewritten with `\\n`-joined lines and a trailing newline.
    """
    build_file: Path = file_path.parent / "BUILD"
    build_lines = build_file.read_text().splitlines()
    anchor = _find_target_index_in_build(
        build_lines=build_lines, pants_target_name=pants_target_name, file_name=file_path.name
    )
    offset = None
    for distance, text in enumerate(build_lines[anchor:]):
        if '3rdparty/python:future' in text:
            offset = distance
            break
    # NOTE(review): this is a truthiness check, so an offset of 0 (the anchor
    # line itself mentions the dependency) is skipped just like "not found".
    # NOTE(review): the search is not bounded to the anchored target, so it
    # can reach into a later target in the same BUILD file - confirm intent.
    if offset:
        del build_lines[anchor + offset]
    build_file.write_text("\n".join(build_lines) + "\n")
|
||
|
||
# Script entry point: run main() only when executed directly.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Exit quietly, without a traceback, when the user interrupts the run.
        pass
53 changes: 53 additions & 0 deletions
53
scripts/fsqio/python3-port-utils/pants/update_decode_encode.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import re | ||
from pathlib import Path | ||
|
||
from typing import Sequence, Set | ||
|
||
# The utf-8 literal spellings that are recognized as the explicit argument.
ENCODING_REGEX = r"""('utf-8'|"utf-8"|'UTF-8'|"UTF-8")"""
# The leading dot is escaped so that only real method calls match. Unescaped,
# it matched any character, so `my_decode('utf-8')` was rewritten to
# `my.decode()`.
DECODE_REGEX = rf"\.decode\({ENCODING_REGEX}\)"
ENCODE_REGEX = rf"\.encode\({ENCODING_REGEX}\)"
|
||
|
||
def main() -> None:
    """Drop explicit utf-8 arguments from `.decode(...)` and `.encode(...)` calls.

    The folders come from the command line. For each file returned by
    `get_relevant_files`, the decode rewrite runs first, then the encode
    rewrite.
    """
    args = create_parser().parse_args()
    rewrites = (
        (DECODE_REGEX, ".decode()"),
        (ENCODE_REGEX, r".encode()"),
    )
    for source_file in get_relevant_files(args.folders):
        for pattern, substitute in rewrites:
            simplify(file_path=source_file, regex=pattern, replacement=substitute)
|
||
|
||
def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script.

    Returns:
        A parser accepting zero or more positional `folders`.
    """
    # The description previously described a different script (the
    # `from builtins` remover); it now states what this tool rewrites.
    parser = argparse.ArgumentParser(
        description="Simplify `.encode('utf-8')` and `.decode('utf-8')` calls to rely on "
                    "the default utf-8 encoding.")
    parser.add_argument('folders', nargs='*')
    return parser
|
||
|
||
def get_relevant_files(folders: Sequence[str]) -> Set[Path]:
    """Collect the `*.py` files under `folders` that contain a rewritable call.

    A file qualifies when at least one of its lines matches `ENCODE_REGEX` or
    `DECODE_REGEX`. Folders are searched recursively.
    """

    def has_explicit_utf8(candidate: Path) -> bool:
        # Stops at the first line that matches either pattern.
        for text in candidate.read_text().splitlines():
            if re.search(ENCODE_REGEX, text) or re.search(DECODE_REGEX, text):
                return True
        return False

    candidates = (
        candidate
        for folder in folders
        for candidate in Path(folder).rglob("*.py")
    )
    return set(filter(has_explicit_utf8, candidates))
|
||
|
||
def simplify(*, file_path: Path, regex: str, replacement: str) -> None:
    """Replace every match of `regex` in `file_path` with `replacement`, in place.

    The substitution is applied one line at a time. Text that does not match is
    left byte-for-byte intact: line terminators are kept as read, no trailing
    newline is added, and the file is only written when something changed.

    Args:
        file_path: File to rewrite.
        regex: Pattern to search for on each line.
        replacement: Replacement template passed to `re.sub`.
    """
    pattern = re.compile(regex)
    original = file_path.read_text()
    # keepends=True keeps each line's own terminator. str.splitlines() also
    # splits on form feeds and similar separators, so dropping the terminators
    # and re-joining with "\n" would silently rewrite those characters.
    updated = "".join(
        pattern.sub(replacement, line) for line in original.splitlines(keepends=True)
    )
    if updated != original:
        file_path.write_text(updated)
|
||
|
||
# Script entry point: run main() only when executed directly.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Exit quietly, without a traceback, when the user interrupts the run.
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
|
||
from typing import List, Set, Sequence | ||
from glob import glob | ||
|
||
|
||
# Line index where is_py2_header looks for the `# coding=utf-8` comment.
ENCODING_INDEX = 0
# Line index where is_py2_header looks for the `from __future__` import.
FUTURE_IMPORT_INDEX = 4
|
||
|
||
def main() -> None:
    """Rewrite every file under the given folders that starts with the Python 2 header.

    Each file is read fully. Its first `FUTURE_IMPORT_INDEX + 1` lines are
    passed to `is_py2_header`, and matching files are handed to `rewrite`.
    """
    cli_args = create_parser().parse_args()
    for path in get_files(cli_args.folders):
        with open(path, "r") as handle:
            contents = handle.readlines()
        if is_py2_header(contents[:FUTURE_IMPORT_INDEX + 1]):
            rewrite(path, contents)
|
||
|
||
def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser: zero or more positional `folders`."""
    summary = 'Use the new header without __future__ imports and # encoding.'
    cli = argparse.ArgumentParser(description=summary)
    cli.add_argument('folders', nargs='*')
    return cli
|
||
|
||
def get_files(folders: Sequence[str]) -> Set[str]:
    """Return the paths of all `*.py` files under `folders`, searched recursively.

    Paths ending with `__init__.py` are excluded.
    """
    found: Set[str] = set()
    for folder in folders:
        pattern = f"{folder}/**/*.py"
        found.update(
            path
            for path in glob(pattern, recursive=True)
            if not path.endswith("__init__.py")
        )
    return found
|
||
|
||
def is_py2_header(header: Sequence[str]) -> bool:
    """Report whether `header` looks like the old Python 2 file header.

    Args:
        header: The leading lines of a file.

    Returns:
        True when the line at `ENCODING_INDEX` contains `# coding=utf-8` and the
        line at `FUTURE_IMPORT_INDEX` contains `from __future__`. False
        otherwise, including when `header` is too short to hold both lines.
    """
    # Empty or very short files cannot carry the full header. Without this
    # guard, indexing below raises IndexError for them.
    if len(header) <= max(ENCODING_INDEX, FUTURE_IMPORT_INDEX):
        return False
    return (
        "# coding=utf-8" in header[ENCODING_INDEX]
        and "from __future__" in header[FUTURE_IMPORT_INDEX]
    )
|
||
|
||
def rewrite(path: str, lines: List[str]) -> None:
    """Overwrite `path` with `lines` minus the Python 2 header lines.

    Dropped: everything up to and including the line at `ENCODING_INDEX`, the
    line at `FUTURE_IMPORT_INDEX`, and the line right after it. The lines
    between the two indexes and everything after the dropped pair are kept in
    order.
    """
    kept_header = lines[ENCODING_INDEX + 1:FUTURE_IMPORT_INDEX]
    kept_body = lines[FUTURE_IMPORT_INDEX + 2:]
    with open(path, "w") as out:
        out.writelines(kept_header + kept_body)
|
||
|
||
# Script entry point: run main() only when executed directly.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Exit quietly, without a traceback, when the user interrupts the run.
        pass