Skip to content

Commit

Permalink
Add scripts to modernize to Python 3
Browse files Browse the repository at this point in the history
These scripts were developed to modernize https://github.com/pantsbuild/pants as it drops Python 2 and can start to use Python 3-only features.

   - remove_builtins.py unravels part of the `future` library by removing all `from builtins import ...` statements, which are no-ops in Python 3. Its main innovations are the CLI interface and removing BUILD entries where possible.
   - update_headers.py removes `from __future__` imports and `# coding=utf-8` lines, both of which are no-ops on Python 3.
   - update_decode_encode.py uses the default utf-8 encoding to simplify string calls to encode() and decode().
   - modernize_classes.py simplifies calls to super() and removes the unnecessary object base class, as all classes are new style in Python 3.

(sapling split of f017c31bc73d2c8345333cc7062de2b3c196a9cc)
  • Loading branch information
Eric-Arellano authored and OniOni committed Aug 21, 2019
1 parent 735f8f3 commit 332e7a2
Show file tree
Hide file tree
Showing 4 changed files with 261 additions and 0 deletions.
52 changes: 52 additions & 0 deletions scripts/fsqio/python3-port-utils/pants/modernize_classes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env python3

import argparse
import re
from pathlib import Path

from typing import Sequence, Set

# Matches the Python 2 style two-argument call, e.g. `super(Foo, self)`.
# The class name may be any identifier (digits and underscores included, e.g.
# `_Foo2`); the second argument is limited to lowercase letters, which covers
# the conventional `self` and `cls`.
SUPER_REGEX = r"super\([a-zA-Z_][a-zA-Z0-9_]*, [a-z]+\)"
# Matches a class statement whose only base is `object`, capturing the class
# name so the replacement can re-emit the statement without the base list.
OBJECT_REGEX = r"class (?P<className>[a-zA-Z_][a-zA-Z0-9_]*)\(object\):"


def main() -> None:
    """Modernize class definitions in every relevant file under the CLI folders.

    For each file selected by `get_relevant_files`, first the two-argument
    `super(...)` calls are collapsed to `super()`, then the explicit `object`
    base class is dropped.
    """
    cli_args = create_parser().parse_args()
    rewrites = (
        (SUPER_REGEX, "super()"),
        (OBJECT_REGEX, r"class \g<className>:"),
    )
    for path in get_relevant_files(cli_args.folders):
        for pattern, substitute in rewrites:
            simplify(file_path=path, regex=pattern, replacement=substitute)


def create_parser() -> argparse.ArgumentParser:
    """Build the command line parser for this script.

    Returns:
      A parser accepting zero or more positional `folders` to process.
    """
    # The description must describe this script: it is shown by `--help`.
    parser = argparse.ArgumentParser(
        description='Simplify `super(Class, self)` calls to `super()` and remove the '
                    'unnecessary `object` base class.')
    parser.add_argument('folders', nargs='*')
    return parser


def get_relevant_files(folders: Sequence[str]) -> Set[Path]:
    """Collect the `*.py` files under `folders` that contain something to rewrite.

    A file is relevant when at least one of its lines matches SUPER_REGEX or
    OBJECT_REGEX. Folders are searched recursively.
    """
    def needs_rewrite(candidate: Path) -> bool:
        # Stop at the first matching line, like the `any(...)` it replaces.
        for text_line in candidate.read_text().splitlines():
            if re.search(SUPER_REGEX, text_line) or re.search(OBJECT_REGEX, text_line):
                return True
        return False

    relevant: Set[Path] = set()
    for folder in folders:
        for candidate in Path(folder).rglob("*.py"):
            if needs_rewrite(candidate):
                relevant.add(candidate)
    return relevant


def simplify(*, file_path: Path, regex: str, replacement: str) -> None:
    """Replace every match of `regex` in `file_path` with `replacement`, in place.

    The substitution is applied line by line. The file is always written back,
    with lines joined by a single newline character and a trailing newline.
    """
    # Substituting on a line without a match returns it unchanged, so there is
    # no need to pre-select the matching lines.
    rewritten = [
        re.sub(regex, replacement, text_line)
        for text_line in file_path.read_text().splitlines()
    ]
    file_path.write_text("\n".join(rewritten) + "\n")


# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C instead of printing a traceback.
        pass
103 changes: 103 additions & 0 deletions scripts/fsqio/python3-port-utils/pants/remove_builtins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/usr/bin/env python3

import argparse
import subprocess
from pathlib import Path
from textwrap import dedent

from typing import List, Sequence, Set


def main() -> None:
    """Strip `from builtins` imports from all matching files under the CLI folders.

    After a file is cleaned, the `future` entry is also removed from the BUILD
    file next to it, but only when the file no longer imports anything else
    from `future`.
    """
    cli_args = create_parser().parse_args()
    for source in get_files_with_import(cli_args.folders):
        remove_builtins(file_path=source)
        if not safe_to_remove_future_from_build(file_path=source):
            continue
        owning_target = determine_pants_target_name(file_path=source)
        update_build_dependencies(file_path=source, pants_target_name=owning_target)


def create_parser() -> argparse.ArgumentParser:
    """Build the command line parser: zero or more positional `folders`."""
    cli = argparse.ArgumentParser(
        description='Remove `from builtins import x`, and possibly the BUILD entry for `future`.',
    )
    cli.add_argument('folders', nargs='*')
    return cli


def get_files_with_import(folders: Sequence[str]) -> Set[Path]:
    """Find the `*.py` files under `folders` that contain `from builtins import`.

    Folders are searched recursively. Files whose name ends with `__init__.py`
    are skipped without being read.
    """
    found: Set[Path] = set()
    for folder in folders:
        for candidate in Path(folder).rglob("*.py"):
            if candidate.name.endswith("__init__.py"):
                continue
            if "from builtins import" in candidate.read_text():
                found.add(candidate)
    return found


def determine_pants_target_name(file_path: Path) -> str:
    """Return the name of the Pants target that owns `file_path`.

    Runs `./pants filemap <parent dir>:` relative to the current working
    directory and scans its output. Each line is expected to have the form
    `<source path> <target address>`, for example `src/app/util.py src/app:lib`.

    The owning entry is found by comparing the base name of each listed source
    with `file_path.name`. The comparison is exact rather than a substring test,
    so that `util.py` is not mistaken for `test_util.py`.

    Returns:
      The part of the target address after the colon.

    Raises:
      SystemExit: if the filemap output has no entry for the file.
    """
    completed = subprocess.run(
        ['./pants', 'filemap', f'{file_path.parent}:'],
        stdout=subprocess.PIPE,
        encoding="utf-8",
    )
    file_map = completed.stdout.strip().split('\n')
    target_entry = next(
        (line for line in file_map if Path(line.split(' ')[0]).name == file_path.name),
        None,
    )
    if target_entry is None:
        raise SystemExit(dedent(f"""\n
            ERROR: File '{file_path}' invalid. Not found anywhere in {file_path.parent}/BUILD."""))
    pants_target_path = target_entry.split(' ')[1]
    return pants_target_path.split(':')[1]


def remove_builtins(*, file_path: Path) -> None:
    """Delete the first line containing `from builtins` from `file_path`.

    The file is rewritten in place, with lines joined by a single newline
    character and a trailing newline, whether or not a line was removed.
    """
    lines = file_path.read_text().splitlines()
    builtins_line_index = next(
        (i for i, line in enumerate(lines) if "from builtins" in line), None
    )
    # Compare with None explicitly: index 0 (import on the very first line) is
    # a valid match but is falsy, so a plain truthiness test would skip it.
    if builtins_line_index is not None:
        lines.pop(builtins_line_index)
    file_path.write_text("\n".join(lines) + "\n")


def safe_to_remove_future_from_build(*, file_path: Path) -> bool:
    """Tell whether `file_path` is free of other imports from `future`.

    Returns:
      False as soon as a line containing `from future.utils` or
      `from future.moves` is found, True otherwise.
    """
    blockers = ("from future.utils", "from future.moves")
    for source_line in file_path.read_text().splitlines():
        if any(marker in source_line for marker in blockers):
            return False
    return True


def _find_target_index_in_build(
    *, build_lines: List[str], pants_target_name: str, file_name: str
) -> int:
    """Locate the line of a BUILD file that anchors the given target.

    Three strategies are tried in order, each returning the first matching index:
      1. a line declaring the target name (`name = '...'` or `name='...'`);
      2. a line mentioning `file_name`;
      3. a line containing both `python_` and `(`.

    Raises:
      StopIteration: if none of the strategies matches any line.
    """
    spaced_decl = f"name = '{pants_target_name}'"
    compact_decl = f"name='{pants_target_name}'"
    for position, text in enumerate(build_lines):
        if spaced_decl in text or compact_decl in text:
            return position
    # mono-target
    for position, text in enumerate(build_lines):
        if file_name in text:
            return position
    # only one target block in file, and sources aren't specified
    return next(
        position
        for position, text in enumerate(build_lines)
        if 'python_' in text and '(' in text
    )


def update_build_dependencies(*, file_path: Path, pants_target_name: str) -> None:
    """Remove the `3rdparty/python:future` line from the BUILD file of `file_path`.

    The BUILD file is the one in the same directory as `file_path`. The search
    starts at the line that anchors the target and removes the first line at or
    after it that mentions the dependency. The file is always written back, with
    lines joined by a single newline character and a trailing newline.
    """
    build_file: Path = file_path.parent / "BUILD"
    lines = build_file.read_text().splitlines()
    target_index = _find_target_index_in_build(
        build_lines=lines, pants_target_name=pants_target_name, file_name=file_path.name
    )
    # Offset of the dependency line relative to target_index, or None if absent.
    future_line_offset = next(
        (i for i, line in enumerate(lines[target_index:]) if '3rdparty/python:future' in line),
        None,
    )
    # Compare with None explicitly: an offset of 0 is a valid match but falsy.
    if future_line_offset is not None:
        lines.pop(target_index + future_line_offset)
    build_file.write_text("\n".join(lines) + "\n")


# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C instead of printing a traceback.
        pass
53 changes: 53 additions & 0 deletions scripts/fsqio/python3-port-utils/pants/update_decode_encode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python3

import argparse
import re
from pathlib import Path

from typing import Sequence, Set

# The literal utf-8 argument, in either quote style and either letter case.
ENCODING_REGEX = r"""('utf-8'|"utf-8"|'UTF-8'|"UTF-8")"""
# The leading dot is escaped so that only real method calls match. Unescaped it
# matches any character, which would rewrite e.g. `my_decode('utf-8')` into
# `my.decode()`.
DECODE_REGEX = rf"\.decode\({ENCODING_REGEX}\)"
ENCODE_REGEX = rf"\.encode\({ENCODING_REGEX}\)"


def main() -> None:
    """Drop explicit utf-8 arguments in every relevant file under the CLI folders.

    For each file selected by `get_relevant_files`, `decode(...)` calls are
    simplified first, then `encode(...)` calls.
    """
    cli_args = create_parser().parse_args()
    rewrites = (
        (DECODE_REGEX, ".decode()"),
        (ENCODE_REGEX, ".encode()"),
    )
    for path in get_relevant_files(cli_args.folders):
        for pattern, substitute in rewrites:
            simplify(file_path=path, regex=pattern, replacement=substitute)


def create_parser() -> argparse.ArgumentParser:
    """Build the command line parser for this script.

    Returns:
      A parser accepting zero or more positional `folders` to process.
    """
    # The description must describe this script: it is shown by `--help`.
    parser = argparse.ArgumentParser(
        description='Drop the redundant utf-8 argument from calls to `encode()` and `decode()`.')
    parser.add_argument('folders', nargs='*')
    return parser


def get_relevant_files(folders: Sequence[str]) -> Set[Path]:
    """Collect the `*.py` files under `folders` that contain something to rewrite.

    A file is relevant when at least one of its lines matches ENCODE_REGEX or
    DECODE_REGEX. Folders are searched recursively.
    """
    def needs_rewrite(candidate: Path) -> bool:
        # Stop at the first matching line, like the `any(...)` it replaces.
        for text_line in candidate.read_text().splitlines():
            if re.search(ENCODE_REGEX, text_line) or re.search(DECODE_REGEX, text_line):
                return True
        return False

    relevant: Set[Path] = set()
    for folder in folders:
        for candidate in Path(folder).rglob("*.py"):
            if needs_rewrite(candidate):
                relevant.add(candidate)
    return relevant


def simplify(*, file_path: Path, regex: str, replacement: str) -> None:
    """Replace every match of `regex` in `file_path` with `replacement`, in place.

    The substitution is applied line by line. The file is always written back,
    with lines joined by a single newline character and a trailing newline.
    """
    original_rows = file_path.read_text().splitlines()
    # A row without a match comes back from re.sub unchanged, so every row can
    # go through the substitution.
    updated_text = "\n".join(re.sub(regex, replacement, row) for row in original_rows)
    file_path.write_text(updated_text + "\n")


# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C instead of printing a traceback.
        pass
53 changes: 53 additions & 0 deletions scripts/fsqio/python3-port-utils/pants/update_headers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python3

import argparse

from typing import List, Set, Sequence
from glob import glob


# Index of the line checked for the `# coding=utf-8` marker.
ENCODING_INDEX = 0
# Index of the line checked for the `from __future__` import.
FUTURE_IMPORT_INDEX = 4


def main() -> None:
    """Rewrite the header of every file under the CLI folders that has the Py2 header.

    Each file is read fully; its first FUTURE_IMPORT_INDEX + 1 lines are passed
    to `is_py2_header`, and matching files are handed to `rewrite`.
    """
    cli_args = create_parser().parse_args()
    for path in get_files(cli_args.folders):
        with open(path, "r") as handle:
            content = handle.readlines()
        header = content[:FUTURE_IMPORT_INDEX + 1]
        if is_py2_header(header):
            rewrite(path, content)


def create_parser() -> argparse.ArgumentParser:
    """Build the command line parser: zero or more positional `folders`."""
    cli = argparse.ArgumentParser(
        description='Use the new header without __future__ imports and # encoding.',
    )
    cli.add_argument('folders', nargs='*')
    return cli


def get_files(folders: Sequence[str]) -> Set[str]:
    """Collect the paths of all `*.py` files under `folders`, searched recursively.

    Paths ending with `__init__.py` are left out.
    """
    selected: Set[str] = set()
    for folder in folders:
        for match in glob(f"{folder}/**/*.py", recursive=True):
            if not match.endswith("__init__.py"):
                selected.add(match)
    return selected


def is_py2_header(header: Sequence[str]) -> bool:
    """Tell whether `header` is the Python 2 style file header.

    Args:
      header: the leading lines of a file.

    Returns:
      True when the line at ENCODING_INDEX contains `# coding=utf-8` and the line
      at FUTURE_IMPORT_INDEX contains `from __future__`. False otherwise,
      including when `header` is too short to contain both lines.
    """
    # Short or empty files cannot carry the header; without this guard the
    # indexing below would raise IndexError for them.
    if len(header) <= max(ENCODING_INDEX, FUTURE_IMPORT_INDEX):
        return False
    return (
        "# coding=utf-8" in header[ENCODING_INDEX]
        and "from __future__" in header[FUTURE_IMPORT_INDEX]
    )


def rewrite(path: str, lines: List[str]) -> None:
    """Overwrite `path` with `lines` minus the Python 2 header lines.

    The line at ENCODING_INDEX is dropped, as are the line at
    FUTURE_IMPORT_INDEX and the one right after it. All other lines are written
    unchanged and in order.
    """
    kept_header = lines[ENCODING_INDEX + 1:FUTURE_IMPORT_INDEX]
    kept_body = lines[FUTURE_IMPORT_INDEX + 2:]
    with open(path, "w") as out:
        out.writelines(kept_header + kept_body)


# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C instead of printing a traceback.
        pass

0 comments on commit 332e7a2

Please sign in to comment.