-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind_external_imports.py
286 lines (231 loc) · 9.19 KB
/
find_external_imports.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
"""
This script identifies external dependencies in a Python project.

The script scans Python files within a specified directory or matching a glob pattern,
extracts import statements, and determines which dependencies are not part of the standard
library or locally defined within the project. It then outputs these external dependencies
either to the console or to a requirements.txt file.

Usage:
    python find_external_imports.py <path> [--pattern <pattern>] [--output <output>]

Arguments:
    path        The directory path or glob pattern to search for Python files.
    --pattern   The file pattern to search for Python files. Defaults to "*.py".
    --output    The output method: "print" to display on the console or "requirements"
                to save to requirements.txt. Defaults to "print".
"""
import os
import ast
import glob
import argparse
import importlib.util
import sys
import pkgutil
from typing import Set, Tuple, Optional
def extract_imports_from_file(file_path: str) -> Set[str]:
    """
    Extracts the module names imported anywhere in a Python file.

    The whole syntax tree is walked, so imports nested inside functions,
    classes, ``if`` blocks or ``try``/``except ImportError`` fallbacks are
    reported in addition to module-level ones. Relative imports
    (``from . import x``, ``from .pkg import y``) are skipped: they can only
    refer to modules inside the enclosing package, never to an external
    dependency.

    Args:
        file_path (str): The path to the Python file.

    Returns:
        Set[str]: Dotted module names as written in the import statements
        (``import a.b`` yields ``"a.b"``; ``from a.b import c`` yields ``"a.b"``).

    Raises:
        OSError: If the file cannot be opened.
        SyntaxError: If the file is not valid Python source.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        tree = ast.parse(file.read(), filename=file_path)

    imports: Set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            imports.update(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # level > 0 marks a relative import; node.module is then a path
            # inside the current package and must not be treated as absolute.
            if node.level == 0 and node.module:
                imports.add(node.module)
    return imports
def extract_defined_entities_from_file(file_path: str) -> Set[str]:
    """
    Extracts the names of top-level classes and functions from a Python file.

    Only direct children of the module body are considered; nested
    definitions (methods, inner functions) are not reported. Both ``def`` and
    ``async def`` functions are included.

    Args:
        file_path (str): The path to the Python file.

    Returns:
        Set[str]: A set of class and function names defined at module level.

    Raises:
        OSError: If the file cannot be opened.
        SyntaxError: If the file is not valid Python source.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        tree = ast.parse(file.read(), filename=file_path)

    # ``async def`` is a distinct AST node type and must be listed explicitly.
    definition_nodes = (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)
    return {node.name for node in tree.body if isinstance(node, definition_nodes)}
def find_external_dependencies(path: str) -> Tuple[Set[str], Set[str]]:
    """
    Collects imports and local definitions for a directory or a glob pattern.

    When ``path`` is an existing directory the directory scanner is used;
    any other value is handed to the glob-based scanner.

    Args:
        path (str): The directory path or glob pattern.

    Returns:
        Tuple[Set[str], Set[str]]: The pair returned by the selected scanner:
        the imported module names it collected and the class/function names
        it found defined in the scanned files.
    """
    # Only the chosen branch of the conditional expression is evaluated.
    scanner = (
        find_external_dependencies_in_directory
        if os.path.isdir(path)
        else find_external_dependencies_matching_glob
    )
    return scanner(path)
def find_external_dependencies_in_directory(directory) -> Tuple[Set[str], Set[str]]:
    """
    Scans every ``.py`` file below a directory for imports and definitions.

    The directory tree is traversed with ``os.walk``; each file whose name
    ends in ``.py`` is passed to ``extract_imports_from_file`` and
    ``extract_defined_entities_from_file`` and the results are merged.

    Args:
        directory (str): The directory path.

    Returns:
        Tuple[Set[str], Set[str]]: The union of all imported module names and
        the union of all class/function names found in the scanned files.
    """
    collected_imports = set()
    collected_definitions = set()

    for current_dir, _subdirs, entries in os.walk(directory):
        for entry in entries:
            if not entry.endswith(".py"):
                continue
            source_path = os.path.join(current_dir, entry)
            file_imports = extract_imports_from_file(source_path)
            file_definitions = extract_defined_entities_from_file(source_path)
            collected_imports.update(file_imports)
            collected_definitions.update(file_definitions)

    return collected_imports, collected_definitions
def find_external_dependencies_matching_glob(pattern: str) -> Tuple[Set[str], Set[str]]:
    """
    Scans the files matching a glob pattern for imports and definitions.

    The pattern is expanded with ``recursive=True`` so that ``**`` spans any
    number of directory levels (e.g. ``src/**/*.py``). Matches that are not
    regular files, such as directories matched by ``src/*``, are skipped
    instead of being opened.

    Args:
        pattern (str): The glob pattern to search for Python files.

    Returns:
        Tuple[Set[str], Set[str]]: The union of all imported module names and
        the union of all class/function names found in the matched files.
        Both sets are empty when nothing matches.
    """
    external_dependencies: Set[str] = set()
    local_definitions: Set[str] = set()

    for file_path in glob.glob(pattern, recursive=True):
        if not os.path.isfile(file_path):
            # Directories can match the pattern too; open() would fail on them.
            continue
        external_dependencies.update(extract_imports_from_file(file_path))
        local_definitions.update(extract_defined_entities_from_file(file_path))

    return external_dependencies, local_definitions
def resolve_import_name(import_name: str) -> Optional[str]:
    """
    Resolves an import name to the ``__name__`` of the module it imports.

    The module is actually imported, so its top-level code runs in this
    process. NOTE(review): this executes code from whatever package the
    scanned project imports; only run the tool on code you trust.

    Args:
        import_name (str): The import name to resolve.

    Returns:
        Optional[str]: The imported module's ``__name__``, or ``import_name``
        unchanged when the import fails for any reason.
    """
    try:
        module = importlib.import_module(import_name)
    except Exception:
        # Deliberately broad: besides ImportError, import_module raises
        # ValueError for an empty name and TypeError for a relative name, and
        # the imported module's own top-level code may raise anything. A name
        # that cannot be imported must still be reported, not crash the scan.
        return import_name
    return getattr(module, "__name__", import_name)
def get_std_lib_modules() -> Set[str]:
    """
    Retrieves the top-level names of all standard library modules.

    On interpreters that provide ``sys.stdlib_module_names`` (Python 3.10+)
    that set is used. Otherwise only the standard-library directories
    reported by ``sysconfig`` are scanned. Scanning the whole of ``sys.path``
    would also pick up site-packages and make every installed third-party
    package look like part of the standard library.

    Returns:
        Set[str]: A set of top-level standard library module names, including
        the interpreter's built-in modules.
    """
    std_lib_modules = set(sys.builtin_module_names)

    known_names = getattr(sys, "stdlib_module_names", None)
    if known_names is not None:
        std_lib_modules.update(known_names)
        return std_lib_modules

    # Fallback for interpreters without sys.stdlib_module_names.
    import sysconfig

    install_paths = sysconfig.get_paths()
    search_dirs = []
    for key in ("stdlib", "platstdlib"):
        base_dir = install_paths.get(key)
        if base_dir:
            search_dirs.append(base_dir)
            # Compiled extension modules usually live in this subdirectory.
            search_dirs.append(os.path.join(base_dir, "lib-dynload"))

    std_lib_modules.update(info.name for info in pkgutil.iter_modules(search_dirs))
    return std_lib_modules
def is_local_module(
    module_name: str, project_root: str, local_definitions: Set[str]
) -> bool:
    """
    Determines if a module is a local module within the project.

    A module counts as local when its name appears in ``local_definitions``
    or when ``<module path>.py`` or ``<module path>/__init__.py`` exists
    relative to ``project_root`` or to any directory below it. Checking every
    directory covers modules that sit next to the script importing them
    (e.g. ``scripts/helper.py`` imported as ``helper``) and ``src``-style
    layouts.

    Args:
        module_name (str): The name of the module to check.
        project_root (str): The root directory of the project.
        local_definitions (Set[str]): A set of local definitions (classes and functions).

    Returns:
        bool: True if the module is local, False otherwise.
    """
    if module_name in local_definitions:
        return True

    module_path = module_name.replace(".", os.sep)
    if module_path.startswith(os.sep):
        # A leading dot would become an absolute path and escape project_root.
        return False

    candidates = (module_path + ".py", os.path.join(module_path, "__init__.py"))
    for root, _, _ in os.walk(project_root):
        if any(
            os.path.isfile(os.path.join(root, candidate)) for candidate in candidates
        ):
            return True
    return False
def save_to_requirements_file(
    external_dependencies: Set[str], file_path: str = "requirements.txt"
) -> None:
    """
    Saves the external dependencies to a requirements.txt file.

    Names are written one per line in sorted order so that repeated runs on
    the same project produce an identical file. Set iteration order would
    otherwise vary between runs. An existing file is overwritten.

    Args:
        external_dependencies (Set[str]): The set of external dependencies.
        file_path (str): The path to the requirements file. Defaults to "requirements.txt".
    """
    with open(file_path, "w", encoding="utf-8") as file:
        file.writelines(
            f"{module_name}\n" for module_name in sorted(external_dependencies)
        )
def print_dependencies(external_dependencies: Set[str]) -> None:
    """
    Prints the external dependencies.

    Names are listed in sorted order so the output is stable between runs.
    When the set is empty a single "nothing found" line is printed instead.

    Args:
        external_dependencies (Set[str]): The set of external dependencies.
    """
    if not external_dependencies:
        print("No external dependencies found.")
        return

    print("External dependencies:")
    for module_name in sorted(external_dependencies):
        print(f"- {module_name}")
def main() -> None:
    """
    Command-line entry point: scan ``path`` and report external dependencies.

    Steps: collect imports and local definitions, resolve each import name,
    drop standard-library modules, drop modules found inside the project, and
    finally print the remainder or write it to ``requirements.txt``.
    """
    parser = argparse.ArgumentParser(
        description="Find external dependencies in files within a directory."
    )
    parser.add_argument(
        "path", metavar="path", type=str, help="Directory path or glob pattern"
    )
    # NOTE: --pattern is accepted on the command line, but the scanning
    # helpers called below take only `path`, so its value is not applied.
    parser.add_argument(
        "--pattern",
        metavar="pattern",
        type=str,
        default="*.py",
        help='File pattern (default is "*.py")',
    )
    parser.add_argument(
        "--output",
        metavar="output",
        type=str,
        default="print",
        choices=["print", "requirements"],
        help="Output method (print or requirements.txt)",
    )
    args = parser.parse_args()
    path = args.path
    output = args.output

    imported_modules, local_definitions = find_external_dependencies(path)

    # Resolve import names to actual module names and drop empty results.
    resolved_dependencies = {
        module_name
        for module_name in map(resolve_import_name, imported_modules)
        if module_name
    }

    # The standard-library set holds top-level names only, so a dotted import
    # such as "xml.etree.ElementTree" is matched on its first component.
    std_lib_modules = get_std_lib_modules()
    non_std_lib_dependencies = {
        module_name
        for module_name in resolved_dependencies
        if module_name.split(".")[0] not in std_lib_modules
    }

    # `path` may be a glob pattern rather than a directory. Walk up to the
    # nearest existing directory so the local-module check has a real root.
    project_root = path
    while project_root and not os.path.isdir(project_root):
        parent = os.path.dirname(project_root)
        if parent == project_root:
            break
        project_root = parent
    project_root = project_root or "."

    external_dependencies = {
        module_name
        for module_name in non_std_lib_dependencies
        if not is_local_module(module_name, project_root, local_definitions)
    }

    if output == "print":
        print_dependencies(external_dependencies)
    elif output == "requirements":
        if external_dependencies:
            save_to_requirements_file(external_dependencies)
            print("External dependencies saved to requirements.txt.")
        else:
            print("No external dependencies found.")


if __name__ == "__main__":
    main()