-
Notifications
You must be signed in to change notification settings - Fork 0
/
combine.py
240 lines (181 loc) · 8.41 KB
/
combine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# // ---------------------------------------------------------------------
# // ------- [Noir] Combiner Tool
# // ---------------------------------------------------------------------
"""
A tool for combining all files in a directory into one.
Repo: https://github.com/cuhHub/Noir
Copyright (C) 2024 Cuh4
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# ---- // Imports
from pathlib import Path
import click
import json
# ---- // Classes
class Combiner():
"""
A class used to combine all files in a directory into one.
"""
def __init__(self, directory: Path, destination: Path, whitelisted_extensions: list[str], blacklisted_extensions: list[str], ignored: list[Path]):
"""
Initialize the class.
Args:
directory (Path): The directory containing files to combine.
destination (Path): The file which should have the content of all files combined.
whitelisted_extensions (list[str]): The file extensions to allow. Leave empty to allow all extensions.
blacklisted_extensions (list[str]): The file extensions to ignore. Leave empty to ignore no extensions.
ignored (list[Path]): The paths (inc. files) to ignore when combining.
Raises:
ValueError: If both whitelisted_extensions and blacklisted_extensions are used at the same time.
ValueError: If the directory does not exist.
"""
if len(whitelisted_extensions) > 0 and len(blacklisted_extensions) > 0:
raise ValueError("Cannot use both whitelisted_extensions and blacklisted_extensions at the same time.")
if not directory.exists():
raise ValueError("Directory does not exist.")
self.directory = directory
self.destination = destination
self.whitelisted_extensions = whitelisted_extensions
self.blacklisted_extensions = blacklisted_extensions
self.ignored = ignored
self.ignored.extend([Path(destination)])
def combine(self, prevent_write: bool = False, *, _directory: Path|None = None) -> tuple[str, dict[Path, str]]:
"""
Combine all files in the directory into one.
Args:
prevent_write (bool, optional): Whether or not to prevent writing the combined file. Defaults to False.
_directory (Path, optional): The directory to combine. Used internally. Defaults to None.
Returns:
str: The combined content of all files, joined together by two newlines.
dict[Path, str]: The contents of all combined files.
Raises:
ValueError: If an existing `__order.json` file is invalid.
"""
# Validation
if _directory is None:
_directory = self.directory
# For later
contents: dict[Path, str] = {}
# Read __order.json if it exists
order = self._read_order(_directory)
if order is not None:
orderedFiles: list[str]|None = order.get("order")
if orderedFiles is None:
raise ValueError(f"Invalid `__order.json` file @ {path}. Missing `order` list.")
paths = [_directory / file for file in orderedFiles]
else:
paths = [*_directory.iterdir()]
# Read files
for path in paths:
if path.is_file():
# Check if the file is allowed
if not self.is_file_allowed(path):
continue
# Read and save
try:
contents[path] = path.read_text("utf-8")
except:
continue
else:
# Check if the directory is allowed
if not self._is_directory_allowed(path):
continue
# Iterate through files
_, results = self.combine(prevent_write = prevent_write, _directory = path)
contents.update(results)
# Write
result = "\n\n".join(contents.values())
if not prevent_write:
self.destination.parents[0].mkdir(exist_ok = True)
self.destination.write_text(result, encoding = "utf-8")
# Return
return result, contents
def _read_order(self, directory: Path) -> dict|None:
"""
Read an __order.json file.
Args:
directory (Path): The directory containing the file.
Returns:
dict|None: The __order.json contents as a dictionary, or None if it does not exist.
"""
order_definition = directory / "__order.json"
if not order_definition.exists():
return None
try:
return json.loads(order_definition.read_text("utf-8"))
except json.JSONDecodeError:
raise ValueError(f"Invalid `__order.json` file @ {order_definition}.")
def _is_directory_allowed(self, path: Path) -> bool:
"""
Check if a directory is allowed to be parsed.
Args:
path (Path): The path to check.
Returns:
bool: Whether or not the directory is allowed to be parsed.
"""
if self.in_paths(path, self.ignored):
return False
return True
def is_file_allowed(self, path: Path) -> bool:
"""
Check if a file is allowed to be parsed.
Args:
path (Path): The path to check.
Returns:
bool: Whether or not the file is allowed to be parsed.
"""
if len(self.whitelisted_extensions) > 0 and not path.suffix in self.whitelisted_extensions:
return False
if len(self.blacklisted_extensions) > 0 and path.suffix in self.blacklisted_extensions:
return False
if self.in_paths(path, self.ignored):
return False
return True
def in_paths(self, path: Path, paths: list[Path]) -> bool:
"""
Check if a path is in a list of paths.
Args:
path (Path): The path to check.
paths (list[Path]): A list of paths to check against.
Returns:
bool: Whether or not the path is in the list of paths.
"""
for current_path in paths:
if path.absolute() == current_path.absolute():
return True
return False
# -----------------------------------------
# // ---- Main
# -----------------------------------------
@click.command()
@click.option("--directory", "-d", "-p", "--path", type = str, required = True, help = "The directory containing files to combine.")
@click.option("--destination", "-de", type = str, required = True, help = "The file which should have the content of all files combined. Created automatically if it doesn't exist.")
@click.option("--allow_file_extension", "-afe", default = [], multiple = True, help = "The file extensions to allow.")
@click.option("--ignore_path", "-ip", default = [], multiple = True, help = "The paths to ignore when combining.")
def combiner_tool(directory: str, destination: str, allow_file_extension: list[str], ignore_path: list[str]):
    # Command-line entry point: builds a Combiner from the options, runs it,
    # then lists every file that ended up in the combined output.

    # Paths to skip: the ones given via --ignore_path, plus this script itself
    skipped_paths = [*map(Path, ignore_path), Path(__file__)]

    tool = Combiner(
        directory = Path(directory),
        destination = Path(destination),
        whitelisted_extensions = allow_file_extension,
        blacklisted_extensions = [],
        ignored = skipped_paths
    )

    # Only the per-file mapping is needed here; the combined text is written by combine()
    combined_files = tool.combine()[1]

    # Output
    click.secho("[Done] Combined the following files:", fg = "green", underline = True, bold = True)

    listing = "- " + "\n- ".join(map(str, combined_files))
    click.echo(click.style(listing, fg = "yellow"))


# Run the CLI only when this file is executed directly
if __name__ == "__main__":
    combiner_tool()