-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create a log parser for gathering statistics
- Loading branch information
Showing
4 changed files
with
5,962 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
120 changes: 120 additions & 0 deletions
120
src/lisfloodutilities/gridding/tools/read_stats_from_logs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
from dask.dataframe.io.tests.test_json import df | ||
__author__="Goncalo Gomes" | ||
__date__="$Jun 06, 2024 10:45:00$" | ||
__version__="0.1" | ||
__updated__="$Jun 06, 2024 10:45:00$" | ||
|
||
""" | ||
Copyright 2019-2020 European Union | ||
Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence"); | ||
You may not use this work except in compliance with the Licence. | ||
You may obtain a copy of the Licence at: | ||
https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt | ||
Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the Licence for the specific language governing permissions and limitations under the Licence. | ||
""" | ||
|
||
import sys | ||
import os | ||
from pathlib import Path | ||
from argparse import ArgumentParser, ArgumentTypeError | ||
import pandas as pd | ||
import json | ||
from lisfloodutilities.gridding.lib.utils import FileUtils | ||
|
||
|
||
def run(infolder: str, outfile: str, search_string: str):
    """Gather logged statistics dictionaries into one tab-separated file.

    Recursively scans ``infolder`` for ``*.log`` files (processed in sorted
    path order), keeps every line that starts with ``search_string``, parses
    the remainder of that line as JSON and writes all parsed records to
    ``outfile`` as a TSV table with a header row.

    Args:
        infolder: Folder that is searched recursively for ``*.log`` files.
        outfile: Path of the TSV file to write. Missing parent folders are
            created, even when no statistics are found.
        search_string: Prefix that tags a statistics line. Everything after
            the prefix must be a JSON document.

    Raises:
        json.JSONDecodeError: If a tagged line does not contain valid JSON.
    """
    inwildcard = '*.log'

    outfilepath = Path(outfile)
    # Create the output parent folders if they do not exist yet
    outfilepath.parent.mkdir(parents=True, exist_ok=True)

    prefix_length = len(search_string)
    frames = []
    for filename in sorted(Path(infolder).rglob(inwildcard)):
        print(f'Processing file: {filename}')
        with open(filename, 'r') as file:
            # Slice the tag off first and strip afterwards, so surrounding
            # whitespace in the payload can never shift the slice position.
            stats_dictionaries = [json.loads(line[prefix_length:].strip())
                                  for line in file
                                  if line.startswith(search_string)]
        if stats_dictionaries:
            frames.append(pd.DataFrame(stats_dictionaries))

    # pd.concat expects an iterable of frames; concatenating once at the end
    # also avoids re-copying the accumulated data for every log file.
    out_df = pd.concat(frames, ignore_index=True) if frames else None

    if out_df is None or out_df.empty:
        print('WARNING: No lines containing statistics were found.')
    else:
        out_df.to_csv(outfilepath, index=False, header=True, sep='\t')
        print(f'Wrote file: {outfilepath}')
        print(out_df)
|
||
|
||
def main(argv):
    """Parse the command line options and run the log statistics parser.

    Args:
        argv: Command line arguments without the program name
            (typically ``sys.argv[1:]``).

    Returns:
        0 on success, 2 when an exception was raised while parsing the
        options or processing the log files. ``SystemExit`` raised by
        argparse (``--help`` or invalid options) is not intercepted.
    """
    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = "%s" % __updated__

    program_version_string = 'version %s (%s)\n' % (program_version, program_build_date)
    program_longdesc = '''
This script parses a list of log files containing statistics in the form of dictionary and converts the logged statistics into one tab separated file.
'''
    program_license = """
Copyright 2019-2020 European Union
Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence");
You may not use this work except in compliance with the Licence.
You may obtain a copy of the Licence at:
https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the Licence for the specific language governing permissions and limitations under the Licence.
"""

    try:
        # setup option parser
        parser = ArgumentParser(epilog=program_license, description=program_version_string+program_longdesc)

        # set defaults
        parser.set_defaults(search_string='#APP_STATS: ')

        parser.add_argument("-i", "--in", dest="infolder", required=True, type=FileUtils.folder_type,
                            help="Set input folder path with log files (*.log)",
                            metavar="/input/folder/logfiles/")
        parser.add_argument("-o", "--out", dest="outfile", required=True, type=FileUtils.file_type,
                            help="Set output file name (*.tsv).",
                            metavar="/path/to/output_file.tsv")
        parser.add_argument("-s", "--search", dest="search_string", required=False, type=str,
                            help=('Set line tag that identifies the statistics dictionary. '
                                  'It will be used to parse the line, so the space at the end is necessary.'),
                            metavar='"#APP_STATS: "')

        # process options
        args = parser.parse_args(argv)

        print(f"Input Folder: {args.infolder}")
        print(f"Output File: {args.outfile}")
        print(f'Search String: [{args.search_string}]')

        run(args.infolder, args.outfile, args.search_string)
        print("Finished.")
    except Exception as e:
        # Top-level boundary: report the failure on stderr and signal it to
        # the caller through the exit status instead of printing a traceback.
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help\n")
        return 2
    return 0
|
||
|
||
def main_script():
    """Console entry point: run ``main`` on the CLI arguments and exit with its status."""
    cli_arguments = sys.argv[1:]
    exit_status = main(cli_arguments)
    sys.exit(exit_status)
|
||
|
||
# Run the command line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main_script()
Binary file not shown.
Oops, something went wrong.