Added support for multiple, read-only caching directories.
jmfernandez committed Mar 3, 2025
1 parent 83b161b commit 20918d6
Showing 3 changed files with 80 additions and 25 deletions.
23 changes: 21 additions & 2 deletions cached-translated-groovy3-parser.py
@@ -360,11 +360,16 @@ def analyze_nf_source(
     jsonfile: "str",
     resultfile: "str",
     cache_directory: "Optional[str]" = None,
+    ro_cache_directories: "Sequence[str]" = [],
 ) -> "Union[RuleNode, LeafNode, EmptyNode]":
     with open(filename, mode="r", encoding="utf-8") as wfH:
         content = wfH.read()

-    t_tree = parse_and_digest_groovy_content(content, cache_directory=cache_directory)
+    t_tree = parse_and_digest_groovy_content(
+        content,
+        cache_directory=cache_directory,
+        ro_cache_directories=ro_cache_directories,
+    )

     # These are for debugging purposes
     # logging.debug(tree.pretty())
@@ -409,6 +414,16 @@ def analyze_nf_source(
         print(
             "[WARNING] No caching is done. If you want to cache parsed content declare variable GROOVY_CACHEDIR"
         )
+
+    ro_cache_directories = []
+    cache_directory_ro = os.environ.get("GROOVY_CACHEDIRS_RO")
+    if cache_directory_ro is not None:
+        print(f"* Using as read-only caching directories {cache_directory_ro}")
+        ro_cache_directories = cache_directory_ro.split(":")
+    else:
+        print(
+            "[WARNING] No read-only caching is used. If you want to use cached parsed contents declare variable GROOVY_CACHEDIRS_RO, separating more than one path by colons"
+        )
     for filename in sys.argv[1:]:
         print(f"* Parsing {filename}")
         logfile = filename + ".lark"
@@ -420,7 +435,11 @@ def analyze_nf_source(
         log.addHandler(fH)  # set the new handler
         try:
             analyze_nf_source(
-                filename, jsonfile, resultfile, cache_directory=cache_directory
+                filename,
+                jsonfile,
+                resultfile,
+                cache_directory=cache_directory,
+                ro_cache_directories=ro_cache_directories,
             )
         except Exception as e:
             print(f"\tParse failed, see {logfile}")
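The new read-only lookup in the script above is driven entirely by environment variables. Below is a minimal sketch of the resolution logic; the paths are hypothetical, while the variable names GROOVY_CACHEDIR and GROOVY_CACHEDIRS_RO are the ones introduced in the diff:

    import os

    # Hypothetical cache locations, for illustration only
    os.environ["GROOVY_CACHEDIR"] = "/var/tmp/groovy-cache"
    os.environ["GROOVY_CACHEDIRS_RO"] = "/opt/shared/groovy-cache:/mnt/ci/groovy-cache"

    # Mirrors the logic added above: one writable cache directory,
    # plus zero or more read-only ones separated by colons
    cache_directory = os.environ.get("GROOVY_CACHEDIR")
    ro_cache_directories = []
    cache_directory_ro = os.environ.get("GROOVY_CACHEDIRS_RO")
    if cache_directory_ro is not None:
        ro_cache_directories = cache_directory_ro.split(":")

Read-only caches can therefore be shared between users or baked into container images, while writes keep going to a single per-user directory.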
6 changes: 3 additions & 3 deletions groovy_parser/__init__.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-

 # SPDX-License-Identifier: Apache-2.0
-# Copyright (C) 2024 Barcelona Supercomputing Center, José M. Fernández
+# Copyright (C) 2025 Barcelona Supercomputing Center, José M. Fernández
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,8 +17,8 @@
 # limitations under the License.

 __author__ = "José M. Fernández <https://orcid.org/0000-0002-4806-5140>"
-__copyright__ = "2024 Barcelona Supercomputing Center (BSC), ES"
+__copyright__ = "2025 Barcelona Supercomputing Center (BSC), ES"
 __license__ = "Apache-2.0"

 # https://www.python.org/dev/peps/pep-0396/
-__version__ = "0.1.2"
+__version__ = "0.2.0"
76 changes: 56 additions & 20 deletions groovy_parser/parser.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-

 # SPDX-License-Identifier: Apache-2.0
-# Copyright (C) 2024 Barcelona Supercomputing Center, José M. Fernández
+# Copyright (C) 2025 Barcelona Supercomputing Center, José M. Fernández
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@
 import json
 import os
 import os.path
+import pathlib
 from typing import (
     cast,
     TYPE_CHECKING,
@@ -157,6 +158,7 @@ def create_groovy_parser() -> "Lark":
         #    parser='lalr',
         #    debug=True,
         start="compilation_unit",
+        #    ambiguity='explicit',
         #    lexer_callbacks={
         #        'square_bracket_block': jarlmethod
         #    }
@@ -217,13 +219,21 @@ def digest_lark_tree(

 def parse_and_digest_groovy_content(
     content: "str",
-    cache_directory: "Optional[str]" = None,
+    ro_cache_directories: "Optional[Sequence[Union[str, os.PathLike[str]]]]" = None,
+    cache_directory: "Optional[Union[str, os.PathLike[str]]]" = None,
     prune: "Sequence[str]" = ["sep", "nls"],
     noflat: "Sequence[str]" = ["script_statement"],
 ) -> "Union[RuleNode, LeafNode, EmptyNode]":
     t_tree: "Optional[Union[RuleNode, LeafNode, EmptyNode]]" = None
-    hashfile: "Optional[str]" = None
-    if cache_directory is not None and os.path.isdir(cache_directory):
+    hashpath: "Optional[pathlib.Path]" = None
+    cache_path: "Optional[pathlib.Path]" = None
+    if cache_directory is not None:
+        if isinstance(cache_directory, pathlib.Path):
+            cache_path = cache_directory
+        else:
+            cache_path = pathlib.Path(cache_directory)
+
+    if cache_path is not None and cache_path.is_dir():
         h = hashlib.sha256()
         buff = bytearray(BLOCK_SIZE)

@@ -246,31 +256,57 @@
         # Now we can obtain the relative directory, unique to this
         # version of the software and its dependencies
         hreldir = h.copy().hexdigest()
-        this_cache_directory = os.path.join(cache_directory, hreldir)
-        os.makedirs(this_cache_directory, exist_ok=True)

-        # Now, let's go for the content signature
-        h.update(content.encode("utf-8"))
-        hashfile = os.path.join(this_cache_directory, h.hexdigest() + ".json.gz")
+        ro_cache_paths: "MutableSequence[pathlib.Path]" = []
+        if ro_cache_directories is not None:
+            for ro_cache_directory in ro_cache_directories:
+                if isinstance(ro_cache_directory, pathlib.Path):
+                    ro_cache_path = ro_cache_directory
+                else:
+                    ro_cache_path = pathlib.Path(ro_cache_directory)
+
+                # Include only existing cache paths
+                this_ro_cache_path = ro_cache_path / hreldir
+                if this_ro_cache_path.is_dir():
+                    ro_cache_paths.append(this_ro_cache_path)

-        if os.path.isfile(hashfile):
-            try:
-                with gzip.open(hashfile, mode="rt", encoding="utf-8") as jH:
-                    t_tree = json.load(jH)
-            except:
-                # If it is unreadable, re-create
-                pass
+        this_cache_path = cache_path / hreldir
+        this_cache_path.mkdir(parents=True, exist_ok=True)

-    if t_tree is None:
+        ro_cache_paths.append(this_cache_path)
+
+        # Now, let's go for the content signature
+        h.update(content.encode("utf-8"))
+        rel_hashpath = h.hexdigest() + ".json.gz"
+
+        # This is needed in case nothing was available
+        hashpath = this_cache_path / rel_hashpath
+        for ro_cache_path in ro_cache_paths:
+            ro_hashpath = ro_cache_path / rel_hashpath
+            if ro_hashpath.is_file():
+                try:
+                    with gzip.open(
+                        ro_hashpath.as_posix(), mode="rt", encoding="utf-8"
+                    ) as jH:
+                        t_tree = json.load(jH)
+                        hashpath = None
+                    break
+                except:
+                    # If it is unreadable, re-create
+                    pass
+
+    if t_tree is None and (hashpath is not None or cache_path is None):
         tree = parse_groovy_content(content)
         t_tree = LarkFilteringTreeEncoder().default(
             tree,
             prune=prune,
             noflat=noflat,
         )

-        if hashfile is not None:
-            with gzip.open(hashfile, mode="wt", encoding="utf-8") as jH:
-                json.dump(t_tree, jH, sort_keys=True)
+    assert t_tree is not None
+
+    if hashpath is not None:
+        with gzip.open(hashpath.as_posix(), mode="wt", encoding="utf-8") as jH:
+            json.dump(t_tree, jH, sort_keys=True)

     return t_tree
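With these changes, parse_and_digest_groovy_content derives a cache-relative name hreldir/<sha256 of content>.json.gz, probes every existing read-only cache (plus the writable one, which is appended last) for it, and only parses and writes to the writable cache on a miss. A minimal usage sketch, with hypothetical paths:

    import pathlib

    from groovy_parser.parser import parse_and_digest_groovy_content

    # Hypothetical cache locations, for illustration only
    shared_ro_cache = pathlib.Path("/opt/shared/groovy-cache")  # read-only
    local_cache = "/var/tmp/groovy-cache"  # writable

    content = 'println "Hello from Groovy"'

    # Read-only caches are consulted first; on a miss the content is
    # parsed and the digested tree is stored under cache_directory
    t_tree = parse_and_digest_groovy_content(
        content,
        ro_cache_directories=[shared_ro_cache],
        cache_directory=local_cache,
    )

Since both parameters now accept str as well as os.PathLike values, pathlib.Path objects and plain strings can be mixed freely.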
