Skip to content

Commit

Permalink
Update package assembly to consider other manifests
Browse files Browse the repository at this point in the history
Previously we used to yield resources for the entire subtree after
we found a package manifest, and ignored and skipped creating packages
and dependencies from other manifests present there. The package
assembly process is updated to only yield and ignore the package manifest
resources already used to create packages/dependencies.

Reference: #3604
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Jul 29, 2024
1 parent dd675aa commit eb5903b
Show file tree
Hide file tree
Showing 15 changed files with 872 additions and 381 deletions.
11 changes: 7 additions & 4 deletions src/packagedcode/about.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,11 @@ def parse(cls, location, package_only=False):
declared_license_expression = package_data.get('license_expression')

owner = package_data.get('owner')
if not isinstance(owner, str):
owner = repr(owner)
parties = [models.Party(type=models.party_person, name=owner, role='owner')]
parties = []
if owner:
if not isinstance(owner, str):
owner = repr(owner)
parties.append(models.Party(type=models.party_person, name=owner, role='owner'))

# FIXME: also include notice_file and license_file(s) as file_references
file_references = []
Expand Down Expand Up @@ -157,7 +159,8 @@ def assemble(cls, package_data, resource, codebase, package_adder):
missing = sorted(file_references_by_path.values(), key=lambda r: r.path)
package.extra_data['missing_file_references'] = missing
else:
package.extra_data['missing_file_references'] = package_data.file_references[:]
if package.file_references:
package.extra_data['missing_file_references'] = package_data.file_references[:]

# we yield this as we do not want this further processed
yield resource
9 changes: 7 additions & 2 deletions src/packagedcode/cargo.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,18 @@ def logger_debug(*args):


class CargoBaseHandler(models.DatafileHandler):

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
"""
Assemble Cargo.toml and possible Cargo.lock datafiles. Also
support cargo workspaces where we have multiple packages from
a repository and some shared information present at top-level.
"""
datafile_name_patterns = (
CargoLockHandler.path_patterns + CargoTomlHandler.path_patterns
)

workspace = package_data.extra_data.get('workspace', {})
workspace_members = workspace.get("members", [])
workspace_package_data = workspace.get("package", {})
Expand Down Expand Up @@ -89,14 +94,14 @@ def assemble(cls, package_data, resource, codebase, package_adder):
resource.save(codebase)

yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'),
datafile_name_patterns=datafile_name_patterns,
directory=workspace_directory,
codebase=codebase,
package_adder=package_adder,
)
else:
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'),
datafile_name_patterns=datafile_name_patterns,
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
Expand Down
5 changes: 4 additions & 1 deletion src/packagedcode/chef.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,11 @@ def assemble(cls, package_data, resource, codebase, package_adder):
"""
Assemble Package from Chef metadata.rb, then from metadata.json files.
"""
datafile_name_patterns = (
ChefMetadataRbHandler.path_patterns + ChefMetadataJsonHandler.path_patterns
)
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('metadata.rb', 'metadata.json',),
datafile_name_patterns=datafile_name_patterns,
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
Expand Down
7 changes: 4 additions & 3 deletions src/packagedcode/cocoapods.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import logging
import sys
from functools import partial
from fnmatch import fnmatchcase

import saneyaml
from packageurl import PackageURL
Expand Down Expand Up @@ -142,18 +143,18 @@ def assemble(cls, package_data, resource, codebase, package_adder):
else:
# do we have more than one podspec?
parent = resource.parent(codebase)
podspec_path_pattern = PodspecHandler.path_patterns[0]
sibling_podspecs = [
r for r in parent.children(codebase)
if r.name.endswith('.podspec')
if fnmatchcase(r.name, podspec_path_pattern)
]

siblings_counts = len(sibling_podspecs)
has_single_podspec = siblings_counts == 1
has_multiple_podspec = siblings_counts > 1

datafile_name_patterns = (
'Podfile.lock',
'Podfile',
PodfileHandler.path_patterns + PodfileLockHandler.path_patterns
)

if has_single_podspec:
Expand Down
5 changes: 4 additions & 1 deletion src/packagedcode/golang.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,11 @@ def assemble(cls, package_data, resource, codebase, package_adder):
"""
Always use go.mod first then go.sum
"""
datafile_name_patterns = (
GoModHandler.path_patterns + GoSumHandler.path_patterns
)
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=('go.mod', 'go.sum',),
datafile_name_patterns=datafile_name_patterns,
directory=resource.parent(codebase),
codebase=codebase,
package_adder=package_adder,
Expand Down
2 changes: 1 addition & 1 deletion src/packagedcode/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def assemble(cls, package_data, resource, codebase, package_adder=models.add_to_

# This order is important as we want pom.xml to be used for package
# creation and then to update from MANIFEST later
manifest_path_pattern = '*/META-INF/MANIFEST.MF'
manifest_path_pattern = JavaJarManifestHandler.path_patterns[0]
nested_pom_xml_path_pattern = '*/META-INF/maven/**/pom.xml'
datafile_name_patterns = (nested_pom_xml_path_pattern, manifest_path_pattern)

Expand Down
14 changes: 9 additions & 5 deletions src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1279,7 +1279,7 @@ def assemble_from_many(
else:
# FIXME: What if the package_data is NOT for the same package as package?
# FIXME: What if the update did not do anything? (it does return True or False)
# FIXME: There we would be missing out packges AND/OR errors
# FIXME: There we would be missing out packages AND/OR errors
package.update(
package_data=package_data,
datafile_path=resource.path,
Expand Down Expand Up @@ -1309,7 +1309,7 @@ def assemble_from_many(
yield package
yield from dependencies

# Associate Package to Resources and yield them
# Associate Package to the manifest resources and yield them
for resource in resources:
package_adder(package_uid, resource, codebase)
yield resource
Expand All @@ -1318,11 +1318,12 @@ def assemble_from_many(
package_adder(package_uid, resource, codebase)
yield resource

# the whole parent subtree of the base_resource is for this package
# the whole parent subtree of the base_resource is for this package,
# so assign resources to package
if package_uid:
for res in base_resource.walk(codebase):
package_adder(package_uid, res, codebase)
yield res

if parent_resource:
package_adder(package_uid, parent_resource, codebase)
yield parent_resource
Expand Down Expand Up @@ -1368,7 +1369,10 @@ def assemble_from_many_datafiles(
# we iterate on datafile_name_patterns because their order matters
for datafile_name_pattern in datafile_name_patterns:
for sibling in siblings:
if fnmatchcase(sibling.name, datafile_name_pattern):
if (
fnmatchcase(sibling.name, datafile_name_pattern) or
fnmatchcase(sibling.location, datafile_name_pattern)
):
for package_data in sibling.package_data:
package_data = PackageData.from_dict(package_data)
pkgdata_resources.append((package_data, sibling,))
Expand Down
5 changes: 2 additions & 3 deletions src/packagedcode/npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
for npm_res in cls.walk_npm(resource=workspace_root, codebase=codebase):
if package_uid and package_uid not in npm_res.for_packages:
package_adder(package_uid, npm_res, codebase)
yield npm_res

yield package_resource

elif workspaces:
Expand Down Expand Up @@ -190,7 +190,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
for npm_res in cls.walk_npm(resource=workspace_root, codebase=codebase):
if package_uid and not npm_res.for_packages:
package_adder(package_uid, npm_res, codebase)
yield npm_res

yield package_resource

else:
Expand Down Expand Up @@ -257,7 +257,6 @@ def create_packages_from_workspaces(
for npm_res in cls.walk_npm(resource=member_root, codebase=codebase):
if package_uid and package_uid not in npm_res.for_packages:
package_adder(package_uid, npm_res, codebase)
yield npm_res

yield from cls.yield_npm_dependencies_and_resources(
package_resource=workspace_member,
Expand Down
6 changes: 2 additions & 4 deletions src/packagedcode/phpcomposer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,8 @@ class BasePhpComposerHandler(models.DatafileHandler):

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
datafile_name_patterns = (
'composer.json',
'composer.lock',
)
datafile_name_patterns = \
PhpComposerJsonHandler.path_patterns + PhpComposerLockHandler.path_patterns

if resource.has_parent():
dir_resource = resource.parent(codebase)
Expand Down
10 changes: 5 additions & 5 deletions src/packagedcode/plugin_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,15 +376,15 @@ def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=Fals
packages = []
dependencies = []

seen_resource_paths = set()
seen_package_manifest_paths = set()

has_single_resource = codebase.has_single_resource
# track resource ids that have been already processed
for resource in codebase.walk(topdown=False):
if not resource.package_data:
continue

if resource.path in seen_resource_paths:
if resource.path in seen_package_manifest_paths:
continue

if TRACE_ASSEMBLY:
Expand Down Expand Up @@ -430,12 +430,12 @@ def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=Fals
dependencies.append(item)

elif isinstance(item, Resource):
seen_resource_paths.add(item.path)
seen_package_manifest_paths.add(item.path)

if TRACE_ASSEMBLY:
logger_debug(
' get_package_and_deps: seen_resource_path:',
seen_resource_paths,
' get_package_and_deps: seen_package_manifest_paths:',
seen_package_manifest_paths,
)

else:
Expand Down
14 changes: 5 additions & 9 deletions src/packagedcode/rubygems.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,9 @@ def assemble_extracted_gem(cls, package_data, resource, codebase, package_adder)
An assemble implementation shared by handlers for manifests found in an
extracted gem using extractcode.
"""
datafile_name_patterns = (
'metadata.gz-extract/metadata.gz-extract',
'data.gz-extract/*.gemspec',
'data.gz-extract/Gemfile',
'data.gz-extract/Gemfile.lock',
datafile_name_patterns = ('metadata.gz-extract/metadata.gz-extract',) + (
GemspecHandler.path_patterns + GemfileHandler.path_patterns
+ GemfileLockHandler.path_patterns
)

gemroot = get_ancestor(levels_up=2, resource=resource, codebase=codebase)
Expand Down Expand Up @@ -105,11 +103,9 @@ class BaseGemProjectHandler(models.DatafileHandler):
@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
datafile_name_patterns = (
'*.gemspec',
'Gemfile',
'Gemfile.lock',
GemspecHandler.path_patterns + GemfileHandler.path_patterns
+ GemfileLockHandler.path_patterns
)

yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=datafile_name_patterns,
directory=resource.parent(codebase),
Expand Down
Loading

0 comments on commit eb5903b

Please sign in to comment.