Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(report): add export system and its endpoint #6746

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .env
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the ARTIFACTS_* environment variables should be prefixed with DJANGO_, because that is the convention we've been using for the variables consumed by Django.

Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,23 @@ VALKEY_HOST=valkey
VALKEY_PORT=6379
VALKEY_DB=0

# API scan settings
# The AWS access key to be used when uploading scan artifacts to an S3 bucket
# If left empty, default AWS credentials resolution behavior will be used
ARTIFACTS_AWS_ACCESS_KEY_ID=""

# The AWS secret key to be used when uploading scan artifacts to an S3 bucket
ARTIFACTS_AWS_SECRET_ACCESS_KEY=""

# An optional AWS session token
ARTIFACTS_AWS_SESSION_TOKEN=""

# The AWS region where your S3 bucket is located (e.g., "us-east-1")
ARTIFACTS_AWS_DEFAULT_REGION=""

# The name of the S3 bucket where scan artifacts should be stored
ARTIFACTS_AWS_S3_OUTPUT_BUCKET=""

# Django settings
DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1,prowler-api
DJANGO_BIND_ADDRESS=0.0.0.0
Expand Down
32 changes: 32 additions & 0 deletions api/src/backend/api/specs/v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4093,6 +4093,38 @@ paths:
schema:
$ref: '#/components/schemas/ScanUpdateResponse'
description: ''
/api/v1/scans/{id}/report:
get:
operationId: scans_report_retrieve
description: Returns a ZIP file containing the requested report
summary: Download ZIP report
parameters:
- in: query
name: fields[scans]
schema:
type: array
items:
type: string
enum: []
description: endpoint return only specific fields in the response on a per-type
basis by including a fields[TYPE] query parameter.
explode: false
- in: path
name: id
schema:
type: string
format: uuid
description: A UUID string identifying this scan.
required: true
tags:
- Scan
security:
- jwtAuth: []
responses:
'200':
description: Report obtanined successfully
'404':
description: Report not found
/api/v1/schedules/daily:
post:
operationId: schedules_daily_create
Expand Down
8 changes: 8 additions & 0 deletions api/src/backend/api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,14 @@ class Meta:
]


class ScanReportSerializer(RLSSerializer):
    """Serializer selected for the scan ``report`` action; exposes only ``id``."""

    class Meta:
        model = Scan
        fields = ["id"]


class ResourceTagSerializer(RLSSerializer):
"""
Serializer for the ResourceTag model
Expand Down
106 changes: 106 additions & 0 deletions api/src/backend/api/v1/views.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
import glob
import os

import boto3
from botocore.exceptions import ClientError, NoCredentialsError, ParamValidationError
from celery.result import AsyncResult
from config.env import env
from django.conf import settings as django_settings
from django.contrib.postgres.aggregates import ArrayAgg
from django.contrib.postgres.search import SearchQuery
from django.db import transaction
from django.db.models import Count, F, OuterRef, Prefetch, Q, Subquery, Sum
from django.http import HttpResponse
from django.db.models.functions import Coalesce
from django.urls import reverse
from django.utils.decorators import method_decorator
Expand Down Expand Up @@ -114,6 +121,7 @@
RoleSerializer,
RoleUpdateSerializer,
ScanCreateSerializer,
ScanReportSerializer,
ScanSerializer,
ScanUpdateSerializer,
ScheduleDailyCreateSerializer,
Expand All @@ -126,6 +134,7 @@
UserSerializer,
UserUpdateSerializer,
)
from prowler.config.config import tmp_output_directory

CACHE_DECORATOR = cache_control(
max_age=django_settings.CACHE_MAX_AGE,
Expand Down Expand Up @@ -1073,6 +1082,8 @@ def get_serializer_class(self):
return ScanCreateSerializer
elif self.action == "partial_update":
return ScanUpdateSerializer
elif self.action == "report":
return ScanReportSerializer
return super().get_serializer_class()

def partial_update(self, request, *args, **kwargs):
Expand Down Expand Up @@ -1127,6 +1138,101 @@ def create(self, request, *args, **kwargs):
},
)

@extend_schema(
tags=["Scan"],
summary="Download ZIP report",
description="Returns a ZIP file containing the requested report",
request=ScanReportSerializer,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this required? There shouldn't be any request payload for this endpoint.

responses={
200: OpenApiResponse(description="Report obtanined successfully"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
200: OpenApiResponse(description="Report obtanined successfully"),
200: OpenApiResponse(description="Report obtained successfully"),

404: OpenApiResponse(description="Report not found"),
},
Comment on lines +1147 to +1149
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be autogenerated, since this is a detail action view. Please avoid this kind of documentation detail unless you add it for every viewset in the module.

)
@action(detail=True, methods=["get"], url_name="report")
def report(self, request, pk=None):
s3_client = None
try:
s3_client = boto3.client("s3")
s3_client.list_buckets()
except (ClientError, NoCredentialsError, ParamValidationError):
try:
s3_client = boto3.client(
"s3",
aws_access_key_id=env.str("ARTIFACTS_AWS_ACCESS_KEY_ID"),
aws_secret_access_key=env.str("ARTIFACTS_AWS_SECRET_ACCESS_KEY"),
aws_session_token=env.str("ARTIFACTS_AWS_SESSION_TOKEN"),
region_name=env.str("ARTIFACTS_AWS_DEFAULT_REGION"),
)
s3_client.list_buckets()
except (ClientError, NoCredentialsError, ParamValidationError):
s3_client = None

if s3_client:
bucket_name = env.str("ARTIFACTS_AWS_S3_OUTPUT_BUCKET")
s3_prefix = f"{request.tenant_id}/{pk}/"

try:
response = s3_client.list_objects_v2(
Bucket=bucket_name, Prefix=s3_prefix
)
if response["KeyCount"] == 0:
return Response(
{"detail": "No files found in S3 storage"},
status=status.HTTP_404_NOT_FOUND,
)

zip_files = [
obj["Key"]
for obj in response.get("Contents", [])
if obj["Key"].endswith(".zip")
]
if not zip_files:
return Response(
{"detail": "No ZIP files found in S3 storage"},
status=status.HTTP_404_NOT_FOUND,
)

s3_key = zip_files[0]
s3_object = s3_client.get_object(Bucket=bucket_name, Key=s3_key)
file_content = s3_object["Body"].read()
filename = os.path.basename(s3_key)

except ClientError:
return Response(
{"detail": "Error accessing cloud storage"},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)

else:
local_path = os.path.join(
tmp_output_directory,
str(request.tenant_id),
str(pk),
"*.zip",
)
zip_files = glob.glob(local_path)
if not zip_files:
return Response(
{"detail": "No local files found"}, status=status.HTTP_404_NOT_FOUND
)

try:
file_path = zip_files[0]
with open(file_path, "rb") as f:
file_content = f.read()
filename = os.path.basename(file_path)
except IOError:
return Response(
{"detail": "Error reading local file"},
Comment on lines +1153 to +1226
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this belongs here. It also makes unit testing much more difficult. Please create a service layer to encapsulate all the logic related to the S3 integration.

status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)

response = HttpResponse(
file_content, content_type="application/x-zip-compressed"
)
response["Content-Disposition"] = f'attachment; filename="{filename}"'
return response


@extend_schema_view(
list=extend_schema(
Expand Down
Loading
Loading