Skip to content

Commit

Permalink
Increase default clamav file size limits. (#28)
Browse files Browse the repository at this point in the history
By default clamav has a 25MB limit for files. Increase these
limits, and apply a smaller max file size limit for the service.

A 500MiB file takes 5 mins to scan, so we also need to increase
the timeout for the service.
  • Loading branch information
nielm authored Mar 1, 2022
1 parent 2f5b8a2 commit 5265c08
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 11 deletions.
37 changes: 34 additions & 3 deletions cloudrun-malware-scanner/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,40 @@ apt-get update && apt-get install clamav-daemon -y
# Get latest definitions
freshclam

# Reload Services
# Set Clam config file values
# see clamd.conf documentation:
# https://manpages.debian.org/bullseye/clamav-daemon/clamd.conf.5.en.html

# Note: clamav takes the _first_ config value found in the file, so first
# remove any existing values, then append the new values.
grep -vE "^(StreamMaxLength|MaxScanSize|MaxFileSize|MaxRecursion|MaxFiles)" /etc/clamav/clamd.conf > /etc/clamav/clamd.conf.new
cat >> /etc/clamav/clamd.conf.new << EOF
# This option allows you to specify the upper limit for data size that will be transfered to remote daemon when scanning a single file.
StreamMaxLength 521M
# Sets the maximum amount of data to be scanned for each input file.
# Archives and other containers are recursively extracted and scanned up to this value.
MaxScanSize 512M
# Files larger than this limit won't be scanned.
# Affects the input file itself as well as files contained inside it (when the input file is an archive, a document or some other kind of container).
MaxFileSize 512M
# Nested archives are scanned recursively, e.g. if a Zip archive contains a RAR file, all files within it will also be scanned.
# This options specifies how deeply the process should be continued.
MaxRecursion 16
# Number of files to be scanned within an archive, a document, or any other kind of container.
MaxFiles 10000
EOF
mv -f /etc/clamav/clamd.conf.new /etc/clamav/clamd.conf

# Report options to log
clamconf

# Restart Services
service clamav-daemon force-reload
service clamav-freshclam force-reload

# Run node process
npm start
# Run node server process
npm start
2 changes: 1 addition & 1 deletion cloudrun-malware-scanner/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "gcs-malware-scanner",
"version": "1.5.0",
"version": "1.6.0",
"description": "Service to scan GCS documents for the malware and move the analyzed documents to appropriate buckets",
"main": "index.js",
"scripts": {
Expand Down
32 changes: 25 additions & 7 deletions cloudrun-malware-scanner/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,20 @@ const metrics = require('./metrics.js');
const PORT = process.env.PORT || 8080;
const CLAMD_HOST = '127.0.0.1';
const CLAMD_PORT = 3310;
const MAX_FILE_SIZE = 5000000000; // 5GiB

// 10 min timeout for scanning.
const CLAMD_TIMEOUT = 600000;

// Note: MAX_FILE_SIZE limits the size of files which are sent to the
// ClamAV Daemon.
//
// ClamAV itself has internal limits, which apply both to the total file
// size, and to the size of compressed files inside file containers.
// These are set in the clamd.conf file by bootstrap.sh
//
// Note scanning a 500MiB file can take 5 minutes, so ensure timeout is
// large enough.
const MAX_FILE_SIZE = 500000000; // 500 MB (5e8 bytes, roughly 477 MiB)

/**
* Configuration object.
Expand Down Expand Up @@ -59,6 +72,11 @@ const storage = new Storage();
* Route that is invoked by Cloud Run when a malware scan is requested
* for a document uploaded to GCS.
*
* For command line testing, use
*
* curl -d '{"kind": "storage#object","name":"sparse_file_1G", "bucket": "BUCKET_NAME" }' -H "Content-Type: application/json" http://localhost:8080
*
*
* @param {!Request} req The request payload
* @param {!Response} res The HTTP response object
*/
Expand Down Expand Up @@ -111,7 +129,7 @@ app.post('/', async (req, res) => {
const readStream = await gcsFile.createReadStream();
let result;
try {
result = await scanner.scanStream(readStream);
result = await scanner.scanStream(readStream, CLAMD_TIMEOUT);
} finally {
// Ensure stream is destroyed in all situations to prevent any
// resource leaks.
Expand Down Expand Up @@ -186,7 +204,7 @@ app.get('/', async (req, res) => {
.type('text/plain')
.send(
`${pkgJson.name} version ${pkgJson.version}
Using Clam AV version: ${await getClamVersion()}
Using Clam AV version: ${await getClamVersion()}
${pkgJson.description}
(Responds to POST requests containing a GCS object only)
Expand Down Expand Up @@ -217,7 +235,7 @@ function handleErrorResponse(res, statusCode, errorMessage,
* @return {Promise<string>}
*/
async function getClamVersion() {
  const version = await clamd.version(CLAMD_HOST, CLAMD_PORT);
  // Strip every NUL character from the daemon's reply. split/join is used
  // because String.prototype.replace with a string pattern only removes the
  // first occurrence, and replaceAll is unavailable on older Node.js
  // runtimes.
  return version.split('\x00').join('');
}

/**
Expand Down Expand Up @@ -300,9 +318,9 @@ async function readAndVerifyConfig() {
success = false;
}
}
if(config.unscanned === config.clean
|| config.unscanned === config.quarantined
|| config.clean === config.quarantined) {
if (config.unscanned === config.clean ||
config.unscanned === config.quarantined ||
config.clean === config.quarantined) {
logger.fatal(
`Error in bucket config[${x}]: bucket names are not unique`);
success = false;
Expand Down

0 comments on commit 5265c08

Please sign in to comment.