From e09f92a6090abae18a75d7d82da34e735329b538 Mon Sep 17 00:00:00 2001 From: williamlardier Date: Thu, 28 Nov 2024 14:54:52 +0100 Subject: [PATCH 01/15] Complete quota tests - Test that we properly account for deletions between count items runs. - Test that we can expire objects when a bucket has a quota enabled. Issue: ZENKO-4941 --- tests/ctst/features/quotas/Quotas.feature | 40 +++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/ctst/features/quotas/Quotas.feature b/tests/ctst/features/quotas/Quotas.feature index e9de9ba341..8fca68a8c0 100644 --- a/tests/ctst/features/quotas/Quotas.feature +++ b/tests/ctst/features/quotas/Quotas.feature @@ -96,3 +96,43 @@ Feature: Quota Management for APIs | 100 | 200 | 0 | IAM_USER | | 100 | 0 | 200 | IAM_USER | | 100 | 200 | 200 | IAM_USER | + + @2.6.0 + @PreMerge + @Quotas + @CronJob + @DataDeletion + @NonVersioned + Scenario Outline: Quotas are affected by deletion operations between count items runs + Given an action "DeleteObject" + And a permission to perform the "PutObject" action + And a STORAGE_MANAGER type + And a bucket quota set to 10000 B + And an account quota set to 10000 B + And an upload size of 1000 B for the object "obj-1" + And a bucket quota set to B + And an account quota set to B + And a type + And an environment setup for the API + And an "existing" IAM Policy that "applies" with "ALLOW" effect for the current API + When I wait 3 seconds + And I PUT an object with size + Then the API should "fail" with "QuotaExceeded" + When the "count-items" cronjobs completes without error + When I wait 3 seconds + # At this point if negative inflights are not supported, write should + # not be possible, as the previous inflights are now part of the current + # metrics. + And i delete object "obj-1" + And I wait 3 seconds + And I PUT an object with size + Then the API should "succeed" with "" + + Examples: + | uploadSize | bucketQuota | accountQuota | userType | + | 100 | 200 | 0 | ACCOUNT | + | 100 | 0 | 200 | ACCOUNT | + | 100 | 200 | 200 | ACCOUNT | + | 100 | 200 | 0 | IAM_USER | + | 100 | 0 | 200 | IAM_USER | + | 100 | 200 | 200 | IAM_USER | From 1799ce7fd92477f024a744603705cbdce73956dc Mon Sep 17 00:00:00 2001 From: williamlardier Date: Fri, 29 Nov 2024 16:44:09 +0100 Subject: [PATCH 02/15] Test that negative inflights do not allow to bypass the quota Issue: ZENKO-4941 --- tests/ctst/features/quotas/Quotas.feature | 48 ++++++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/tests/ctst/features/quotas/Quotas.feature b/tests/ctst/features/quotas/Quotas.feature index 8fca68a8c0..a61b6cf9a9 100644 --- a/tests/ctst/features/quotas/Quotas.feature +++ b/tests/ctst/features/quotas/Quotas.feature @@ -107,8 +107,8 @@ Feature: Quota Management for APIs Given an action "DeleteObject" And a permission to perform the "PutObject" action And a STORAGE_MANAGER type - And a bucket quota set to 10000 B - And an account quota set to 10000 B + And a bucket quota set to 1000 B + And an account quota set to 1000 B And an upload size of 1000 B for the object "obj-1" And a bucket quota set to B And an account quota set to B @@ -119,11 +119,13 @@ Feature: Quota Management for APIs And I PUT an object with size Then the API should "fail" with "QuotaExceeded" When the "count-items" cronjobs completes without error + # Wait for inflights to be read by SCUBA When I wait 3 seconds # At this point if negative inflights are not supported, write should # not be possible, as the previous inflights are now part of the current # metrics. And i delete object "obj-1" + # Wait for inflights to be read by SCUBA And I wait 3 seconds And I PUT an object with size Then the API should "succeed" with "" @@ -136,3 +138,45 @@ Feature: Quota Management for APIs | 100 | 200 | 0 | IAM_USER | | 100 | 0 | 200 | IAM_USER | | 100 | 200 | 200 | IAM_USER | + + @2.6.0 + @PreMerge + @Quotas + @CronJob + @DataDeletion + @NonVersioned + Scenario Outline: Negative inflights do not allow to bypass the quota + Given an action "DeleteObject" + And a permission to perform the "PutObject" action + And a STORAGE_MANAGER type + And a bucket quota set to 1000 B + And an account quota set to 1000 B + And an upload size of 1000 B for the object "obj-1" + And a bucket quota set to B + And an account quota set to B + And a type + And an environment setup for the API + And an "existing" IAM Policy that "applies" with "ALLOW" effect for the current API + When I wait 3 seconds + And I PUT an object with size + Then the API should "fail" with "QuotaExceeded" + When the "count-items" cronjobs completes without error + # Wait for inflights to be read by SCUBA + When I wait 3 seconds + # At this point if negative inflights are not supported, write should + # not be possible, as the previous inflights are now part of the current + # metrics. + And i delete object "obj-1" + # Wait for inflights to be read by SCUBA + And I wait 3 seconds + And I PUT an object with size 1000 + Then the API should "fail" with "QuotaExceeded" + + Examples: + | uploadSize | bucketQuota | accountQuota | userType | + | 100 | 200 | 0 | ACCOUNT | + | 100 | 0 | 200 | ACCOUNT | + | 100 | 200 | 200 | ACCOUNT | + | 100 | 200 | 0 | IAM_USER | + | 100 | 0 | 200 | IAM_USER | + | 100 | 200 | 200 | IAM_USER | \ No newline at end of file From f08141433b1ab649e45161bd33f7313bb7e37e17 Mon Sep 17 00:00:00 2001 From: williamlardier Date: Mon, 2 Dec 2024 14:46:33 +0100 Subject: [PATCH 03/15] Update how zenko-ctst account is created It seems that we are not robust against parallel account creations in Vault/Pensieve, leading in some cases to two accounts in the __account collection, but also in the pensieve overlay. However, when the overlay is updated (adding a new location, or a bucket configuration), ZKOP will thus fail the overlay validation and the reconciliations will all fail. It was affecting the bucket tests, but may affect more tests later. Issue: ZENKO-4941 --- tests/ctst/world/Zenko.ts | 54 ++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/tests/ctst/world/Zenko.ts b/tests/ctst/world/Zenko.ts index 4421348cdc..1005442fe0 100644 --- a/tests/ctst/world/Zenko.ts +++ b/tests/ctst/world/Zenko.ts @@ -4,6 +4,8 @@ import { AccessKey } from '@aws-sdk/client-iam'; import { Credentials } from '@aws-sdk/client-sts'; import { aws4Interceptor } from 'aws4-axios'; import qs from 'qs'; +import fs from 'fs'; +import lockFile from 'proper-lockfile'; import Werelogs from 'werelogs'; import { CacheHelper, @@ -630,24 +632,36 @@ export default class Zenko extends World { if (!Identity.hasIdentity(IdentityEnum.ACCOUNT, accountName)) { Identity.useIdentity(IdentityEnum.ADMIN, site.adminIdentityName); - + const filePath = `/tmp/account-init-${accountName}.json`; + if (!fs.existsSync(filePath)) { + fs.writeFileSync(filePath, JSON.stringify({ + ready: false, + })); + } let account = null; - CacheHelper.logger.debug('Creating account', { - accountName, - adminIdentityName: site.adminIdentityName, - credentials: Identity.getCurrentCredentials(), - }); - // Create the account if already exist will not throw any error + let releaseLock: (() => Promise) | null = null; try { - await SuperAdmin.createAccount({ accountName }); - /* eslint-disable */ - } catch (err: any) { - CacheHelper.logger.debug('Error while creating account', { - accountName, - err, + releaseLock = await lockFile.lock(filePath, { + stale: Constants.DEFAULT_TIMEOUT / 2, + retries: { + retries: 5, + factor: 3, + minTimeout: 1000, + maxTimeout: 5000, + } }); - if (!err.EntityAlreadyExists && err.code !== 'EntityAlreadyExists') { - throw err; + + try { + await SuperAdmin.createAccount({ accountName }); + /* eslint-disable */ + } catch (err: any) { + if (!err.EntityAlreadyExists && err.code !== 'EntityAlreadyExists') { + throw err; + } + } + } finally { + if (releaseLock) { + await releaseLock(); } } /* eslint-enable */ @@ -690,7 +704,7 @@ export default class Zenko extends World { const accountName = this.sites['source']?.accountName || CacheHelper.parameters.AccountName!; const accountAccessKeys = Identity.getCredentialsForIdentity( IdentityEnum.ACCOUNT, this.sites['source']?.accountName - || CacheHelper.parameters.AccountName!) || { + || CacheHelper.parameters.AccountName!) || { accessKeyId: '', secretAccessKey: '', }; @@ -862,7 +876,7 @@ export default class Zenko extends World { } async awsS3Request(method: Method, path: string, - userCredentials: AWSCredentials, headers: object = {}, payload: object = {}) : Promise { + userCredentials: AWSCredentials, headers: object = {}, payload: object = {}): Promise { const interceptor = aws4Interceptor({ options: { region: 'us-east-1', @@ -888,7 +902,7 @@ export default class Zenko extends World { statusCode: response.status, data: response.data as unknown, }; - /* eslint-disable */ + /* eslint-disable */ } catch (err: any) { return { stdout: '', @@ -964,7 +978,7 @@ export default class Zenko extends World { } } - async addWebsiteEndpoint(this: Zenko, endpoint: string) : + async addWebsiteEndpoint(this: Zenko, endpoint: string): Promise<{ statusCode: number; data: object } | { statusCode: number; err: unknown }> { return await this.managementAPIRequest('POST', `/config/${this.parameters.InstanceID}/website/endpoint`, @@ -974,7 +988,7 @@ export default class Zenko extends World { `"${endpoint}"`); } - async deleteLocation(this: Zenko, locationName: string) : + async deleteLocation(this: Zenko, locationName: string): Promise<{ statusCode: number; data: object } | { statusCode: number; err: unknown }> { return await this.managementAPIRequest('DELETE', `/config/${this.parameters.InstanceID}/location/${locationName}`); From ab26a964b4bbf8a0b85176f96e0c6df86e58eb0d Mon Sep 17 00:00:00 2001 From: williamlardier Date: Mon, 2 Dec 2024 15:28:08 +0100 Subject: [PATCH 04/15] Set scuba log level as debug in CI Issue: ZENKO-4941 --- .github/scripts/end2end/configs/zenko.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/scripts/end2end/configs/zenko.yaml b/.github/scripts/end2end/configs/zenko.yaml index 9465ee2013..00ad3aaaf8 100644 --- a/.github/scripts/end2end/configs/zenko.yaml +++ b/.github/scripts/end2end/configs/zenko.yaml @@ -108,6 +108,9 @@ spec: command-timeout: "60s" pending-job-poll-after-age: "10s" pending-job-poll-check-interval: "10s" + scuba: + logging: + logLevel: debug ingress: workloadPlaneClass: 'nginx' controlPlaneClass: 'nginx' From 71b255cd125a823f8f3459b071f4a1622b2ed5c4 Mon Sep 17 00:00:00 2001 From: williamlardier Date: Mon, 2 Dec 2024 18:26:30 +0100 Subject: [PATCH 05/15] Check that quota is set before conttunuing test Issue: ZENKO-4941 --- tests/ctst/steps/quotas/quotas.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/ctst/steps/quotas/quotas.ts b/tests/ctst/steps/quotas/quotas.ts index cd7ce1b022..850119f908 100644 --- a/tests/ctst/steps/quotas/quotas.ts +++ b/tests/ctst/steps/quotas/quotas.ts @@ -6,6 +6,7 @@ import { Scality, Command, Utils, AWSCredentials, Constants, Identity, IdentityE import { createJobAndWaitForCompletion } from '../utils/kubernetes'; import { createBucketWithConfiguration, putObject } from '../utils/utils'; import { hashStringAndKeepFirst20Characters } from 'common/utils'; +import assert from 'assert'; export async function prepareQuotaScenarios(world: Zenko, scenarioConfiguration: ITestCaseHookParameter) { /** @@ -136,6 +137,16 @@ Given('a bucket quota set to {int} B', async function (this: Zenko, quota: numbe result, }); + // Ensure the quota is set + const resultGet: Command = await Scality.getBucketQuota( + this.parameters, + this.getCommandParameters()); + this.logger.debug('GetBucketQuota result', { + resultGet, + }); + + assert(resultGet.stdout.includes(`${quota}`)); + if (result.err) { throw new Error(result.err); } @@ -158,6 +169,9 @@ Given('an account quota set to {int} B', async function (this: Zenko, quota: num result, }); + // Ensure the quota is set + assert(JSON.parse(result.stdout).quota === quota); + if (result.err) { throw new Error(result.err); } From d460fcb9744ffd32fd0d9fe1c6b67a5eb58da7a2 Mon Sep 17 00:00:00 2001 From: williamlardier Date: Tue, 3 Dec 2024 11:26:49 +0100 Subject: [PATCH 06/15] Ensure no cleaning of quota upon test failure Issue: ZENKO-4941 --- tests/ctst/common/hooks.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/ctst/common/hooks.ts b/tests/ctst/common/hooks.ts index ed19aea392..1ce8d7ee4f 100644 --- a/tests/ctst/common/hooks.ts +++ b/tests/ctst/common/hooks.ts @@ -54,7 +54,13 @@ After(async function (this: Zenko, results) { ); }); -After({ tags: '@Quotas' }, async function () { +After({ tags: '@Quotas' }, async function (this: Zenko, results) { + if (results.result?.status === 'FAILED') { + this.logger.warn('quota was not cleaned for test', { + bucket: this.getSaved('bucketName'), + }); + return; + } await teardownQuotaScenarios(this as Zenko); }); From 22d65a27c6625dd1eadf85f7af0b6b9887e58971 Mon Sep 17 00:00:00 2001 From: williamlardier Date: Tue, 3 Dec 2024 14:52:31 +0100 Subject: [PATCH 07/15] Ensure count items is not already running A race condition where a count items was running when the new one started at the beggining of the quota tests, and the old one completing after the new one, would lead to an override of the infostore collection, in turn leading to missing metrics for the quota buckets/accounts, making the quota tests fail till a new count items starts. Use a file locking mechanism to ensure no concurrent job runs - Watching the resource or using kube api is prone to race condition, as their is a delay between the request to create a job, and its run, plus, the logic is not easy to centralize if we consider job already completed or not yet started, as we delete the old jobs upon completion - Using alock mechanism is similar to what we already have for quotas, plus, it ensures no race condition and tests being able to restart as soon as possible. Issue: ZENKO-4941 --- tests/ctst/steps/utils/kubernetes.ts | 70 +++++++++++++++++++++------- 1 file changed, 54 insertions(+), 16 deletions(-) diff --git a/tests/ctst/steps/utils/kubernetes.ts b/tests/ctst/steps/utils/kubernetes.ts index 32e9aceb91..147cf099b9 100644 --- a/tests/ctst/steps/utils/kubernetes.ts +++ b/tests/ctst/steps/utils/kubernetes.ts @@ -1,3 +1,6 @@ +import fs from 'fs'; +import * as path from 'path'; +import lockFile from 'proper-lockfile'; import { KubernetesHelper, Utils } from 'cli-testing'; import Zenko from 'world/Zenko'; import { @@ -71,12 +74,40 @@ export function createKubeCustomObjectClient(world: Zenko): CustomObjectsApi { return KubernetesHelper.customObject; } -export async function createJobAndWaitForCompletion(world: Zenko, jobName: string, customMetadata?: string) { +export async function createJobAndWaitForCompletion( + world: Zenko, + jobName: string, + customMetadata?: string +) { const batchClient = createKubeBatchClient(world); const watchClient = createKubeWatchClient(world); + + const lockFilePath = path.join('/tmp', `${jobName}.lock`); + let releaseLock: (() => Promise) | false = false; + + if (!fs.existsSync(lockFilePath)) { + fs.writeFileSync(lockFilePath, ''); + } + try { + releaseLock = await lockFile.lock(lockFilePath, { + // Expect the job does not take more than 2 minutes to complete + stale: 2 * 60 * 1000, + // use a non-exponential backoff strategy + // try once per second for 2min 10s + retries: { + retries: 130, + factor: 1, + minTimeout: 1000, + maxTimeout: 1000, + }, + }); + world.logger.debug(`Acquired lock for job: ${jobName}`); + + // Read the cron job and prepare the job spec const cronJob = await batchClient.readNamespacedCronJob(jobName, 'default'); const cronJobSpec = cronJob.body.spec?.jobTemplate.spec; + const job = new V1Job(); const metadata = new V1ObjectMeta(); job.apiVersion = 'batch/v1'; @@ -87,50 +118,57 @@ export async function createJobAndWaitForCompletion(world: Zenko, jobName: strin 'cronjob.kubernetes.io/instantiate': 'ctst', }; if (customMetadata) { - metadata.annotations = { - custom: customMetadata, - }; + metadata.annotations.custom = customMetadata; } job.metadata = metadata; + // Create the job const response = await batchClient.createNamespacedJob('default', job); - world.logger.debug('job created', { - job: response.body.metadata, - }); + world.logger.debug('Job created', { job: response.body.metadata }); const expectedJobName = response.body.metadata?.name; + // Watch for job completion await new Promise((resolve, reject) => { void watchClient.watch( '/apis/batch/v1/namespaces/default/jobs', {}, (type: string, apiObj, watchObj) => { - if (job.metadata?.name && expectedJobName && - (watchObj.object?.metadata?.name as string)?.startsWith?.(expectedJobName)) { + if ( + expectedJobName && + (watchObj.object?.metadata?.name as string)?.startsWith?.(expectedJobName) + ) { if (watchObj.object?.status?.succeeded) { - world.logger.debug('job succeeded', { - job: job.metadata, - }); + world.logger.debug('Job succeeded', { job: job.metadata }); resolve(); } else if (watchObj.object?.status?.failed) { - world.logger.debug('job failed', { + world.logger.debug('Job failed', { job: job.metadata, object: watchObj.object, }); - reject(new Error('job failed')); + reject(new Error('Job failed')); } } - }, reject); + }, + reject + ); }); } catch (err: unknown) { - world.logger.error('error creating job', { + world.logger.error('Error creating or waiting for job completion', { jobName, err, }); throw err; + } finally { + // Ensure the lock is released + if (releaseLock) { + await releaseLock(); + world.logger.debug(`Released lock for job: ${jobName}`); + } } } + export async function waitForZenkoToStabilize( world: Zenko, needsReconciliation = false, timeout = 15 * 60 * 1000, namespace = 'default') { // ZKOP pulls the overlay configuration from Pensieve every 5 seconds From e479cb4d474e21016790daf66912af4b8e15dc32 Mon Sep 17 00:00:00 2001 From: williamlardier Date: Wed, 4 Dec 2024 17:09:27 +0100 Subject: [PATCH 08/15] Improve test stability - Ensure no scenario requiring count items to run at the same time, as they were overriding each other: if count items completes between 2 checks of a quota scenario, the "current" may be cleaned, leading to wrong test results. - Incerase the count items lock stale duration: if we have too many buckets, we might fail to get the lock in time. - Put the count items scenario in a dedicated folder, as it is not related to quotas. Issue: ZENKO-4941 --- tests/ctst/common/hooks.ts | 2 +- .../{quotas => CountItems}/CountItems.feature | 0 tests/ctst/features/quotas/Quotas.feature | 26 +++++++------------ tests/ctst/steps/utils/kubernetes.ts | 9 +++---- 4 files changed, 14 insertions(+), 23 deletions(-) rename tests/ctst/features/{quotas => CountItems}/CountItems.feature (100%) diff --git a/tests/ctst/common/hooks.ts b/tests/ctst/common/hooks.ts index 1ce8d7ee4f..d6e64841c0 100644 --- a/tests/ctst/common/hooks.ts +++ b/tests/ctst/common/hooks.ts @@ -17,7 +17,7 @@ import { cleanS3Bucket } from './common'; process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'; const { atMostOnePicklePerTag } = parallelCanAssignHelpers; -const noParallelRun = atMostOnePicklePerTag(['@AfterAll', '@PRA', '@ColdStorage']); +const noParallelRun = atMostOnePicklePerTag(['@AfterAll', '@PRA', '@ColdStorage', '@Utilization']); setParallelCanAssign(noParallelRun); diff --git a/tests/ctst/features/quotas/CountItems.feature b/tests/ctst/features/CountItems/CountItems.feature similarity index 100% rename from tests/ctst/features/quotas/CountItems.feature rename to tests/ctst/features/CountItems/CountItems.feature diff --git a/tests/ctst/features/quotas/Quotas.feature b/tests/ctst/features/quotas/Quotas.feature index a61b6cf9a9..16eaf4f619 100644 --- a/tests/ctst/features/quotas/Quotas.feature +++ b/tests/ctst/features/quotas/Quotas.feature @@ -68,6 +68,7 @@ Feature: Quota Management for APIs @CronJob @DataDeletion @NonVersioned + @Utilization Scenario Outline: Quotas are affected by deletion operations Given an action "DeleteObject" And a permission to perform the "PutObject" action @@ -103,6 +104,7 @@ Feature: Quota Management for APIs @CronJob @DataDeletion @NonVersioned + @Utilization Scenario Outline: Quotas are affected by deletion operations between count items runs Given an action "DeleteObject" And a permission to perform the "PutObject" action @@ -133,11 +135,6 @@ Feature: Quota Management for APIs Examples: | uploadSize | bucketQuota | accountQuota | userType | | 100 | 200 | 0 | ACCOUNT | - | 100 | 0 | 200 | ACCOUNT | - | 100 | 200 | 200 | ACCOUNT | - | 100 | 200 | 0 | IAM_USER | - | 100 | 0 | 200 | IAM_USER | - | 100 | 200 | 200 | IAM_USER | @2.6.0 @PreMerge @@ -149,34 +146,29 @@ Feature: Quota Management for APIs Given an action "DeleteObject" And a permission to perform the "PutObject" action And a STORAGE_MANAGER type - And a bucket quota set to 1000 B - And an account quota set to 1000 B - And an upload size of 1000 B for the object "obj-1" - And a bucket quota set to B + And a bucket quota set to B And an account quota set to B + And an upload size of B for the object "obj-1" And a type And an environment setup for the API And an "existing" IAM Policy that "applies" with "ALLOW" effect for the current API When I wait 3 seconds And I PUT an object with size Then the API should "fail" with "QuotaExceeded" + # The inflights are now part of the current metrics When the "count-items" cronjobs completes without error # Wait for inflights to be read by SCUBA When I wait 3 seconds # At this point if negative inflights are not supported, write should # not be possible, as the previous inflights are now part of the current # metrics. + # Delete 200 B, inflights are now -200 And i delete object "obj-1" # Wait for inflights to be read by SCUBA - And I wait 3 seconds - And I PUT an object with size 1000 + And I wait 6 seconds + And I PUT an object with size 300 Then the API should "fail" with "QuotaExceeded" Examples: | uploadSize | bucketQuota | accountQuota | userType | - | 100 | 200 | 0 | ACCOUNT | - | 100 | 0 | 200 | ACCOUNT | - | 100 | 200 | 200 | ACCOUNT | - | 100 | 200 | 0 | IAM_USER | - | 100 | 0 | 200 | IAM_USER | - | 100 | 200 | 200 | IAM_USER | \ No newline at end of file + | 200 | 200 | 0 | ACCOUNT | \ No newline at end of file diff --git a/tests/ctst/steps/utils/kubernetes.ts b/tests/ctst/steps/utils/kubernetes.ts index 147cf099b9..636353c50b 100644 --- a/tests/ctst/steps/utils/kubernetes.ts +++ b/tests/ctst/steps/utils/kubernetes.ts @@ -91,12 +91,11 @@ export async function createJobAndWaitForCompletion( try { releaseLock = await lockFile.lock(lockFilePath, { - // Expect the job does not take more than 2 minutes to complete - stale: 2 * 60 * 1000, - // use a non-exponential backoff strategy - // try once per second for 2min 10s + // Expect the jobs in the queue does not take more than 5 minutes to complete + stale: 10 * 60 * 1000, + // use a linear backoff strategy retries: { - retries: 130, + retries: 610, factor: 1, minTimeout: 1000, maxTimeout: 1000, From 4b8c5552f70d5d078d9bf23abb5b8ab6f147b4fc Mon Sep 17 00:00:00 2001 From: williamlardier Date: Wed, 4 Dec 2024 17:48:36 +0100 Subject: [PATCH 09/15] Do not stop on error during repeated api calls - It may be prone to periodic instabilities - Not sopping won't change the result and make the step more close to what the sentence says Issue: ZENKO-4941 --- tests/ctst/common/common.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/ctst/common/common.ts b/tests/ctst/common/common.ts index 78926f1ae8..163ee486a8 100644 --- a/tests/ctst/common/common.ts +++ b/tests/ctst/common/common.ts @@ -353,8 +353,7 @@ When('the user tries to perform the current S3 action on the bucket {int} times } await runActionAgainstBucket(this, this.getSaved('currentAction').action); if (this.getResult().err) { - // stop at any error, the error will be evaluated in a separated step - return; + this.logger.debug('Error during repeated action', { error: this.getResult().err }); } await Utils.sleep(delay); } From 6eb3ac9bf9c5ace4a5e9fc2bd6276f41d7715837 Mon Sep 17 00:00:00 2001 From: williamlardier Date: Wed, 4 Dec 2024 23:31:49 +0100 Subject: [PATCH 10/15] Increase test waiting time - We use 2s but the scuba frequency is 2s, so if we are unlucky, we may not be able to have it flushed, leading to flakies Issue: ZENKO-4941 --- tests/ctst/features/quotas/Quotas.feature | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ctst/features/quotas/Quotas.feature b/tests/ctst/features/quotas/Quotas.feature index 16eaf4f619..04c4da95a4 100644 --- a/tests/ctst/features/quotas/Quotas.feature +++ b/tests/ctst/features/quotas/Quotas.feature @@ -81,11 +81,11 @@ Feature: Quota Management for APIs And a type And an environment setup for the API And an "existing" IAM Policy that "applies" with "ALLOW" effect for the current API - When I wait 3 seconds + When I wait 6 seconds And I PUT an object with size Then the API should "fail" with "QuotaExceeded" When i delete object "obj-1" - And I wait 3 seconds + And I wait 6 seconds And I PUT an object with size Then the API should "succeed" with "" From fbc5e5960b49b784d3b144162f1a759e81e9e669 Mon Sep 17 00:00:00 2001 From: williamlardier Date: Thu, 12 Dec 2024 12:38:09 +0100 Subject: [PATCH 11/15] Permute the values in the quota scenario Won't change the test, but the sentences are now more meaningful. Issue: ZENKO-4941 --- tests/ctst/features/quotas/Quotas.feature | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ctst/features/quotas/Quotas.feature b/tests/ctst/features/quotas/Quotas.feature index 04c4da95a4..65bcb5a530 100644 --- a/tests/ctst/features/quotas/Quotas.feature +++ b/tests/ctst/features/quotas/Quotas.feature @@ -146,9 +146,9 @@ Feature: Quota Management for APIs Given an action "DeleteObject" And a permission to perform the "PutObject" action And a STORAGE_MANAGER type - And a bucket quota set to B + And a bucket quota set to B And an account quota set to B - And an upload size of B for the object "obj-1" + And an upload size of B for the object "obj-1" And a type And an environment setup for the API And an "existing" IAM Policy that "applies" with "ALLOW" effect for the current API From 3083021cd15482e195963b635dd778b24c4bfadc Mon Sep 17 00:00:00 2001 From: williamlardier Date: Mon, 23 Dec 2024 14:37:02 +0100 Subject: [PATCH 12/15] Bump CTST to v1.2.4 Issue: ZENKO-4941 --- tests/ctst/package.json | 2 +- tests/ctst/yarn.lock | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ctst/package.json b/tests/ctst/package.json index f2e107e48c..7f942831cf 100644 --- a/tests/ctst/package.json +++ b/tests/ctst/package.json @@ -24,7 +24,7 @@ "@aws-sdk/client-s3": "^3.583.0", "@aws-sdk/client-sts": "^3.583.0", "@eslint/compat": "^1.1.1", - "cli-testing": "github:scality/cli-testing.git#1.2.3", + "cli-testing": "github:scality/cli-testing.git#1.2.4", "eslint": "^9.9.1", "eslint-config-scality": "scality/Guidelines#8.3.0", "typescript-eslint": "^8.4.0" diff --git a/tests/ctst/yarn.lock b/tests/ctst/yarn.lock index 27828947b4..f8f4a38ab8 100644 --- a/tests/ctst/yarn.lock +++ b/tests/ctst/yarn.lock @@ -4527,9 +4527,9 @@ cli-table3@^0.6.0: optionalDependencies: "@colors/colors" "1.5.0" -"cli-testing@github:scality/cli-testing.git#1.2.3": - version "1.2.3" - resolved "git+ssh://git@github.com/scality/cli-testing.git#d6712d412dd7cd56cdf89a812203637d1ebab210" +"cli-testing@github:scality/cli-testing.git#1.2.4": + version "1.2.4" + resolved "git+ssh://git@github.com/scality/cli-testing.git#687c3bdb5d684c5de4082a5dd9f9c00e94ec104c" dependencies: "@aws-crypto/sha256-universal" "^5.2.0" "@aws-sdk/client-iam" "^3.637.0" From dcd2c8a734eb7f86e133a3767105c78a4cbaff5b Mon Sep 17 00:00:00 2001 From: williamlardier Date: Mon, 23 Dec 2024 14:37:33 +0100 Subject: [PATCH 13/15] When doing multiple actions in a step, continue if retryable Previously we would ignore all errors, even the ones that are not expected, now, we will stop at any error that we are not expecting. Issue: ZENKO-4941 --- tests/ctst/common/common.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ctst/common/common.ts b/tests/ctst/common/common.ts index 163ee486a8..eb4d01b5da 100644 --- a/tests/ctst/common/common.ts +++ b/tests/ctst/common/common.ts @@ -352,8 +352,9 @@ When('the user tries to perform the current S3 action on the bucket {int} times this.addToSaved('copyObject', `objectrepeatcopy-${Utils.randomString()}`); } await runActionAgainstBucket(this, this.getSaved('currentAction').action); - if (this.getResult().err) { + if (this.getResult().err && this.getResult().retryable?.throttling !== true) { this.logger.debug('Error during repeated action', { error: this.getResult().err }); + break; } await Utils.sleep(delay); } From 46f9b563d920aae589e6c99744cd7b902c641f1a Mon Sep 17 00:00:00 2001 From: williamlardier Date: Mon, 23 Dec 2024 14:38:48 +0100 Subject: [PATCH 14/15] Use a file with wx flag to control concurrency Issue: ZENKO-4941 --- tests/ctst/steps/utils/kubernetes.ts | 38 +++++++++++++--------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/tests/ctst/steps/utils/kubernetes.ts b/tests/ctst/steps/utils/kubernetes.ts index 636353c50b..62bfdb0a24 100644 --- a/tests/ctst/steps/utils/kubernetes.ts +++ b/tests/ctst/steps/utils/kubernetes.ts @@ -1,6 +1,5 @@ import fs from 'fs'; import * as path from 'path'; -import lockFile from 'proper-lockfile'; import { KubernetesHelper, Utils } from 'cli-testing'; import Zenko from 'world/Zenko'; import { @@ -83,24 +82,27 @@ export async function createJobAndWaitForCompletion( const watchClient = createKubeWatchClient(world); const lockFilePath = path.join('/tmp', `${jobName}.lock`); - let releaseLock: (() => Promise) | false = false; - if (!fs.existsSync(lockFilePath)) { - fs.writeFileSync(lockFilePath, ''); + let lockAquired = false; + let tries = 600; + while (!lockAquired && tries > 0) { + try { + fs.writeFileSync(lockFilePath, 'lock', { + flag: 'wx', + }); + lockAquired = true; + } catch { + world.logger.debug(`Failed to acquire lock for job: ${jobName}`, { + tries, + }); + } + tries--; + if (!lockAquired) { + await Utils.sleep(1000); + } } try { - releaseLock = await lockFile.lock(lockFilePath, { - // Expect the jobs in the queue does not take more than 5 minutes to complete - stale: 10 * 60 * 1000, - // use a linear backoff strategy - retries: { - retries: 610, - factor: 1, - minTimeout: 1000, - maxTimeout: 1000, - }, - }); world.logger.debug(`Acquired lock for job: ${jobName}`); // Read the cron job and prepare the job spec @@ -159,11 +161,7 @@ export async function createJobAndWaitForCompletion( }); throw err; } finally { - // Ensure the lock is released - if (releaseLock) { - await releaseLock(); - world.logger.debug(`Released lock for job: ${jobName}`); - } + fs.unlinkSync(lockFilePath); } } From 87ea053dba276a49e2ef636fd1acb19faa17471f Mon Sep 17 00:00:00 2001 From: williamlardier Date: Mon, 23 Dec 2024 17:51:36 +0100 Subject: [PATCH 15/15] Remove the atMost flag for utilization Not needed since we are now locking the job Issue: ZENKO-4941 --- tests/ctst/common/hooks.ts | 2 +- tests/ctst/features/CountItems/CountItems.feature | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ctst/common/hooks.ts b/tests/ctst/common/hooks.ts index d6e64841c0..1ce8d7ee4f 100644 --- a/tests/ctst/common/hooks.ts +++ b/tests/ctst/common/hooks.ts @@ -17,7 +17,7 @@ import { cleanS3Bucket } from './common'; process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'; const { atMostOnePicklePerTag } = parallelCanAssignHelpers; -const noParallelRun = atMostOnePicklePerTag(['@AfterAll', '@PRA', '@ColdStorage', '@Utilization']); +const noParallelRun = atMostOnePicklePerTag(['@AfterAll', '@PRA', '@ColdStorage']); setParallelCanAssign(noParallelRun); diff --git a/tests/ctst/features/CountItems/CountItems.feature b/tests/ctst/features/CountItems/CountItems.feature index e009f1766a..88da709a31 100644 --- a/tests/ctst/features/CountItems/CountItems.feature +++ b/tests/ctst/features/CountItems/CountItems.feature @@ -5,6 +5,7 @@ Feature: CountItems measures the utilization metrics @PreMerge @CronJob @CountItems +@Utilization Scenario Outline: Countitems runs without error and compute utilization metrics Given an existing bucket "" "without" versioning, "without" ObjectLock "without" retention mode And an object "" that "exists"