Merge branches 'development/2.10' and 'w/2.9/improvement/ZENKO-4941' …

…into tmp/octopus/w/2.10/improvement/ZENKO-4941
scality · Jan 9, 2025 · 780d74b · 780d74b
3 parents 2324933 + 73eb56d + b4093e6
commit 780d74b
Show file tree

Hide file tree

Showing 11 changed files with 145 additions and 40 deletions.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,13 +1,12 @@
-<!--
-Thank you for contributing to Zenko!
-
-Please enter applicable information below.
--->
-
 **What does this PR do, and why do we need it?**
 
 **Which issue does this PR fix?**
 
-fixes #<ISSUE>
+Fixes ZENKO-XXXX.
 
 **Special notes for your reviewers**:
+
+<!--
+When adding new behavior tests, please follow the
+`tests/ctst/HOW_TO_WRITE_TESTS.md` guidelines.
+-->
diff --git a/VERSION b/VERSION
@@ -1,4 +1,4 @@
-VERSION="2.10.10"
+VERSION="2.10.11"
 
 VERSION_SUFFIX=
 

diff --git a/solution/deps.yaml b/solution/deps.yaml
@@ -6,7 +6,7 @@ backbeat:
   dashboard: backbeat/backbeat-dashboards
   image: backbeat
   policy: backbeat/backbeat-policies
-  tag: 8.6.53
+  tag: 8.6.54
   envsubst: BACKBEAT_TAG
 busybox:
   image: busybox
@@ -16,7 +16,7 @@ cloudserver:
   sourceRegistry: ghcr.io/scality
   dashboard: cloudserver/cloudserver-dashboards
   image: cloudserver
-  tag: 8.8.38
+  tag: 8.8.39
   envsubst: CLOUDSERVER_TAG
 drctl:
   sourceRegistry: ghcr.io/scality
@@ -131,7 +131,7 @@ vault:
   dashboard: vault2/vault-dashboards
   policy: vault2/vault-policies
   image: vault2
-  tag: 8.9.1
+  tag: 8.9.3
   envsubst: VAULT_TAG
 zenko-operator:
   sourceRegistry: ghcr.io/scality

diff --git a/tests/ctst/HOW_TO_WRITE_TESTS.md b/tests/ctst/HOW_TO_WRITE_TESTS.md
@@ -0,0 +1,108 @@
+# How to write a test that runs in parallel
+
+Writing integration / behavior tests that runs in parallel is not trivial.
+This page lists all the rules to follow when writing a test that runs in
+parallel with other tests.
+
+*This is specific to Zenko, but the rules are applicable for any suite of*
+*tests in parallel.*
+
+## 1. Tests must be idempotent.
+
+In parallel, the tests can run in any order. It's not because a test works at a
+given time, that it will work if the test order changes after adding new ones
+or renaming files. You must ensure the scenarios are fully idempotent.
+"Flakies" are mostly due to non-idempotent tests, then bugs. It is rarely due
+to the platform.
+
+## 2. Use unique resources names. S3 Bucket, Account and S3 Objects should be unique.
+
+> Use unique resources names. S3 Bucket, Account and S3 Objects should be
+> unique.
+
+It limits the risks of collision between tests.
+For cross-account scenarios, use unique S3 Policies and S3 Roles.
+
+If data is used, such as lock files, ensure they are named appropriately.
+
+## 3. Do not reconfigure the environment if it affects other tests' resources.
+
+If a reconfiguration triggers a reconciliation of the resources, running tests
+might be affected. Also, it restarts the services, which leads to missing logs,
+harder debugging, and longer test execution.
+
+For examples, location creation or bucket Website endpoints, that triggers a
+restart of multiple services, must be created before the test starts, or in a
+dedicated set of tests, that is, a set of test executed before all tests having
+a specific tag used for identifying the feature(s).
+
+Note: Testing the reconfiguration of the environment is recommended, but it
+should be done carefully to not affect other tests.
+
+## 4. Do not use `atMostOnePicklePerTag`.
+
+If a set of scenario requires the use of `atMostOnePicklePerTag`, they won't
+be executed in parallel, which is:
+
+- Not realistic for the production environment.
+- Not efficient for the test execution: the duration of tests will suffer.
+- A proof that the test is not following **Rule #1**.
+
+Ensuring this rule may be hard, and idempotent scenarios are not always
+possible. Solutions exist:
+
+- Pre-create the resources required for the tests before they start.
+- Use `Before` / `After` hooks to setup your environment once.
+  Concurrency can be controlled with lock files.
+- Make your assertions more flexible: strict and absolute checks are good
+  for unit or functional testing, but in integration tests, prefer using
+  relative checks.
+- As a last resort, we might have a dedicated test suite.
+
+## 5. Focus on validating features.
+
+We only want to assert against externally visible state, as given in the
+feature requirements, and not rely on internal state or implementation
+details.
+
+It is tempting to write scenarios that test more than what is needed
+at the acceptance testing layer: use unit and functional tests to fully
+validate the behavior of each component; and end-to-end tests only to ensure
+that the product-level feature is working as expected per the requirements.
+Making good use of unit and functional tests makes testing all cases, including
+corner cases or error handling, much easier, leading to higher quality; and
+improves velocity by having a much faster feedback loop.
+
+## 6. Ensure assertions are idempotent after sleep times.
+
+Some tests might need to "sleep" for some time, either static or using
+more complex logic. The subsequent assertions must take into account
+any operation on the platform that might be asynchronous and take place
+during the sleep time.
+
+## 7. Avoid using the @Flaky tag.
+
+Whenever there is a flaky, there is an issue : either a bug (corner case or
+race condition) in the product, or simply an issue in the test. So the
+issue should be fixed either way. More often than not, retrying the build
+(i.e. ignoring the flaky) or automating the retry with @Flaky is just
+masking a bug, and leaving it in production instead of fixing it : so it
+must not be done. One frequent problem in flaky tests is that they are not
+idempotent, or that we did not do our job correctly to understand the
+problem. Flaky tests must be investigated, and issues in the tests must be
+fixed to ensure we don't ignore an actual problem in the product.
+
+## 8. Teardown resources.
+
+If a test creates resources, ensure they are deleted at the end of the test.
+Do not delete the resources in case of error when debugging mode is enabled,
+to ease debugging.
+
+Tests requiring to run at the very end of a parallel test suite with a specific
+naming (e.g., `zzz.*`) should be removed. Prefer using `After` hooks with
+specific tags.
+
+## 9. Validate your test before merging.
+
+If a test passes once, ensure it passes for the good reasons.
+False positives are hard to detect once merged.
diff --git a/tests/ctst/common/common.ts b/tests/ctst/common/common.ts
@@ -377,8 +377,9 @@ When('the user tries to perform the current S3 action on the bucket {int} times
                 this.addToSaved('copyObject', `objectrepeatcopy-${Utils.randomString()}`);
             }
             await runActionAgainstBucket(this, this.getSaved<ActionPermissionsType>('currentAction').action);
-            if (this.getResult().err) {
+            if (this.getResult().err && this.getResult().retryable?.throttling !== true) {
                 this.logger.debug('Error during repeated action', { error: this.getResult().err });
+                break;
             }
             await Utils.sleep(delay);
         }

diff --git a/tests/ctst/common/hooks.ts b/tests/ctst/common/hooks.ts
@@ -18,7 +18,7 @@ import {
 process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';
 
 const { atMostOnePicklePerTag } = parallelCanAssignHelpers;
-const noParallelRun = atMostOnePicklePerTag(['@AfterAll', '@PRA', '@ColdStorage', '@Utilization']);
+const noParallelRun = atMostOnePicklePerTag(['@AfterAll', '@PRA', '@ColdStorage']);
 
 setParallelCanAssign(noParallelRun);
 

diff --git a/tests/ctst/features/CountItems/CountItems.feature b/tests/ctst/features/CountItems/CountItems.feature
@@ -5,6 +5,7 @@ Feature: CountItems measures the utilization metrics
 @PreMerge
 @CronJob
 @CountItems
+@Utilization
 Scenario Outline: Countitems runs without error and compute utilization metrics
     Given an existing bucket "" "without" versioning, "without" ObjectLock "without" retention mode
     And an object "" that "exists"

diff --git a/tests/ctst/features/quotas/Quotas.feature b/tests/ctst/features/quotas/Quotas.feature
@@ -142,11 +142,9 @@ Feature: Quota Management for APIs
         Given an action "DeleteObject"
         And a permission to perform the "PutObject" action
         And a STORAGE_MANAGER type
-        And a bucket quota set to 1000 B
-        And an account quota set to 1000 B
-        And an upload size of 1000 B for the object "obj-1"
         And a bucket quota set to <bucketQuota> B
         And an account quota set to <accountQuota> B
+        And an upload size of <uploadSize> B for the object "obj-1"
         And a <userType> type
         And an environment setup for the API
         And an "existing" IAM Policy that "applies" with "ALLOW" effect for the current API

diff --git a/tests/ctst/package.json b/tests/ctst/package.json
@@ -24,7 +24,7 @@
     "@aws-sdk/client-s3": "^3.583.0",
     "@aws-sdk/client-sts": "^3.583.0",
     "@eslint/compat": "^1.1.1",
-    "cli-testing": "github:scality/cli-testing.git#1.2.3",
+    "cli-testing": "github:scality/cli-testing.git#1.2.4",
     "eslint": "^9.9.1",
     "eslint-config-scality": "scality/Guidelines#8.3.0",
     "typescript-eslint": "^8.4.0"

diff --git a/tests/ctst/steps/utils/kubernetes.ts b/tests/ctst/steps/utils/kubernetes.ts
@@ -1,6 +1,5 @@
 import fs from 'fs';
 import * as path from 'path';
-import lockFile from 'proper-lockfile';
 import { KubernetesHelper, Utils } from 'cli-testing';
 import Zenko from 'world/Zenko';
 import {
@@ -83,24 +82,27 @@ export async function createJobAndWaitForCompletion(
     const watchClient = createKubeWatchClient(world);
 
     const lockFilePath = path.join('/tmp', `${jobName}.lock`);
-    let releaseLock: (() => Promise<void>) | false = false;
 
-    if (!fs.existsSync(lockFilePath)) {
-        fs.writeFileSync(lockFilePath, '');
+    let lockAquired = false;
+    let tries = 600;
+    while (!lockAquired && tries > 0) {
+        try {
+            fs.writeFileSync(lockFilePath, 'lock', {
+                flag: 'wx',
+            });
+            lockAquired = true;
+        } catch {
+            world.logger.debug(`Failed to acquire lock for job: ${jobName}`, {
+                tries,
+            });            
+        }
+        tries--;
+        if (!lockAquired) {
+            await Utils.sleep(1000);
+        }
     }
 
     try {
-        releaseLock = await lockFile.lock(lockFilePath, {
-            // Expect the jobs in the queue does not take more than 5 minutes to complete
-            stale: 10 * 60 * 1000,
-            // use a linear backoff strategy
-            retries: {
-                retries: 610,
-                factor: 1,
-                minTimeout: 1000,
-                maxTimeout: 1000,
-            },
-        });
         world.logger.debug(`Acquired lock for job: ${jobName}`);
 
         // Read the cron job and prepare the job spec
@@ -159,11 +161,7 @@ export async function createJobAndWaitForCompletion(
         });
         throw err;
     } finally {
-        // Ensure the lock is released
-        if (releaseLock) {
-            await releaseLock();
-            world.logger.debug(`Released lock for job: ${jobName}`);
-        }
+        fs.unlinkSync(lockFilePath);
     }
 }
 

diff --git a/tests/ctst/yarn.lock b/tests/ctst/yarn.lock
@@ -4527,9 +4527,9 @@ cli-table3@^0.6.0:
   optionalDependencies:
     "@colors/colors" "1.5.0"
 
-"cli-testing@github:scality/cli-testing.git#1.2.3":
-  version "1.2.3"
-  resolved "git+ssh://[email protected]/scality/cli-testing.git#d6712d412dd7cd56cdf89a812203637d1ebab210"
+"cli-testing@github:scality/cli-testing.git#1.2.4":
+  version "1.2.4"
+  resolved "git+ssh://[email protected]/scality/cli-testing.git#687c3bdb5d684c5de4082a5dd9f9c00e94ec104c"
   dependencies:
     "@aws-crypto/sha256-universal" "^5.2.0"
     "@aws-sdk/client-iam" "^3.637.0"