-
Notifications
You must be signed in to change notification settings - Fork 50
152 lines (144 loc) · 8.39 KB
/
production-heartbeat.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
name: production-heartbeat
on:
workflow_dispatch: # As per documentation, the colon is necessary even though no config is required.
schedule:
# Cron syntax is "minute[0-59] hour[0-23] date[1-31] month[1-12] day[0-6]". '*' is 'any value', and multiple values
# can be specified with comma-separated lists. All times are UTC.
# So this expression means "run at 45 minutes past 1, 5, and 9 AM/PM UTC". The hours were chosen so that
# the jobs run only close to business hours of Central Time.
# Days were chosen to run only from Monday through Friday.
- cron: '45 13,17,21 * * 1,2,3,4,5'
jobs:
production-heartbeat:
strategy:
# By default, if any job in a matrix fails, all other jobs are immediately cancelled. This makes the jobs run to completion instead.
fail-fast: false
matrix:
os: [{vm: ubuntu-latest, exe: .sh}, {vm: windows-2019, exe: .cmd}]
node: ['lts/*']
runs-on: ${{ matrix.os.vm }}
timeout-minutes: 60
steps:
# === Setup. We need to get the code, set up nodejs, and create the results directory. ===
- uses: actions/checkout@v4
with:
ref: 'release'
- uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node }}
- uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: '11'
- run: mkdir smoke-test-results
# === Set our environment variables, either using default values or the repo's secrets ===
- name: Set environment variables
id: env_var_setup
# We'll want to use bash for this, to avoid any cross-platform shenanigans
shell: bash
run: |
# In the following script, the use of the `echo "name=value" >> $GITHUB_ENV` structure is used to set/update
# environment variables. Such updates are visible to all subsequent steps.
#
# If the CLI_VERSION repo secret is set, we want to install that version ofsf-cli, so we set an environment
# variable. Otherwise, we leave the environment variable unset, so it implicitly defaults to `latest`.
# Note: This can be used to intentionally fail the GHA by providing an invalid version number.
if [[ -n "${{ secrets.CLI_VERSION }}" ]]; then
echo "CLI_VERSION=@${{ secrets.CLI_VERSION}}" >> $GITHUB_ENV
fi
# If the SCANNER_VERSION repo secret is set, we want to install that version of sfdx-scanner, so we set an
# environment variable. Otherwise, we leave the environment variable unset, so it implicitly defaults to `latest`.
# Note: This can be used to intentionally fail the GHA by providing an invalid version number.
if [[ -n "${{ secrets.SCANNER_VERSION }}" ]]; then
echo "SCANNER_VERSION=@${{ secrets.SCANNER_VERSION }}" >> $GITHUB_ENV
fi
# If the FAIL_SMOKE_TESTS repo secret is set to ANY value, we should respond by deleting the `test/test-jars`
# folder. The smoke tests expect this folder's contents to exist, so an invocation of `scanner:rule:add` should
# fail, thereby failing the smoke tests as a whole.
# Note: This serves no purpose aside from providing a way to simulate a smoke test failure.
if [[ -n "${{ secrets.FAIL_SMOKE_TESTS }}" ]]; then
rm -rf ./test/test-jars
fi
# === Make three attempts to install SF through npm ===
- name: Install SF
id: sf_install
# If the first attempt fails, wait a minute and try again. After a second failure, wait 5 minutes then try again. Then give up.
# Set an output parameter, `retry_count`, indicating the number of retry attempts that were made.
run: |
(echo "::set-output name=retry_count::0" && npm install -g @salesforce/cli${{ env.CLI_VERSION }}) ||
(echo "::set-output name=retry_count::1" && sleep 60 && npm install -g @salesforce/cli${{ env.CLI_VERSION }}) ||
(echo "::set-output name=retry_count::2" && sleep 300 && npm install -g @salesforce/cli${{ env.CLI_VERSION }})
# === Make three attempts to install the scanner plugin through sf ===
- name: Install Scanner Plugin
id: scanner_install
# If the first attempt fails, wait a minute and try again. After a second failure, wait 5 minutes then try again. Then give up.
# Set an output parameter, `retry_count`, indicating the number of retry attempts that were made.
run: |
(echo "::set-output name=retry_count::0" && sf plugins install @salesforce/sfdx-scanner${{ env.SCANNER_VERSION }}) ||
(echo "::set-output name=retry_count::1" && sleep 60 && sf plugins install @salesforce/sfdx-scanner${{ env.SCANNER_VERSION }}) ||
(echo "::set-output name=retry_count::2" && sleep 300 && sf plugins install @salesforce/sfdx-scanner${{ env.SCANNER_VERSION }})
# === Log the installed plugins for easier debugging ===
- name: Log plugins
run: sf plugins
# === Attempt to execute the smoke tests ===
- name: Run smoke tests
id: smoke_tests
run: smoke-tests/smoke-test${{ matrix.os.exe }} sf
# === Upload the smoke-test-results folder as an artifact ===
- name: Upload smoke-test-results folder as artifact
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: smoke-test-results-${{ runner.os }}
path: smoke-test-results
# === Report any problems ===
- name: Report problems
# There are problems if any step failed or was skipped.
# Note that the `join()` call omits null values, so if any steps were skipped, they won't have a corresponding
# value in the string.
if: ${{ failure() || cancelled() }}
shell: bash
env:
# If we're here because steps failed or were skipped, then that's a critical problem. Otherwise it's a normal one.
# We can't use the `failure()` or `cancelled()` convenience methods outside of the `if` condition, hence the
# `contains()` calls.
IS_CRITICAL: ${{ contains(join(steps.*.outcome), 'failure') || contains(join(steps.*.outcome), 'skipped') }}
# Build the status strings for each step as environment variables to save space later. Null retry_count values
# will be replaced with `n/a` to maintain readability in the alert.
CLI_INSTALL_STATUS: ${{ steps.sf_install.outcome }} after ${{ steps.sf_install.outputs.retry_count || 'n/a' }} retries
SCANNER_INSTALL_STATUS: ${{ steps.scanner_install.outcome }} after ${{ steps.scanner_install.outputs.retry_count || 'n/a' }} retries
SMOKE_TESTS_STATUS: ${{ steps.smoke_tests.outcome }}
# A link to this run, so the PagerDuty assignee can quickly get here.
RUN_LINK: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
run: |
# GHA env-vars don't have robust conditional logic, so we'll use this if-else branch to define some bash env-vars.
if [[ ${{ env.IS_CRITICAL }} == true ]]; then
ALERT_SEV="critical"
ALERT_SUMMARY="Production heartbeat script failed on ${{ runner.os }}"
else
ALERT_SEV="info"
ALERT_SUMMARY="Production heartbeat script succeeded with retries on ${{ runner.os }}"
fi
# Define a helper function to create our POST request's data, to sidestep issues with nested quotations.
generate_post_data() {
# This is known as a HereDoc, and it lets us declare multi-line input ending when the specified limit string,
# in this case EOF, is encountered.
cat <<EOF
{"payload": {
"summary": "${ALERT_SUMMARY}",
"source": "Github Actions",
"severity": "${ALERT_SEV}",
"custom_details": "SF install: ${{ env.CLI_INSTALL_STATUS }}. Scanner install: ${{ env.SCANNER_INSTALL_STATUS }}. Smoke tests: ${{ env.SMOKE_TESTS_STATUS }}."
},
"links": [{
"href": "${{ env.RUN_LINK }}",
"text": "Link to action execution"
}],
"event_action": "trigger",
"dedup_key": "GH-HB-${{ matrix.os.vm }}-${{ matrix.node }}",
"routing_key": "${{ secrets.PAGERDUTY_HEARTBEAT_KEY }}"
}
EOF
}
# Make our POST request
curl --request POST --data "$(generate_post_data)" https://events.pagerduty.com/v2/enqueue