diff --git a/DC-SBP-SLES4SAP-sap-infra-monitoring b/DC-SBP-SLES4SAP-sap-infra-monitoring
index b5d9a163..df59b626 100644
--- a/DC-SBP-SLES4SAP-sap-infra-monitoring
+++ b/DC-SBP-SLES4SAP-sap-infra-monitoring
@@ -4,7 +4,7 @@
 ADOC_TYPE="article"
 ADOC_POST="yes"

-ADOC_ATTRIBUTES="--attribute docdate=2022-02-15"
+ADOC_ATTRIBUTES="--attribute docdate=2023-09-29"

 # stylesheets
 STYLEROOT=/usr/share/xml/docbook/stylesheet/sbp
diff --git a/adoc/SAP-S4HA10-setup-simplemount-sle15.adoc b/adoc/SAP-S4HA10-setup-simplemount-sle15.adoc
index 668931c9..8b0c4dd1 100644
--- a/adoc/SAP-S4HA10-setup-simplemount-sle15.adoc
+++ b/adoc/SAP-S4HA10-setup-simplemount-sle15.adoc
@@ -625,7 +625,7 @@ For the ERS and ASCS instances, edit the instance profiles
 profile directory _/usr/sap/{mySid}/SYS/profile/_.

 Tell the `{sapStartSrv}` service to load the HA script connector library and to
-use the connector `{s4sClConnector3}`. On the other hand, please make sure the
+use the connector `{s4sClConnector3}`. On the other hand, make sure the
 feature _Autostart_ is *not* used.

 [subs="attributes"]
@@ -993,8 +993,10 @@ primitive rsc_sap_{mySID}_{myInstAscs} SAPInstance \
 ================================================
 The shown SAPInstance monitor timeout is a trade-off between fast recovery of
 the ASCS vs. resilience against sporadic temporary NFS issues. You may slightly
-increase it to fit your infrastructure.
-See also manual pages ocf_heartbeat_SAPInstance(7), ocf_heartbeat_IPAddr2(7) and ocf_suse_SAPStartSrv(7).
+increase it to fit your infrastructure. Consult your storage or NFS server
+documentation for appropriate timeout values.
+See also manual pages ocf_heartbeat_SAPInstance(7), ocf_heartbeat_IPAddr2(7), ocf_suse_SAPStartSrv(7)
+and nfs(5).

 .ASCS group
 ================================================
@@ -1046,8 +1048,10 @@ primitive rsc_sap_{mySID}_{myInstErs} SAPInstance \
 ================================================
 The shown SAPInstance monitor timeout is a trade-off between fast recovery of
 the ERS vs. resilience against sporadic temporary NFS issues. You may slightly
-increase it to fit your infrastructure.
-See also manual pages ocf_heartbeat_SAPInstance(7), ocf_heartbeat_IPAddr2(7) and ocf_suse_SAPStartSrv(7).
+increase it to fit your infrastructure. Consult your storage or NFS server
+documentation for appropriate timeout values.
+See also manual pages ocf_heartbeat_SAPInstance(7), ocf_heartbeat_IPAddr2(7), ocf_suse_SAPStartSrv(7)
+and nfs(5).

 .ERS group
 ================================================
@@ -2237,7 +2241,7 @@ Find below the Corosync configuration for one corosync ring. Ideally two rings w
 [subs="specialchars,attributes"]
 ----
 {my2nd1}:~ # cat /etc/corosync/corosync.conf
-# Read the corosync.conf.5 manual page
+# Please read the corosync.conf.5 manual page
 totem {
     version: 2
     secauth: on
diff --git a/adoc/SAP-S4HA10-setupguide-sle15.adoc b/adoc/SAP-S4HA10-setupguide-sle15.adoc
index 157ca35f..5660ec51 100644
--- a/adoc/SAP-S4HA10-setupguide-sle15.adoc
+++ b/adoc/SAP-S4HA10-setupguide-sle15.adoc
@@ -993,6 +993,14 @@ Verify the SBD cluster configuration and if needed, modify them as described.
 First, configure the resources for the file system, IP address and the {sap}
 instance. You need to adapt the parameters for your specific environment.

+The shown file system and SAPInstance monitor timeouts are a trade-off between
+fast recovery and resilience against sporadic temporary NFS issues. You may
+slightly increase them to fit your infrastructure.
+The SAPInstance timeout needs to be higher than the file system timeout.
+Consult your storage or NFS server documentation for appropriate
+timeout values.
+See also manual pages ocf_heartbeat_Filesystem(7), ocf_heartbeat_SAPInstance(7)
+and nfs(5).
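+
+Once the resources are configured, the effective monitor operations can be
+reviewed and, if needed, adjusted with crmsh. The resource name below is
+hypothetical, assuming SID HA1 and ASCS instance number 00:
+
+----
+# crm configure show rsc_sap_HA1_ASCS00 | grep monitor
+# crm configure edit rsc_sap_HA1_ASCS00
+----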

 .ASCS primitive
 ================================================
@@ -1042,6 +1050,14 @@ As user _root_, type the following command:
 Second, configure the resources for the file system, IP address and the {sap}
 instance. You need to adapt the parameters for your specific environment.

+The shown file system and SAPInstance monitor timeouts are a trade-off between
+fast recovery and resilience against sporadic temporary NFS issues. You may
+slightly increase them to fit your infrastructure.
+The SAPInstance timeout needs to be higher than the file system timeout.
+Consult your storage or NFS server documentation for appropriate
+timeout values.
+See also manual pages ocf_heartbeat_Filesystem(7), ocf_heartbeat_SAPInstance(7)
+and nfs(5).

 The specific parameter _IS_ERS=true_ must only be set for the ERS instance.

diff --git a/adoc/SLES4SAP-hana-sr-guide-perfopt-15-aws.adoc b/adoc/SLES4SAP-hana-sr-guide-perfopt-15-aws.adoc
index 3d5adb37..44ff51d8 100644
--- a/adoc/SLES4SAP-hana-sr-guide-perfopt-15-aws.adoc
+++ b/adoc/SLES4SAP-hana-sr-guide-perfopt-15-aws.adoc
@@ -1864,12 +1864,13 @@ _crm-saphana.txt_, and load it with the command:
 .Typical Resource Agent parameter settings for different scenarios
-[width="99%",cols="52%,16%,16%,16%",options="header",]
+[width="99%",cols="40%,15%,15%,15%,15%",options="header",]
 |============================================================
-|Parameter |Performance Optimized |Cost Optimized |Multi-Tier
-|PREFER_SITE_TAKEOVER |true |false |false / true
-|AUTOMATED_REGISTER |false / true |false / true |false
-|DUPLICATE_PRIMARY_TIMEOUT |7200 |7200 |7200
+|Parameter |Performance Optimized |Cost Optimized |Multi-Tier |Multi-Target
+|PREFER_SITE_TAKEOVER |true |false |false / true |false / true
+|AUTOMATED_REGISTER |false / true |false / true |false |true / false
+|DUPLICATE_PRIMARY_TIMEOUT |7200 |7200 |7200 |7200
 |============================================================

+
 // TODO PRIO1: Check if all parameters in special DUPLICATE_PRIMARY_TIMEOUT
 // are explained well
diff --git a/adoc/SLES4SAP-sap-infra-monitoring-docinfo.xml b/adoc/SLES4SAP-sap-infra-monitoring-docinfo.xml
index 996e66a1..c470e274 100644
--- a/adoc/SLES4SAP-sap-infra-monitoring-docinfo.xml
+++ b/adoc/SLES4SAP-sap-infra-monitoring-docinfo.xml
@@ -72,7 +72,7 @@
     This guide provides detailed information about how to install and customize
-    SUSE Linux Enterprise Server for SAP Applications to monitor hardware related metrics to provide insights that can help increase uptime of critical SAP applications.
+    SUSE Linux Enterprise Server for SAP Applications to monitor hardware-related metrics to provide insights that can help increase uptime of critical SAP applications.
     It is based on SUSE Linux Enterprise Server for SAP Applications 15 SP3.
     The concept however can also be used starting with SUSE Linux Enterprise Server for SAP Applications 15 SP1.
diff --git a/adoc/SLES4SAP-sap-infra-monitoring-alertmanager.adoc b/adoc/SLES4SAP-sap-infra-monitoring-alertmanager.adoc
new file mode 100644
index 00000000..295cffc7
--- /dev/null
+++ b/adoc/SLES4SAP-sap-infra-monitoring-alertmanager.adoc
@@ -0,0 +1,79 @@
+// Alertmanager adoc file
+// Use the following line to include each tagged content block in the main document:
+// include::SLES4SAP-sap-infra-monitoring-alertmanager.adoc[tag=alert-XXXXX]
+
+// Alertmanager general
+# tag::alert-general[]
+===== Alertmanager
+
+The https://prometheus.io/docs/alerting/latest/alertmanager/[Alertmanager] handles alerts sent by client applications such as the Prometheus or Loki server.
+It takes care of deduplicating, grouping, and routing them to the correct receiver integration such as email or PagerDuty. It also takes care of
+silencing and inhibition of alerts.
+# end::alert-general[]
+
+
+// Alertmanager implementing
+# tag::alert-impl[]
+=== Alertmanager
+The Alertmanager package can be found in the PackageHub repository.
+The repository needs to be activated via the `SUSEConnect` command first, unless you have activated it in the previous steps already.
+
+[source]
+----
+SUSEConnect --product PackageHub/15.3/x86_64
+----
+
+Alertmanager can then be installed via the `zypper` command:
+[subs="attributes,specialchars,verbatim,quotes"]
+----
+zypper in golang-github-prometheus-alertmanager
+----
+
+Notifications can be sent to different receivers. A receiver can be an email address, a chat system, a webhook, and more.
+(For a complete list, take a look at the https://prometheus.io/docs/alerting/latest/configuration/#receiver[Alertmanager documentation].)
+
+The example configuration below uses email as the notification receiver.
+
+Edit the Alertmanager configuration file `/etc/alertmanager/config.yml` as shown below:
+
+[subs="attributes,specialchars,verbatim,quotes"]
+----
+global:
+  resolve_timeout: 5m
+  smtp_smarthost: ''
+  smtp_from: ''
+  smtp_auth_username: ''
+  smtp_auth_password: ''
+  smtp_require_tls: true
+
+route:
+  group_by: ['...']
+  group_wait: 10s
+  group_interval: 5m
+  repeat_interval: 4h
+  receiver: 'email'
+
+receivers:
+  - name: 'email'
+    email_configs:
+      - send_resolved: true
+        to: ''
+        from: '<mail-address>'
+        headers:
+          From: '<mail-address>'
+          Subject: '{{ template "email.default.subject" . }}'
+        html: '{{ template "email.default.html" . }}'
+----
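+
+If the Alertmanager package ships the `amtool` command line utility (this may
+depend on the package version), the edited configuration can be syntax-checked
+before the service is started:
+
+----
+amtool check-config /etc/alertmanager/config.yml
+----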
+
+Start and enable the `alertmanager` service:
+[subs="attributes,specialchars,verbatim,quotes"]
+----
+systemctl enable --now prometheus-alertmanager.service
+----
+
+# end::alert-impl[]
\ No newline at end of file
diff --git a/adoc/SLES4SAP-sap-infra-monitoring-collectd.adoc b/adoc/SLES4SAP-sap-infra-monitoring-collectd.adoc
new file mode 100644
index 00000000..fd10637d
--- /dev/null
+++ b/adoc/SLES4SAP-sap-infra-monitoring-collectd.adoc
@@ -0,0 +1,106 @@
+// Collectd adoc file
+// Use the following line to include each tagged content block in the main document:
+// include::SLES4SAP-sap-infra-monitoring-collectd.adoc[tag=collectd-XXXXX]
+
+// Collectd general
+# tag::collectd-general[]
+
+===== `collectd` - System information collection daemon
+https://collectd.org/[`collectd`] is a small daemon which collects system information periodically and provides mechanisms to store and monitor the values in a variety of ways.
+
+# end::collectd-general[]
+
+
+// Collectd implementing
+# tag::collectd-impl[]
+
+=== `collectd`
+
+The `collectd` packages can be installed from the SUSE repositories as well. For the example at hand, we have used a newer version from the openSUSE repository.
+
+Create a file `/etc/zypp/repos.d/server_monitoring.repo` and add the following content to it:
+[subs="attributes,specialchars,verbatim,quotes"]
+.Content for /etc/zypp/repos.d/server_monitoring.repo
+----
+[server_monitoring]
+name=Server Monitoring Software (SLE_15_SP3)
+type=rpm-md
+baseurl=https://download.opensuse.org/repositories/server:/monitoring/SLE_15_SP3/
+gpgcheck=1
+gpgkey=https://download.opensuse.org/repositories/server:/monitoring/SLE_15_SP3/repodata/repomd.xml.key
+enabled=1
+----
+
+Afterward, refresh the repository metadata and install `collectd` and its plugins.
+
+[subs="attributes,specialchars,verbatim,quotes"]
+----
+# zypper ref
+# zypper in collectd collectd-plugins-all
+----
+
+Now the `collectd` configuration must be adapted to collect the information you want to get and to export it in the format you need.
+For example, when looking for network latency, use the ping plugin and expose the data in a Prometheus format.
+
+[subs="attributes,specialchars,verbatim,quotes"]
+.Configuration of collectd in /etc/collectd.conf (excerpts)
+----
+...
+LoadPlugin ping
+...
+<Plugin ping>
+  Host "10.162.63.254"
+  Interval 1.0
+  Timeout 0.9
+  TTL 255
+# SourceAddress "1.2.3.4"
+# AddressFamily "any"
+  Device "eth0"
+  MaxMissed -1
+</Plugin>
+...
+LoadPlugin write_prometheus
+...
+<Plugin write_prometheus>
+  Port "9103"
+</Plugin>
+...
+----
+
+Uncomment the `LoadPlugin` lines and check the `<Plugin>` sections in the file.
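+
+Before applying the changes, the adapted configuration can be syntax-checked by
+running `collectd` in test mode (assuming the default configuration file
+/etc/collectd.conf is used):
+
+[subs="attributes,specialchars,verbatim,quotes"]
+----
+# collectd -t
+----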
+
+Modify the `systemd` unit so that `collectd` works as expected. First, create a copy of the system-provided service file.
+[subs="attributes,specialchars,verbatim,quotes"]
+----
+# cp /usr/lib/systemd/system/collectd.service /etc/systemd/system/collectd.service
+----
+
+Second, adapt this local copy.
+Add the required `CapabilityBoundingSet` parameters to the local copy `/etc/systemd/system/collectd.service`.
+[subs="attributes,specialchars,verbatim,quotes"]
+----
+...
+# Here's an (incomplete) list of the plugins' known capability requirements:
+#   ping   CAP_NET_RAW
+CapabilityBoundingSet=CAP_NET_RAW
+...
+----
+
+Activate the changes and start the `collectd` service.
+[subs="attributes,specialchars,verbatim,quotes"]
+----
+# systemctl daemon-reload
+# systemctl enable --now collectd
+----
+
+All `collectd` metrics are accessible at port 9103.
+
+With a quick test, you can see if the metrics can be scraped.
+[subs="attributes,specialchars,verbatim,quotes"]
+----
+# curl localhost:9103/metrics
+----
+// The official project on GitHub: https://github.com/collectd/collectd/
+
+
+# end::collectd-impl[]
\ No newline at end of file
diff --git a/adoc/SLES4SAP-sap-infra-monitoring-grafana.adoc b/adoc/SLES4SAP-sap-infra-monitoring-grafana.adoc
new file mode 100644
index 00000000..e4a99fbc
--- /dev/null
+++ b/adoc/SLES4SAP-sap-infra-monitoring-grafana.adoc
@@ -0,0 +1,84 @@
+// Grafana adoc file
+// Use the following line to include each tagged content block in the main document:
+// include::SLES4SAP-sap-infra-monitoring-grafana.adoc[tag=grafana-XXXXX]
+
+// Grafana general
+# tag::grafana-general[]
+
+===== Grafana
+
+https://grafana.com/oss/grafana/[Grafana] is an open source visualization and analytics platform.
+Grafana's plug-in architecture allows interaction with a variety of data sources without creating data copies.
+Its graphical browser-based user interface visualizes the data through highly customizable views, providing an interactive diagnostic workspace.
+
+Grafana can display metrics data from Prometheus and log data from Loki side by side, correlating events from log files with metrics.
+This can provide helpful insights when trying to identify the cause of an issue.
+Also, Grafana can trigger alerts based on metrics or log entries, and thus help identify potential issues early.
+
+# end::grafana-general[]
+
+
+// Grafana implementing
+# tag::grafana-impl[]
+
+=== Grafana
+
+The Grafana RPM packages can be found in the PackageHub repository.
+The repository has to be activated via the `SUSEConnect` command first, unless you have activated it in the previous steps already.
+----
+# SUSEConnect --product PackageHub/15.3/x86_64
+----
+
+Grafana can then be installed via the `zypper` command:
+----
+# zypper in grafana
+----
+
+Start and enable the Grafana server service:
+----
+# systemctl enable --now grafana-server.service
+----
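+
+Before connecting with a browser, you can verify that the server is up via its
+health API (assuming the default HTTP port 3000):
+
+----
+# curl -s http://localhost:3000/api/health
+----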
+
+Now connect from a browser to your Grafana instance and log in:
+
+image::sap-infra-monitoring-grafana-login.png[Grafana Login page,scaledwidth=80%,title="Grafana welcome page"]
+
+==== Grafana data sources
+After the login, the data sources must be added. On the right-hand side there is a wheel icon where a new data source can be added.
+
+image::sap-infra-monitoring-grafana-datasource-add.png[Grafana add a new data source,scaledwidth=80%,title="Adding a new Grafana data source"]
+
+Add a data source for the Prometheus service.
+
+.Prometheus example
+image::sap-infra-monitoring-grafana-data-prometheus.png[Prometheus data source,scaledwidth=80%,title="Grafana data source for Prometheus DB"]
+
+Also add a data source for Loki.
+
+.Loki example
+image::sap-infra-monitoring-grafana-data-loki.png[Loki data source,scaledwidth=80%,title="Grafana data source for LOKI DB"]
+
+Now Grafana can access both the metrics stored in Prometheus and the log data collected by Loki to visualize them.
+
+==== Grafana dashboards
+
+Dashboards are how Grafana presents information to the user.
+Prepared dashboards can be downloaded from https://grafana.com/dashboards, or imported using the Grafana ID.
+
+.Grafana dashboard import
+image::sap-infra-monitoring-grafana-dashboards.png[Dashboard overview,scaledwidth=80%,title="Grafana dashboard import option"]
+
+Dashboards can also be created from scratch. Information from all data sources can be merged into one dashboard.
+
+image::sap-infra-monitoring-grafana-dashboard-new.png[Dashboard create a new dashboard,scaledwidth=80%,title="Build your own dashboard"]
+
+==== Putting it all together
+The picture below shows a dashboard displaying detailed information about the SAP HANA cluster, orchestrated by *pacemaker*.
+
+.Dashboard example for SAP HANA
+image::sap-infra-monitoring-grafana-hana-cluster.png[SUSE HANA cluster dashboard example,scaledwidth=80%,title="SUSE cluster exporter dashboard"]
+
+
+# end::grafana-impl[]
\ No newline at end of file
diff --git a/adoc/SLES4SAP-sap-infra-monitoring-ipmi.adoc b/adoc/SLES4SAP-sap-infra-monitoring-ipmi.adoc
new file mode 100644
index 00000000..b2400173
--- /dev/null
+++ b/adoc/SLES4SAP-sap-infra-monitoring-ipmi.adoc
@@ -0,0 +1,105 @@
+// IPMI adoc file
+// Use the following line to include each tagged content block in the main document:
+// include::SLES4SAP-sap-infra-monitoring-ipmi.adoc[tag=ipmi-XXXXX]
+
+// IPMI general
+# tag::ipmi-general[]
+
+===== Prometheus IPMI Exporter
+The https://github.com/prometheus-community/ipmi_exporter[Prometheus IPMI Exporter] supports both
+
+* the regular /metrics endpoint for Prometheus, exposing metrics from the host that the exporter is running on,
+* and an /ipmi endpoint that supports IPMI over RMCP.
+
+One exporter instance running on one host can be used to monitor a large number of IPMI interfaces by passing the target parameter to a scrape.
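+For example, a remote BMC could be scraped through the /ipmi endpoint as
+sketched below. The host names are placeholders, and a matching module (here
+named `remote`) must be defined in the exporter configuration:
+
+----
+# curl 'http://localhost:9290/ipmi?target=bmc1.example.com&module=remote'
+----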
+
+# end::ipmi-general[]
+
+
+// IPMI implementing
+# tag::ipmi-impl[]
+
+
+=== Prometheus IPMI Exporter
+
+The IPMI exporter can be used to scrape information like temperature, power supply and fan information.
+
+Create a directory, then download and extract the IPMI exporter.
+[subs="attributes,specialchars,verbatim,quotes"]
+----
+# mkdir ipmi_exporter
+# cd ipmi_exporter
+# curl -OL https://github.com/prometheus-community/ipmi_exporter/releases/download/v1.4.0/ipmi_exporter-1.4.0.linux-amd64.tar.gz
+# tar xzvf ipmi_exporter-1.4.0.linux-amd64.tar.gz
+----
+
+NOTE: We have been using version 1.4.0 of the IPMI exporter. For a different release, the URL used in the `curl` command above needs to be adapted.
+      Current releases can be found at the https://github.com/prometheus-community/ipmi_exporter[IPMI exporter GitHub repository].
+
+Some additional packages are required and need to be installed.
+[subs="attributes,specialchars,verbatim,quotes"]
+----
+# zypper in freeipmi libipmimonitoring6 monitoring-plugins-ipmi-sensor1
+----
+
+To start the IPMI exporter on the observed host, first start a new `screen` session, and then start the exporter.footnote:[Starting the IPMI exporter should really be done by creating a systemd unit.]
+// TODO: replace use of screen by a systemd unit for the IPMI exporter
+[subs="attributes,specialchars,verbatim,quotes"]
+.Starting IPMI
+----
+# screen -S ipmi
+# cd ipmi_exporter-1.4.0.linux-amd64
+# ./ipmi_exporter
+----
+The IPMI exporter binary `ipmi_exporter` has been started in a screen session which can be detached (type `Ctrl+a d`).
+This lets the exporter continue running in the background.
+
+==== IPMI Exporter Systemd Service File
+
+A more convenient and secure way to start the IPMI exporter is using a systemd service.
+To do so, first copy the exporter binary to /usr/local/bin/, then create a service unit file under /etc/systemd/system/:
+
+[subs="attributes,specialchars,verbatim,quotes"]
+.Copy the IPMI exporter binary
+----
+# cp ipmi_exporter-1.4.0.linux-amd64/ipmi_exporter /usr/local/bin/
+----
+
+[source]
+----
+# cat /etc/systemd/system/ipmi-exporter.service
+[Unit]
+Description=IPMI exporter
+Documentation=https://github.com/prometheus-community/ipmi_exporter
+
+[Service]
+Type=simple
+Restart=no
+ExecStart=/usr/local/bin/ipmi_exporter
+
+[Install]
+WantedBy=multi-user.target
+----
+
+`systemd` needs to be informed about the new unit:
+
+.Reload the systemd daemon
+[source]
+----
+# systemctl daemon-reload
+----
+
+Finally, enable and start the service:
+
+.Start the IPMI exporter
+[source]
+----
+# systemctl enable --now ipmi-exporter.service
+----
+
+The metrics of the ipmi_exporter are accessible on port 9290.
+
+//accessing the remote configured ipmi metrics: http://ls3331:9290/ipmi?target=ls3316r&module=remote
+
+
+# end::ipmi-impl[]
diff --git a/adoc/SLES4SAP-sap-infra-monitoring-loki.adoc b/adoc/SLES4SAP-sap-infra-monitoring-loki.adoc
new file mode 100644
index 00000000..66538a86
--- /dev/null
+++ b/adoc/SLES4SAP-sap-infra-monitoring-loki.adoc
@@ -0,0 +1,123 @@
+// Loki adoc file
+// Use the following line to include each tagged content block in the main document:
+// include::SLES4SAP-sap-infra-monitoring-loki.adoc[tag=loki-XXXXX]
+
+// Loki general
+# tag::loki-general[]
+
+===== Loki
+
+https://grafana.com/oss/loki/[Loki] is a log aggregation system, inspired by Prometheus and designed to be cost effective and easy to operate.
+Unlike other logging systems, Loki is built around the idea of only indexing a set of metadata (labels) for logs and leaving the original log message unindexed.
+Log data itself is then compressed and stored in chunks in object stores, for example locally on the file system.
+A small index and highly compressed chunks simplify the operation and significantly lower the cost of Loki.
+
+# end::loki-general[]
+
+
+
+// Loki implementing
+# tag::loki-impl[]
+
+=== Loki
+The Loki RPM packages can be found in the PackageHub repository.
+The repository needs to be activated via the `SUSEConnect` command first, unless you have activated it in the previous steps already.
+----
+# SUSEConnect --product PackageHub/15.3/x86_64
+----
+
+Loki can then be installed via the `zypper` command:
+----
+# zypper in loki
+----
+
+Edit the Loki configuration file `/etc/loki/loki.yaml` and change the following lines:
+[source]
+----
+chunk_store_config:
+  max_look_back_period: 240h
+
+table_manager:
+  retention_deletes_enabled: true
+  retention_period: 240h
+----
+
+Start and enable the Loki service:
+----
+# systemctl enable --now loki.service
+----
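+
+Whether Loki is up and ready can be verified via its HTTP API (assuming the
+default port 3100):
+
+----
+# curl -s http://localhost:3100/ready
+----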
+
+# end::loki-impl[]
+
+
+
+// Loki practical use cases
+# tag::loki-alert[]
+
+==== Loki alerts
+Loki supports Prometheus-compatible alerting rules. They follow the same syntax, except that they use LogQL for their expressions.
+To activate alerting, the Loki configuration needs a component called the ruler:
+
+.loki.yaml
+[source]
+-----
+# Loki defaults to running in multi-tenant mode.
+# Multi-tenant mode is set in the configuration with:
+# auth_enabled: true
+# When configured with "auth_enabled: false", Loki uses a single tenant.
+# The single tenant ID will be the string fake.
+auth_enabled: false
+[...]
+
+ruler:
+  wal:
+    dir: /loki/ruler-wal
+  storage:
+    type: local
+    local:
+      directory: /etc/loki/rules
+  rule_path: /tmp/loki-rules-scratch
+  alertmanager_url: http://alertmanager:9093
+  enable_alertmanager_v2: true
+-----
+
+Depending on the directory path given in our example above, the rule file has to be stored under:
+
+ /etc/loki/rules/fake/rules.yml
+
+NOTE: We are using `auth_enabled: false`, and therefore the default tenant ID is `fake`, which needs to be added
+      to the path where the rules are stored.
+
+The example rule below will trigger an email (via the Alertmanager configuration) when a failed SSH password login is found in the logs.
+A matching log line looks like the following:
+
+ 2023-07-19T10:41:38.076428+02:00 nuc5 sshd[16723]: Failed password for invalid user charly from 192.168.1.201 port 58831 ssh2
+
+.rules.yml
+[source]
+----
+groups:
+  - name: accessLog
+    rules:
+      - alert: Failed_user_found
+        expr: 'sum(
+          count_over_time(
+            {filename="/var/log/messages" }
+            |= "Failed password for"
+            | pattern `T