diff --git a/CHANGELOG.md b/CHANGELOG.md index 7972758f..72f99de6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Run a `containerdebug` process in the background of each Airflow container to collect debugging information ([#557]). - Aggregate emitted Kubernetes events on the CustomResources ([#571]). +- Add OPA support ([#573]). ### Changed @@ -14,6 +15,7 @@ [#557]: https://github.com/stackabletech/airflow-operator/pull/557 [#571]: https://github.com/stackabletech/airflow-operator/pull/571 [#572]: https://github.com/stackabletech/airflow-operator/pull/572 +[#573]: https://github.com/stackabletech/airflow-operator/pull/573 ## [24.11.1] - 2025-01-09 diff --git a/Cargo.lock b/Cargo.lock index ee5992e2..c7c3f953 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -2371,8 +2371,8 @@ dependencies = [ [[package]] name = "stackable-operator" -version = "0.85.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.85.0#59506c6202778889a27b6ae8153457e60a49c68d" +version = "0.86.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.86.0#e5bfee596cc918b05f3e1d7e667c25951317cf31" dependencies = [ "chrono", "clap", @@ -2410,7 +2410,7 @@ dependencies = [ [[package]] name = "stackable-operator-derive" version = "0.3.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.85.0#59506c6202778889a27b6ae8153457e60a49c68d" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.86.0#e5bfee596cc918b05f3e1d7e667c25951317cf31" dependencies = [ "darling", "proc-macro2", @@ -2421,7 +2421,7 @@ dependencies = [ [[package]] name = "stackable-shared" version = "0.0.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.85.0#59506c6202778889a27b6ae8153457e60a49c68d" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.86.0#e5bfee596cc918b05f3e1d7e667c25951317cf31" dependencies = [ "kube", "semver", diff --git a/Cargo.nix b/Cargo.nix index 691d50bf..a27671d7 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -1558,7 +1558,7 @@ rec { "default" = [ "Debug" "Clone" "Copy" "PartialEq" "Eq" "PartialOrd" "Ord" "Hash" "Default" "Deref" "DerefMut" "Into" ]; "full" = [ "syn/full" ]; }; - resolvedDefaultFeatures = [ "Clone" "Debug" "Default" "Hash" "PartialEq" ]; + resolvedDefaultFeatures = [ "Clone" "Debug" "Default" "Eq" "Hash" "PartialEq" ]; }; "either" = rec { crateName = "either"; @@ -7391,13 +7391,13 @@ rec { }; "stackable-operator" = rec { crateName = "stackable-operator"; - version = "0.85.0"; + version = "0.86.0"; edition = "2021"; workspace_member = null; src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; - rev = "59506c6202778889a27b6ae8153457e60a49c68d"; - sha256 = "0rh476rmn5850yj85hq8znwmlfhd7l5bkxz0n5i9m4cddxhi2cl5"; + rev = "e5bfee596cc918b05f3e1d7e667c25951317cf31"; + sha256 = "04a866w46mbrsqv7iq9x6l2kh1bnykkmfnjwwfrqk6njn91arvf1"; }; libName = "stackable_operator"; authors = [ @@ -7430,7 +7430,7 @@ rec { name = "educe"; packageId = "educe"; usesDefaultFeatures = false; - features = [ "Clone" "Debug" "Default" "PartialEq" ]; + features = [ "Clone" "Debug" "Default" "PartialEq" "Eq" ]; } { name = "either"; @@ -7556,8 +7556,8 @@ rec { workspace_member = null; src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; - rev = "59506c6202778889a27b6ae8153457e60a49c68d"; - sha256 = "0rh476rmn5850yj85hq8znwmlfhd7l5bkxz0n5i9m4cddxhi2cl5"; + rev = "e5bfee596cc918b05f3e1d7e667c25951317cf31"; + sha256 = "04a866w46mbrsqv7iq9x6l2kh1bnykkmfnjwwfrqk6njn91arvf1"; }; procMacro = true; libName = "stackable_operator_derive"; @@ -7591,8 +7591,8 @@ rec { workspace_member = null; src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; - rev = "59506c6202778889a27b6ae8153457e60a49c68d"; - sha256 = "0rh476rmn5850yj85hq8znwmlfhd7l5bkxz0n5i9m4cddxhi2cl5"; + rev = "e5bfee596cc918b05f3e1d7e667c25951317cf31"; + sha256 = "04a866w46mbrsqv7iq9x6l2kh1bnykkmfnjwwfrqk6njn91arvf1"; }; libName = "stackable_shared"; authors = [ diff --git a/Cargo.toml b/Cargo.toml index f26d8fc6..4ded2aaa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde_yaml = "0.9" snafu = "0.8" -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.85.0" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.86.0" } strum = { version = "0.26", features = ["derive"] } tokio = { version = "1.40", features = ["full"] } tracing = "0.1" diff --git a/crate-hashes.json b/crate-hashes.json index 290d87f2..7edbbe57 100644 --- a/crate-hashes.json +++ b/crate-hashes.json @@ -1,6 +1,6 @@ { - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.85.0#stackable-operator-derive@0.3.1": "0rh476rmn5850yj85hq8znwmlfhd7l5bkxz0n5i9m4cddxhi2cl5", - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.85.0#stackable-operator@0.85.0": "0rh476rmn5850yj85hq8znwmlfhd7l5bkxz0n5i9m4cddxhi2cl5", - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.85.0#stackable-shared@0.0.1": "0rh476rmn5850yj85hq8znwmlfhd7l5bkxz0n5i9m4cddxhi2cl5", + "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.86.0#stackable-operator-derive@0.3.1": "04a866w46mbrsqv7iq9x6l2kh1bnykkmfnjwwfrqk6njn91arvf1", + "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.86.0#stackable-operator@0.86.0": "04a866w46mbrsqv7iq9x6l2kh1bnykkmfnjwwfrqk6njn91arvf1", + "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.86.0#stackable-shared@0.0.1": "04a866w46mbrsqv7iq9x6l2kh1bnykkmfnjwwfrqk6njn91arvf1", "git+https://github.com/stackabletech/product-config.git?tag=0.7.0#product-config@0.7.0": "0gjsm80g6r75pm3824dcyiz4ysq1ka4c1if6k1mjm9cnd5ym0gny" } \ No newline at end of file diff --git a/deploy/helm/airflow-operator/crds/crds.yaml b/deploy/helm/airflow-operator/crds/crds.yaml index a6125873..54fa5a93 100644 --- a/deploy/helm/airflow-operator/crds/crds.yaml +++ b/deploy/helm/airflow-operator/crds/crds.yaml @@ -492,6 +492,42 @@ spec: - authenticationClass type: object type: array + authorization: + description: Authorization options. Learn more in the [Airflow authorization usage guide](https://docs.stackable.tech/home/nightly/airflow/usage-guide/security#_authorization). + nullable: true + properties: + opa: + description: Configure the OPA stacklet [discovery ConfigMap](https://docs.stackable.tech/home/nightly/concepts/service_discovery) and the name of the Rego package containing your authorization rules. Consult the [OPA authorization documentation](https://docs.stackable.tech/home/nightly/concepts/opa) to learn how to deploy Rego authorization rules with OPA. + nullable: true + properties: + cache: + default: + entryTimeToLive: 30s + maxEntries: 10000 + description: Least Recently Used (LRU) cache with per-entry time-to-live (TTL) value. + properties: + entryTimeToLive: + default: 30s + description: Time to live per entry + type: string + maxEntries: + default: 10000 + description: Maximum number of entries in the cache; If this threshold is reached then the least recently used item is removed. + format: uint32 + minimum: 0.0 + type: integer + type: object + configMapName: + description: The [discovery ConfigMap](https://docs.stackable.tech/home/nightly/concepts/service_discovery) for the OPA stacklet that should be used for authorization requests. + type: string + package: + description: The name of the Rego package containing the Rego rules for the product. + nullable: true + type: string + required: + - configMapName + type: object + type: object credentialsSecret: description: The name of the Secret object containing the admin user credentials and database connection details. Read the [getting started guide first steps](https://docs.stackable.tech/home/nightly/airflow/getting_started/first_steps) to find out more. type: string diff --git a/docs/modules/airflow/pages/usage-guide/security.adoc b/docs/modules/airflow/pages/usage-guide/security.adoc index e09a5be2..91bfc18d 100644 --- a/docs/modules/airflow/pages/usage-guide/security.adoc +++ b/docs/modules/airflow/pages/usage-guide/security.adoc @@ -129,12 +129,18 @@ A minimum client configuration in Keycloak for this example looks like this: Further information for specifying an AuthenticationClass for an OIDC provider can be found at the xref:concepts:authentication.adoc#_oidc[concepts page]. == Authorization -The Airflow Webserver delegates the {airflow-access-control-docs}[handling of user access control] to https://flask-appbuilder.readthedocs.io/en/latest/security.html[Flask AppBuilder]. -=== Webinterface +The Airflow Webserver delegates the {airflow-access-control-docs}[handling of user access control] to the https://flask-appbuilder.readthedocs.io/en/latest/security.html[Flask AppBuilder]. +The AuthManager in the Flask AppBuilder can be configured to fetch the user roles from the authentication backend, e.g. LDAP. +Instead of using the integrated authorization, the Stackable Data Platform also provides an AuthManager which delegates the authorization requests to an xref:opa:index.adoc[Open Policy Agent (OPA)]. + +=== Integrated authorization + +==== Webinterface + You can view, add to, and assign the roles displayed in the Airflow Webserver UI to existing users. -=== LDAP +==== LDAP Airflow supports assigning {airflow-access-control-docs}[Roles] to users based on their LDAP group membership, though this is not yet supported by the Stackable operator. All the users logging in via LDAP get assigned to the same role which you can configure via the attribute `authenticationConfig.userRegistrationRole` on the AirflowCluster object: @@ -155,7 +161,7 @@ spec: <1> The reference to an AuthenticationClass called `ldap` <2> All users are assigned to the `Admin` role -=== OpenID Connect +==== OpenID Connect The mechanism for assigning roles to users described in the LDAP section also applies to OpenID Connect. Airflow supports assigning {airflow-access-control-docs}[Roles] to users based on their OpenID Connect scopes, though this is not yet supported by the Stackable operator. @@ -177,3 +183,247 @@ spec: ---- <1> All users are assigned to the `Admin` role + +=== Open Policy Agent + +Authorization with an Open Policy Agent can be enabled with the following cluster configuration: + +[source,yaml] +---- +apiVersion: airflow.stackable.tech/v1alpha1 +kind: AirflowCluster +metadata: + name: airflow-with-opa +spec: + clusterConfig: + authorization: + opa: + configMapName: opa # <1> + package: airflow # <2> + cache: # <3> + entryTimeToLive: 10s # <4> + maxEntries: 100000 # <5> +---- + +<1> The xref:concepts:service_discovery.adoc[service discovery ConfigMap] for the OPA instance containing the URL of the OPA API +<2> The Rego rule package with the authorization rules +<3> A cache for authorization requests to the Open Policy Agent to reduce the load on OPA and to bridge restarts of the OPA pods; + Defaults are used, if not set explicitly. +<4> Time to live per cached authorization request; + Defaults to 30 seconds; + Changes in the Rego rules may not be effective within the given duration. +<5> Maximum number of cached authorization requests in the cache; + Defaults to 10,000 entries; + If this limit is reached then the least recently used entry is removed and the metric `airflow_opa_cache_limit_reached` is increased by one. + The cache size should probably be increased if this metric is constantly raised. + +The Rego rule package defined in the configuration must contain specific rules which are true or false dependent on the input which differs slightly between the rules. +The following list contains the rule names as well as a specification of the possible input: + +* `is_authorized_configuration` ++ +Returns whether the user is authorized to perform a given action on configuration. ++ +[source,json,line-comment=%] +---- +"input": { + "method": "", % <1> + "details": { + "section": "" % <2> + }, + "user": { + "id": "", + "name": "" + } +} +---- +<1> One of "GET", "POST", "PUT", "DELETE" or "MENU" +<2> `null` if the action is performed on all configuration sections +* `is_authorized_connection` ++ +Returns whether the user is authorized to perform a given action on a connection. ++ +[source,json,line-comment=%] +---- +"input": { + "method": "", % <1> + "details": { + "conn_id": "" % <2> + }, + "user": { + "id": "", + "name": "" + } +} +---- +<1> One of "GET", "POST", "PUT", "DELETE" or "MENU" +<2> `null` if the action is performed on all connections +* `is_authorized_dag` ++ +Returns whether the user is authorized to perform a given action on a DAG. ++ +[source,json,line-comment=%] +---- +"input": { + "method": "", % <1> + "access_entity": "", % <2> + "details": { + "dag_id": "" % <3> + }, + "user": { + "id": "", + "name": "" + } +} +---- +<1> One of "GET", "POST", "PUT", "DELETE" or "MENU" +<2> The kind of DAG information the authorization request is about. + If not provided, the authorization request is about the DAG itself. + One of "AUDIT_LOG", "CODE", "DEPENDENCIES", "RUN", "SLA_MISS", "TASK", "TASK_INSTANCE", "TASK_RESCHEDULE", "TASK_LOGS", "WARNING" or "XCOM" +<3> `null` if the action is performed on all DAGs +* `is_authorized_dataset` ++ +Returns whether the user is authorized to perform a given action on a dataset. ++ +[source,json,line-comment=%] +---- +"input": { + "method": "", % <1> + "details": { + "uri": "" % <2> + }, + "user": { + "id": "", + "name": "" + } +} +---- +<1> One of "GET", "POST", "PUT", "DELETE" or "MENU" +<2> `null` if the action is performed on all datasets +* `is_authorized_pool` ++ +Returns whether the user is authorized to perform a given action on a pool. ++ +[source,json,line-comment=%] +---- +"input": { + "method": "", % <1> + "details": { + "name": "" % <2> + }, + "user": { + "id": "", + "name": "" + } +} +---- +<1> One of "GET", "POST", "PUT", "DELETE" or "MENU" +<2> `null` if the action is performed on all pools +* `is_authorized_variable` ++ +Returns whether the user is authorized to perform a given action on a variable. ++ +[source,json,line-comment=%] +---- +"input": { + "method": "", % <1> + "details": { + "key": "" % <2> + }, + "user": { + "id": "", + "name": "" + } +} +---- +<1> One of "GET", "POST", "PUT", "DELETE" or "MENU" +<2> `key` if the action is performed on all variables +* `is_authorized_view` ++ +Returns whether the user is authorized to access a read-only state of the installation. ++ +[source,json,line-comment=%] +---- +"input": { + "access_view": "", % <1> + "user": { + "id": "", + "name": "" + } +} +---- +<1> The specific read-only view/state the authorization request is about. + One of "CLUSTER_ACTIVITY", "DOCS", "IMPORT_ERRORS", "JOBS", "PLUGINS", "PROVIDERS", "TRIGGERS" or "WEBSITE". +* `is_authorized_custom_view` ++ +Returns whether the user is authorized to perform a given action on a custom view. ++ +A custom view can be a view defined as part of the auth manager. +This view is then only available when the auth manager is used as part of the environment. +It can also be a view defined as part of a plugin defined by a user. ++ +[source,json,line-comment=%] +---- +"input": { + "method": "", % <1> + "resource_name": "", + "user": { + "id": "", + "name": "" + } +} +---- +<1> Usually one of "GET", "POST", "PUT", "DELETE" or "MENU", but the method can also be a string if the action has been defined in a plugin. +In that case, the action can be anything. + +The roles defined in Airflow are not used when the authorization is performed by OPA. +Therefore, it makes sense to hide the menu entry "List Roles" simply by not allowing access to the custom view "List Roles". + +A ConfigMap with the Rego rules could look as follows: + +[source,yaml] +---- +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: airflow-rules + labels: + opa.stackable.tech/bundle: "true" +data: + airflow.rego: | + + # The Rego rule package which must be defined in the cluster + # configuration (spec.clusterConfig.authorization.opa.package). + package airflow + + import rego.v1 + + default is_authorized_configuration := false + default is_authorized_connection := false + default is_authorized_dag := false + default is_authorized_dataset := false + default is_authorized_pool := false + default is_authorized_variable := false + default is_authorized_view := false + default is_authorized_custom_view := false + + # Allow everybody to get the DAG runs of example_trigger_target_dag + is_authorized_dag if { + input.method == "GET" + input.access_entity == "RUN" + input.details.id == "example_trigger_target_dag" + } + + # Allow the administrator to access all custom views but hide the + # menu "List Roles" and disallow the access to the Roles resource. + is_authorized_custom_view if { + input.resource_name != "List Roles" + input.resource_name != "Roles" + + input.user.name == "admin" + } +---- + +The xref:opa:usage-guide/user-info-fetcher.adoc[User Info Fetcher] can be used to fetch the groups in which the user is a member of. +The Rego rule can then grant access based on the group membership. diff --git a/rust/crd/src/authorization.rs b/rust/crd/src/authorization.rs new file mode 100644 index 00000000..da8f2967 --- /dev/null +++ b/rust/crd/src/authorization.rs @@ -0,0 +1,49 @@ +use stackable_operator::{client::Client, commons::opa::OpaApiVersion, time::Duration}; + +use crate::{AirflowAuthorization, AirflowCluster, AirflowOpaConfig}; + +pub struct AirflowAuthorizationResolved { + pub opa: Option, +} + +impl AirflowAuthorizationResolved { + pub async fn from_authorization_config( + client: &Client, + airflow: &AirflowCluster, + authorization: &Option, + ) -> Result { + let opa = if let Some(AirflowAuthorization { + opa: Some(opa_config), + }) = authorization + { + Some(OpaConfigResolved::from_opa_config(client, airflow, opa_config).await?) + } else { + None + }; + Ok(AirflowAuthorizationResolved { opa }) + } +} + +pub struct OpaConfigResolved { + pub connection_string: String, + pub cache_entry_time_to_live: Duration, + pub cache_max_entries: u32, +} + +impl OpaConfigResolved { + pub async fn from_opa_config( + client: &Client, + airflow: &AirflowCluster, + airflow_opa_config: &AirflowOpaConfig, + ) -> Result { + let connection_string = airflow_opa_config + .opa + .full_document_url_from_config_map(client, airflow, None, OpaApiVersion::V1) + .await?; + Ok(OpaConfigResolved { + connection_string, + cache_entry_time_to_live: airflow_opa_config.cache.entry_time_to_live, + cache_max_entries: airflow_opa_config.cache.max_entries, + }) + } +} diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 118eca8d..e6a642b4 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -10,7 +10,9 @@ use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ commons::{ affinity::StackableAffinity, + cache::UserInformationCache, cluster_operation::ClusterOperation, + opa::OpaConfig, product_image_selection::ProductImage, resources::{ CpuLimitsFragment, MemoryLimitsFragment, NoRuntimeLimits, NoRuntimeLimitsFragment, @@ -52,6 +54,7 @@ use crate::{ pub mod affinity; pub mod authentication; +pub mod authorization; pub mod git_sync; pub const AIRFLOW_UID: i64 = 1000; @@ -118,6 +121,10 @@ pub enum AirflowConfigOptions { AuthLdapTlsKeyfile, AuthLdapTlsCacertfile, AuthLdapAllowSelfSigned, + AuthOpaCacheMaxsize, + AuthOpaCacheTtlInSec, + AuthOpaRequestUrl, + AuthOpaRequestTimeout, } impl FlaskAppConfigOptions for AirflowConfigOptions { @@ -143,6 +150,10 @@ impl FlaskAppConfigOptions for AirflowConfigOptions { AirflowConfigOptions::AuthLdapTlsKeyfile => PythonType::StringLiteral, AirflowConfigOptions::AuthLdapTlsCacertfile => PythonType::StringLiteral, AirflowConfigOptions::AuthLdapAllowSelfSigned => PythonType::BoolLiteral, + AirflowConfigOptions::AuthOpaCacheMaxsize => PythonType::IntLiteral, + AirflowConfigOptions::AuthOpaCacheTtlInSec => PythonType::IntLiteral, + AirflowConfigOptions::AuthOpaRequestUrl => PythonType::StringLiteral, + AirflowConfigOptions::AuthOpaRequestTimeout => PythonType::IntLiteral, } } } @@ -200,6 +211,11 @@ pub struct AirflowClusterConfig { #[serde(default)] pub authentication: Vec, + /// Authorization options. + /// Learn more in the [Airflow authorization usage guide](DOCS_BASE_URL_PLACEHOLDER/airflow/usage-guide/security#_authorization). + #[serde(skip_serializing_if = "Option::is_none")] + pub authorization: Option, + /// The name of the Secret object containing the admin user credentials and database connection details. /// Read the /// [getting started guide first steps](DOCS_BASE_URL_PLACEHOLDER/airflow/getting_started/first_steps) @@ -253,6 +269,22 @@ pub struct AirflowClusterConfig { pub volume_mounts: Vec, } +#[derive(Clone, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct AirflowAuthorization { + #[serde(skip_serializing_if = "Option::is_none")] + pub opa: Option, +} + +#[derive(Clone, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct AirflowOpaConfig { + #[serde(flatten)] + pub opa: OpaConfig, + #[serde(default)] + pub cache: UserInformationCache, +} + // TODO: Temporary solution until listener-operator is finished #[derive(Clone, Debug, Default, Display, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "PascalCase")] diff --git a/rust/operator-binary/src/airflow_controller.rs b/rust/operator-binary/src/airflow_controller.rs index 2744639b..43559c9a 100644 --- a/rust/operator-binary/src/airflow_controller.rs +++ b/rust/operator-binary/src/airflow_controller.rs @@ -14,10 +14,12 @@ use product_config::{ }; use snafu::{OptionExt, ResultExt, Snafu}; use stackable_airflow_crd::{ - authentication::AirflowAuthenticationClassResolved, git_sync::GitSync, -}; -use stackable_airflow_crd::{ - authentication::AirflowClientAuthenticationDetailsResolved, build_recommended_labels, + authentication::{ + AirflowAuthenticationClassResolved, AirflowClientAuthenticationDetailsResolved, + }, + authorization::AirflowAuthorizationResolved, + build_recommended_labels, + git_sync::GitSync, AirflowCluster, AirflowClusterStatus, AirflowConfig, AirflowConfigOptions, AirflowExecutor, AirflowRole, Container, ExecutorConfig, ExecutorConfigFragment, AIRFLOW_CONFIG_FILENAME, AIRFLOW_UID, APP_NAME, CONFIG_PATH, GIT_CONTENT, GIT_ROOT, GIT_SYNC_NAME, LOG_CONFIG_DIR, @@ -363,6 +365,14 @@ pub async fn reconcile_airflow( .await .context(InvalidAuthenticationConfigSnafu)?; + let authorization_config = AirflowAuthorizationResolved::from_authorization_config( + client, + airflow, + &airflow.spec.cluster_config.authorization, + ) + .await + .unwrap(); + let mut roles = HashMap::new(); // if the kubernetes executor is specified there will be no worker role as the pods @@ -444,6 +454,7 @@ pub async fn reconcile_airflow( common_configuration, &resolved_product_image, &authentication_config, + &authorization_config, &vector_aggregator_address, &mut cluster_resources, client, @@ -493,6 +504,7 @@ pub async fn reconcile_airflow( &rolegroup, rolegroup_config, &authentication_config, + &authorization_config, &rbac_sa, &merged_airflow_config, airflow_executor, @@ -513,6 +525,7 @@ pub async fn reconcile_airflow( &rolegroup, rolegroup_config, &authentication_config, + &authorization_config, &merged_airflow_config.logging, vector_aggregator_address.as_deref(), &Container::Airflow, @@ -562,6 +575,7 @@ async fn build_executor_template( common_config: &CommonConfiguration, resolved_product_image: &ResolvedProductImage, authentication_config: &AirflowClientAuthenticationDetailsResolved, + authorization_config: &AirflowAuthorizationResolved, vector_aggregator_address: &Option, cluster_resources: &mut ClusterResources, client: &stackable_operator::client::Client, @@ -582,6 +596,7 @@ async fn build_executor_template( &rolegroup, &HashMap::new(), authentication_config, + authorization_config, &merged_executor_config.logging, vector_aggregator_address.as_deref(), &Container::Base, @@ -679,6 +694,7 @@ fn build_rolegroup_config_map( rolegroup: &RoleGroupRef, rolegroup_config: &HashMap>, authentication_config: &AirflowClientAuthenticationDetailsResolved, + authorization_config: &AirflowAuthorizationResolved, logging: &Logging, vector_aggregator_address: Option<&str>, container: &Container, @@ -688,7 +704,8 @@ fn build_rolegroup_config_map( .cloned() .unwrap_or_default(); - config::add_airflow_config(&mut config, authentication_config).context(ConstructConfigSnafu)?; + config::add_airflow_config(&mut config, authentication_config, authorization_config) + .context(ConstructConfigSnafu)?; let mut config_file = Vec::new(); @@ -841,6 +858,7 @@ fn build_server_rolegroup_statefulset( rolegroup_ref: &RoleGroupRef, rolegroup_config: &HashMap>, authentication_config: &AirflowClientAuthenticationDetailsResolved, + authorization_config: &AirflowAuthorizationResolved, service_account: &ServiceAccount, merged_airflow_config: &AirflowConfig, executor: &AirflowExecutor, @@ -908,6 +926,7 @@ fn build_server_rolegroup_statefulset( rolegroup_config, executor, authentication_config, + authorization_config, )); let volume_mounts = airflow.volume_mounts(); diff --git a/rust/operator-binary/src/config.rs b/rust/operator-binary/src/config.rs index ddc741fe..6edb73a3 100644 --- a/rust/operator-binary/src/config.rs +++ b/rust/operator-binary/src/config.rs @@ -5,6 +5,7 @@ use stackable_airflow_crd::{ AirflowAuthenticationClassResolved, AirflowClientAuthenticationDetailsResolved, FlaskRolesSyncMoment, DEFAULT_OIDC_PROVIDER, }, + authorization::{AirflowAuthorizationResolved, OpaConfigResolved}, AirflowConfigOptions, }; use stackable_operator::commons::authentication::{ldap::AuthenticationProvider, oidc}; @@ -41,6 +42,7 @@ pub enum Error { pub fn add_airflow_config( config: &mut BTreeMap, authentication_config: &AirflowClientAuthenticationDetailsResolved, + authorization_config: &AirflowAuthorizationResolved, ) -> Result<()> { if !config.contains_key(&*AirflowConfigOptions::AuthType.to_string()) { config.insert( @@ -51,6 +53,7 @@ pub fn add_airflow_config( } append_authentication_config(config, authentication_config)?; + append_authorization_config(config, authorization_config)?; Ok(()) } @@ -271,16 +274,53 @@ fn append_oidc_config( Ok(()) } +fn append_authorization_config( + config: &mut BTreeMap, + authorization_config: &AirflowAuthorizationResolved, +) -> Result<(), Error> { + if let Some(opa_config) = &authorization_config.opa { + append_opa_config(config, opa_config)?; + } + + Ok(()) +} + +fn append_opa_config( + config: &mut BTreeMap, + opa_config: &OpaConfigResolved, +) -> Result<(), Error> { + config.insert( + AirflowConfigOptions::AuthOpaRequestUrl.to_string(), + opa_config.connection_string.to_owned(), + ); + config.insert( + AirflowConfigOptions::AuthOpaCacheTtlInSec.to_string(), + opa_config.cache_entry_time_to_live.as_secs().to_string(), + ); + config.insert( + AirflowConfigOptions::AuthOpaCacheMaxsize.to_string(), + opa_config.cache_max_entries.to_string(), + ); + + Ok(()) +} + #[cfg(test)] mod tests { use crate::config::add_airflow_config; use indoc::formatdoc; use rstest::rstest; - use stackable_airflow_crd::authentication::{ - default_sync_roles_at, default_user_registration, AirflowAuthenticationClassResolved, - AirflowClientAuthenticationDetailsResolved, FlaskRolesSyncMoment, + use stackable_airflow_crd::{ + authentication::{ + default_sync_roles_at, default_user_registration, AirflowAuthenticationClassResolved, + AirflowClientAuthenticationDetailsResolved, FlaskRolesSyncMoment, + }, + authorization::{AirflowAuthorizationResolved, OpaConfigResolved}, + }; + use stackable_operator::{ + commons::authentication::{ldap, oidc}, + time::Duration, }; - use stackable_operator::commons::authentication::{ldap, oidc}; use std::collections::BTreeMap; #[test] @@ -292,8 +332,10 @@ mod tests { sync_roles_at: FlaskRolesSyncMoment::Registration, }; + let authorization_config = AirflowAuthorizationResolved { opa: None }; + let mut result = BTreeMap::new(); - add_airflow_config(&mut result, &authentication_config).expect("Ok"); + add_airflow_config(&mut result, &authentication_config, &authorization_config).expect("Ok"); assert_eq!( BTreeMap::from([ @@ -335,8 +377,10 @@ mod tests { sync_roles_at: FlaskRolesSyncMoment::Registration, }; + let authorization_config = AirflowAuthorizationResolved { opa: None }; + let mut result = BTreeMap::new(); - add_airflow_config(&mut result, &authentication_config).expect("Ok"); + add_airflow_config(&mut result, &authentication_config, &authorization_config).expect("Ok"); assert_eq!(BTreeMap::from([ ("AUTH_LDAP_ALLOW_SELF_SIGNED".into(), "false".into()), @@ -419,8 +463,10 @@ mod tests { sync_roles_at: default_sync_roles_at(), }; + let authorization_config = AirflowAuthorizationResolved { opa: None }; + let mut result = BTreeMap::new(); - add_airflow_config(&mut result, &authentication_config).expect("Ok"); + add_airflow_config(&mut result, &authentication_config, &authorization_config).expect("Ok"); assert_eq!( BTreeMap::from([ @@ -465,4 +511,41 @@ mod tests { result ); } + + #[test] + fn test_opa_config() { + let authentication_config = AirflowClientAuthenticationDetailsResolved { + authentication_classes_resolved: vec![], + user_registration: true, + user_registration_role: "User".to_string(), + sync_roles_at: FlaskRolesSyncMoment::Registration, + }; + + let authorization_config = AirflowAuthorizationResolved { + opa: Some(OpaConfigResolved { + connection_string: "http://opa:8081/v1/data/airflow".to_string(), + cache_entry_time_to_live: Duration::from_secs(30), + cache_max_entries: 1000, + }), + }; + + let mut result = BTreeMap::new(); + add_airflow_config(&mut result, &authentication_config, &authorization_config).expect("Ok"); + + assert_eq!( + BTreeMap::from([ + ("AUTH_OPA_CACHE_MAXSIZE".into(), "1000".into()), + ("AUTH_OPA_CACHE_TTL_IN_SEC".into(), "30".into()), + ( + "AUTH_OPA_REQUEST_URL".into(), + "http://opa:8081/v1/data/airflow".into() + ), + ("AUTH_ROLES_SYNC_AT_LOGIN".into(), "false".into()), + ("AUTH_TYPE".into(), "AUTH_DB".into()), + ("AUTH_USER_REGISTRATION".into(), "true".into()), + ("AUTH_USER_REGISTRATION_ROLE".into(), "User".into()) + ]), + result + ); + } } diff --git a/rust/operator-binary/src/env_vars.rs b/rust/operator-binary/src/env_vars.rs index 394183e3..520f4bc2 100644 --- a/rust/operator-binary/src/env_vars.rs +++ b/rust/operator-binary/src/env_vars.rs @@ -1,20 +1,21 @@ use crate::util::env_var_from_secret; use product_config::types::PropertyNameKind; -use stackable_airflow_crd::authentication::{ - AirflowAuthenticationClassResolved, AirflowClientAuthenticationDetailsResolved, -}; -use stackable_airflow_crd::git_sync::GitSync; use stackable_airflow_crd::{ - AirflowCluster, AirflowConfig, AirflowExecutor, AirflowRole, ExecutorConfig, LOG_CONFIG_DIR, - STACKABLE_LOG_DIR, + authentication::{ + AirflowAuthenticationClassResolved, AirflowClientAuthenticationDetailsResolved, + }, + authorization::AirflowAuthorizationResolved, + git_sync::GitSync, + AirflowCluster, AirflowConfig, AirflowExecutor, AirflowRole, ExecutorConfig, GIT_LINK, + GIT_SYNC_DIR, LOG_CONFIG_DIR, STACKABLE_LOG_DIR, TEMPLATE_LOCATION, TEMPLATE_NAME, +}; +use stackable_operator::{ + commons::authentication::oidc, k8s_openapi::api::core::v1::EnvVar, kube::ResourceExt, + product_logging::framework::create_vector_shutdown_file_command, }; -use stackable_airflow_crd::{GIT_LINK, GIT_SYNC_DIR, TEMPLATE_LOCATION, TEMPLATE_NAME}; -use stackable_operator::commons::authentication::oidc; -use stackable_operator::k8s_openapi::api::core::v1::EnvVar; -use stackable_operator::kube::ResourceExt; -use stackable_operator::product_logging::framework::create_vector_shutdown_file_command; use std::collections::{BTreeMap, BTreeSet, HashMap}; +const AIRFLOW__CORE__AUTH_MANAGER: &str = "AIRFLOW__CORE__AUTH_MANAGER"; const AIRFLOW__LOGGING__LOGGING_CONFIG_CLASS: &str = "AIRFLOW__LOGGING__LOGGING_CONFIG_CLASS"; const AIRFLOW__METRICS__STATSD_ON: &str = "AIRFLOW__METRICS__STATSD_ON"; const AIRFLOW__METRICS__STATSD_HOST: &str = "AIRFLOW__METRICS__STATSD_HOST"; @@ -49,6 +50,7 @@ pub fn build_airflow_statefulset_envs( rolegroup_config: &HashMap>, executor: &AirflowExecutor, auth_config: &AirflowClientAuthenticationDetailsResolved, + authorization_config: &AirflowAuthorizationResolved, ) -> Vec { let mut env: BTreeMap = BTreeMap::new(); @@ -198,8 +200,9 @@ pub fn build_airflow_statefulset_envs( ); } AirflowRole::Webserver => { - let auth_vars = authentication_env_vars(auth_config); - env.extend(auth_vars.into_iter().map(|var| (var.name.to_owned(), var))); + let mut vars = authentication_env_vars(auth_config); + vars.extend(authorization_env_vars(authorization_config)); + env.extend(vars.into_iter().map(|var| (var.name.to_owned(), var))); } _ => {} } @@ -232,7 +235,7 @@ pub fn build_airflow_statefulset_envs( } fn get_dags_folder(airflow: &AirflowCluster) -> String { - return if let Some(GitSync { + if let Some(GitSync { git_folder: Some(dags_folder), .. }) = airflow.git_sync() @@ -244,7 +247,7 @@ fn get_dags_folder(airflow: &AirflowCluster) -> String { // /stackable/airflow is used instead of $AIRFLOW_HOME. // See https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#dags-folder "/stackable/airflow/dags".to_string() - }; + } } // This set of environment variables is a standard set that is not dependent on any @@ -501,3 +504,17 @@ fn authentication_env_vars( .flat_map(oidc::AuthenticationProvider::client_credentials_env_var_mounts) .collect() } + +fn authorization_env_vars(authorization_config: &AirflowAuthorizationResolved) -> Vec { + let mut env = vec![]; + + if authorization_config.opa.is_some() { + env.push(EnvVar { + name: AIRFLOW__CORE__AUTH_MANAGER.into(), + value: Some("opa_auth_manager.opa_fab_auth_manager.OpaFabAuthManager".to_string()), + ..Default::default() + }); + } + + env +} diff --git a/tests/release.yaml b/tests/release.yaml index ff6668e5..c94e7bcf 100644 --- a/tests/release.yaml +++ b/tests/release.yaml @@ -14,5 +14,7 @@ releases: operatorVersion: 0.0.0-dev airflow: operatorVersion: 0.0.0-dev + opa: + operatorVersion: 0.0.0-dev spark-k8s: operatorVersion: 0.0.0-dev diff --git a/tests/templates/kuttl/commons/metrics.py b/tests/templates/kuttl/commons/metrics.py index 2fd358cb..e0fb5eff 100755 --- a/tests/templates/kuttl/commons/metrics.py +++ b/tests/templates/kuttl/commons/metrics.py @@ -53,7 +53,11 @@ def assert_metric(role, role_group, metric): # (disable line-break flake checks) if ( (assert_metric("scheduler", role_group, "airflow_scheduler_heartbeat")) - and (assert_metric("webserver", role_group, "airflow_task_instance_created_BashOperator")) # noqa: W503, W504 + and ( + assert_metric( + "webserver", role_group, "airflow_task_instance_created_BashOperator" + ) + ) # noqa: W503, W504 and ( assert_metric( "scheduler", diff --git a/tests/templates/kuttl/opa/10-patch-ns.yaml.j2 b/tests/templates/kuttl/opa/10-patch-ns.yaml.j2 new file mode 100644 index 00000000..67185acf --- /dev/null +++ b/tests/templates/kuttl/opa/10-patch-ns.yaml.j2 @@ -0,0 +1,9 @@ +{% if test_scenario['values']['openshift'] == 'true' %} +# see https://github.com/stackabletech/issues/issues/566 +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: kubectl patch namespace $NAMESPACE -p '{"metadata":{"labels":{"pod-security.kubernetes.io/enforce":"privileged"}}}' + timeout: 120 +{% endif %} diff --git a/tests/templates/kuttl/opa/11-assert.yaml b/tests/templates/kuttl/opa/11-assert.yaml new file mode 100644 index 00000000..319e927a --- /dev/null +++ b/tests/templates/kuttl/opa/11-assert.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: test-airflow-postgresql +timeout: 480 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: airflow-postgresql +status: + readyReplicas: 1 + replicas: 1 diff --git a/tests/templates/kuttl/opa/11-install-postgresql.yaml b/tests/templates/kuttl/opa/11-install-postgresql.yaml new file mode 100644 index 00000000..0c333b31 --- /dev/null +++ b/tests/templates/kuttl/opa/11-install-postgresql.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: >- + helm install airflow-postgresql + --namespace $NAMESPACE + --version 12.5.6 + --values 11_helm-bitnami-postgresql-values.yaml + --repo https://charts.bitnami.com/bitnami postgresql + --wait + timeout: 600 diff --git a/tests/templates/kuttl/opa/11_helm-bitnami-postgresql-values.yaml.j2 b/tests/templates/kuttl/opa/11_helm-bitnami-postgresql-values.yaml.j2 new file mode 100644 index 00000000..951804d6 --- /dev/null +++ b/tests/templates/kuttl/opa/11_helm-bitnami-postgresql-values.yaml.j2 @@ -0,0 +1,31 @@ +--- +volumePermissions: + enabled: false + securityContext: + runAsUser: auto + +primary: + podSecurityContext: +{% if test_scenario['values']['openshift'] == 'true' %} + enabled: false +{% else %} + enabled: true +{% endif %} + containerSecurityContext: + enabled: false + resources: + requests: + memory: "128Mi" + cpu: "512m" + limits: + memory: "128Mi" + cpu: "1" + +shmVolume: + chmod: + enabled: false + +auth: + username: airflow + password: airflow + database: airflow diff --git a/tests/templates/kuttl/opa/12-assert.yaml.j2 b/tests/templates/kuttl/opa/12-assert.yaml.j2 new file mode 100644 index 00000000..50b1d4c3 --- /dev/null +++ b/tests/templates/kuttl/opa/12-assert.yaml.j2 @@ -0,0 +1,10 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +{% endif %} diff --git a/tests/templates/kuttl/opa/12-install-vector-aggregator-discovery-configmap.yaml.j2 b/tests/templates/kuttl/opa/12-install-vector-aggregator-discovery-configmap.yaml.j2 new file mode 100644 index 00000000..2d6a0df5 --- /dev/null +++ b/tests/templates/kuttl/opa/12-install-vector-aggregator-discovery-configmap.yaml.j2 @@ -0,0 +1,9 @@ +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +data: + ADDRESS: {{ lookup('env', 'VECTOR_AGGREGATOR') }} +{% endif %} diff --git a/tests/templates/kuttl/opa/20-assert.yaml b/tests/templates/kuttl/opa/20-assert.yaml new file mode 100644 index 00000000..e868cdaf --- /dev/null +++ b/tests/templates/kuttl/opa/20-assert.yaml @@ -0,0 +1,6 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: kubectl -n $NAMESPACE rollout status daemonset opa-server-default --timeout 300s diff --git a/tests/templates/kuttl/opa/20-install-opa.yaml.j2 b/tests/templates/kuttl/opa/20-install-opa.yaml.j2 new file mode 100644 index 00000000..c9beec62 --- /dev/null +++ b/tests/templates/kuttl/opa/20-install-opa.yaml.j2 @@ -0,0 +1,29 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +metadata: + name: install-opa +--- +apiVersion: opa.stackable.tech/v1alpha1 +kind: OpaCluster +metadata: + name: opa +spec: + image: + productVersion: "{{ test_scenario['values']['opa-latest'] }}" + pullPolicy: IfNotPresent + clusterConfig: +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + servers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + containers: + opa: + loggers: + decision: + level: INFO + roleGroups: + default: {} diff --git a/tests/templates/kuttl/opa/30-assert.yaml b/tests/templates/kuttl/opa/30-assert.yaml new file mode 100644 index 00000000..ad3c8974 --- /dev/null +++ b/tests/templates/kuttl/opa/30-assert.yaml @@ -0,0 +1,28 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: install-airflow +timeout: 1200 +commands: + - script: > + kubectl --namespace $NAMESPACE + wait --for=condition=available=true + airflowclusters.airflow.stackable.tech/airflow + --timeout 301s +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: airflow-webserver-default +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: airflow-scheduler-default +status: + readyReplicas: 1 + replicas: 1 diff --git a/tests/templates/kuttl/opa/30-install-airflow.yaml.j2 b/tests/templates/kuttl/opa/30-install-airflow.yaml.j2 new file mode 100644 index 00000000..6867c5dc --- /dev/null +++ b/tests/templates/kuttl/opa/30-install-airflow.yaml.j2 @@ -0,0 +1,68 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +metadata: + name: install-airflow +--- +apiVersion: v1 +kind: Secret +metadata: + name: airflow-credentials +type: Opaque +stringData: + adminUser.username: airflow + adminUser.firstname: Airflow + adminUser.lastname: Admin + adminUser.email: airflow@airflow.com + adminUser.password: airflow + connections.secretKey: thisISaSECRET_1234 + connections.sqlalchemyDatabaseUri: postgresql+psycopg2://airflow:airflow@airflow-postgresql/airflow +--- +apiVersion: airflow.stackable.tech/v1alpha1 +kind: AirflowCluster +metadata: + name: airflow +spec: + image: +{% if test_scenario['values']['airflow'].find(",") > 0 %} + custom: "{{ test_scenario['values']['airflow'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['airflow'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['airflow'] }}" +{% endif %} + pullPolicy: IfNotPresent + clusterConfig: + authorization: + opa: + configMapName: opa + package: airflow + cache: + entryTimeToLive: 5s + maxEntries: 10 + credentialsSecret: airflow-credentials + exposeConfig: true + loadExamples: true +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + webservers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + configOverrides: + webserver_config.py: + WTF_CSRF_ENABLED: "False" # Allow "POST /login/" without CSRF token + roleGroups: + default: + replicas: 1 + kubernetesExecutors: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + schedulers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 1 diff --git a/tests/templates/kuttl/opa/31-opa-rules.yaml b/tests/templates/kuttl/opa/31-opa-rules.yaml new file mode 100644 index 00000000..84f06ce1 --- /dev/null +++ b/tests/templates/kuttl/opa/31-opa-rules.yaml @@ -0,0 +1,168 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: airflow-rules + labels: + opa.stackable.tech/bundle: "true" +data: + airflow.rego: | + package airflow + + import rego.v1 + + default is_authorized_configuration := false + default is_authorized_connection := false + default is_authorized_dag := false + default is_authorized_dataset := false + default is_authorized_pool := false + default is_authorized_variable := false + default is_authorized_view := false + default is_authorized_custom_view := false + + # Allow the user "airflow" to create test users + # POST /auth/fab/v1/users + is_authorized_custom_view if { + input.method == "POST" + input.resource_name == "Users" + + input.user.id == 1 + input.user.name == "airflow" + } + + # GET /api/v1/config + is_authorized_configuration if { + input.method == "GET" + input.details.section == null + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /api/v1/config?section=core + is_authorized_configuration if { + input.method == "GET" + input.details.section == "core" + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /api/v1/connections + is_authorized_connection if { + input.method == "GET" + input.details.conn_id == null + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /api/v1/connections/postgres_default + is_authorized_connection if { + input.method == "GET" + input.details.conn_id == "postgres_default" + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /api/v1/dags/example_trigger_target_dag + is_authorized_dag if { + input.method == "GET" + input.access_entity == null + input.details.id == "example_trigger_target_dag" + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /api/v1/dags/~/dagRuns + is_authorized_dag if { + input.method == "GET" + input.access_entity == "RUN" + input.details.id == null + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /api/v1/dags/example_trigger_target_dag/dagRuns + is_authorized_dag if { + input.method == "GET" + input.access_entity == "RUN" + input.details.id == "example_trigger_target_dag" + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /api/v1/datasets + is_authorized_dataset if { + input.method == "GET" + input.details.uri == null + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /api/v1/datasets/s3%3A%2F%2Fbucket%2Fmy-task + is_authorized_dataset if { + input.method == "GET" + input.details.uri == "s3://bucket/my-task" + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /api/v1/pools + is_authorized_pool if { + input.method == "GET" + input.details.name == null + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /api/v1/pools/default_pool + is_authorized_pool if { + input.method == "GET" + input.details.name == "default_pool" + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # POST /api/v1/variables + is_authorized_variable if { + input.method == "POST" + input.details.key == null + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /api/v1/variables/myVar + is_authorized_variable if { + input.method == "GET" + input.details.key == "myVar" + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # GET /home + is_authorized_view if { + input.access_view == "WEBSITE" + + input.user.id == 2 + input.user.name == "jane.doe" + } + + # PATCH /auth/fab/v1/users/jane.doe + is_authorized_custom_view if { + input.method == "PUT" + input.resource_name == "Users" + + input.user.id == 2 + input.user.name == "jane.doe" + } diff --git a/tests/templates/kuttl/opa/40-assert.yaml b/tests/templates/kuttl/opa/40-assert.yaml new file mode 100644 index 00000000..a5ad1c45 --- /dev/null +++ b/tests/templates/kuttl/opa/40-assert.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: install-test-container +timeout: 300 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-runner +status: + readyReplicas: 1 + replicas: 1 diff --git a/tests/templates/kuttl/opa/40-install-test-container.yaml.j2 b/tests/templates/kuttl/opa/40-install-test-container.yaml.j2 new file mode 100644 index 00000000..db06d421 --- /dev/null +++ b/tests/templates/kuttl/opa/40-install-test-container.yaml.j2 @@ -0,0 +1,80 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: test-runner +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: test-runner +{% if test_scenario['values']['openshift'] == 'true' %} +rules: +- apiGroups: ["security.openshift.io"] + resources: ["securitycontextconstraints"] + resourceNames: ["privileged"] + verbs: ["use"] +{% endif %} +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: test-runner +subjects: + - kind: ServiceAccount + name: test-runner +roleRef: + kind: Role + name: test-runner + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +metadata: + name: install-test-container +timeout: 300 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-runner + labels: + app: test-runner +spec: + replicas: 1 + selector: + matchLabels: + app: test-runner + template: + metadata: + labels: + app: test-runner + spec: + serviceAccountName: test-runner + securityContext: + fsGroup: 1000 + containers: + - name: test-runner + image: docker.stackable.tech/stackable/testing-tools:0.2.0-stackable0.0.0-dev + stdin: true + tty: true + resources: + requests: + memory: "128Mi" + cpu: "512m" + limits: + memory: "128Mi" + cpu: "1" + volumeMounts: + - name: tls + mountPath: /stackable/tls + env: + - name: REQUESTS_CA_BUNDLE + value: /stackable/tls/ca.crt + volumes: + - name: tls + csi: + driver: secrets.stackable.tech + volumeAttributes: + secrets.stackable.tech/class: tls + secrets.stackable.tech/scope: pod diff --git a/tests/templates/kuttl/opa/41-assert.yaml b/tests/templates/kuttl/opa/41-assert.yaml new file mode 100644 index 00000000..36367d56 --- /dev/null +++ b/tests/templates/kuttl/opa/41-assert.yaml @@ -0,0 +1,10 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: login +timeout: 300 +commands: + - script: > + kubectl exec -n $NAMESPACE test-runner-0 -- + python /stackable/check-authorization.py diff --git a/tests/templates/kuttl/opa/41-check-authorization.yaml b/tests/templates/kuttl/opa/41-check-authorization.yaml new file mode 100644 index 00000000..f2f392be --- /dev/null +++ b/tests/templates/kuttl/opa/41-check-authorization.yaml @@ -0,0 +1,10 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +metadata: + name: login +commands: + - script: > + kubectl cp + 41_check-authorization.py + $NAMESPACE/test-runner-0:/stackable/check-authorization.py diff --git a/tests/templates/kuttl/opa/41_check-authorization.py b/tests/templates/kuttl/opa/41_check-authorization.py new file mode 100644 index 00000000..576bcbb9 --- /dev/null +++ b/tests/templates/kuttl/opa/41_check-authorization.py @@ -0,0 +1,166 @@ +import logging +import requests +import sys + + +logging.basicConfig( + level="DEBUG", format="%(asctime)s %(levelname)s: %(message)s", stream=sys.stdout +) + +# Jane Doe has access to specific resources. +user_jane_doe = { + "first_name": "Jane", + "last_name": "Doe", + "username": "jane.doe", + "email": "jane.doe@stackable.tech", + "roles": [{"name": "User"}], + "password": "T8mn72D9", +} +# Richard Roe has no access. +user_richard_roe = { + "first_name": "Richard", + "last_name": "Roe", + "username": "richard.roe", + "email": "richard.roe@stackable.tech", + "roles": [{"name": "User"}], + "password": "NvfpU518", +} + + +def create_user(user): + requests.post( + "http://airflow-webserver:8080/auth/fab/v1/users", + auth=("airflow", "airflow"), + json=user, + ) + + +def check_api_authorization_for_user( + user, expected_status_code, method, endpoint, data=None, api="api/v1" +): + api_url = f"http://airflow-webserver:8080/{api}" + + auth = (user["username"], user["password"]) + response = requests.request(method, f"{api_url}/{endpoint}", auth=auth, json=data) + assert response.status_code == expected_status_code + + +def check_api_authorization(method, endpoint, data=None, api="api/v1"): + check_api_authorization_for_user( + user_jane_doe, 200, method=method, endpoint=endpoint, data=data, api=api + ) + check_api_authorization_for_user( + user_richard_roe, 403, method=method, endpoint=endpoint, data=data, api=api + ) + + +def check_website_authorization_for_user(user, expected_status_code): + username = user["username"] + password = user["password"] + with requests.Session() as session: + login_response = session.post( + "http://airflow-webserver:8080/login/", + data=f"username={username}&password={password}", + allow_redirects=False, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert login_response.ok, f"Login for {username} failed" + home_response = session.get( + "http://airflow-webserver:8080/home", allow_redirects=False + ) + assert ( + home_response.status_code == expected_status_code + ), f"GET /home returned status code {home_response.status_code}, but {expected_status_code} was expected." + + +def test_is_authorized_configuration(): + # section == null + check_api_authorization("GET", "config") + # section != null + check_api_authorization("GET", "config?section=core") + + +def test_is_authorized_connection(): + # conn_id == null + check_api_authorization("GET", "connections") + # conn_id != null + check_api_authorization("GET", "connections/postgres_default") + + +def test_is_authorized_dag(): + # access_entity == null and id == null + # There is no API endpoint to test this case. + + # access_entity == null and id != null + check_api_authorization("GET", "dags/example_trigger_target_dag") + + # access_entity != null and id == null + # Check "GET /dags/~/dagRuns" because access to "GET /dags" is always allowed + check_api_authorization("GET", "dags/~/dagRuns") + + # access_entity != null and id != null + check_api_authorization("GET", "dags/example_trigger_target_dag/dagRuns") + + +def test_is_authorized_dataset(): + # uri == null + check_api_authorization("GET", "datasets") + # uri != null + check_api_authorization("GET", "datasets/s3%3A%2F%2Fbucket%2Fmy-task") + + +def test_is_authorized_pool(): + # name == null + check_api_authorization("GET", "pools") + # name != null + check_api_authorization("GET", "pools/default_pool") + + +def test_is_authorized_variable(): + # key != null + check_api_authorization("POST", "variables", data={"key": "myVar", "value": "1"}) + # key == null + check_api_authorization("GET", "variables/myVar") + + +def test_is_authorized_view(): + check_website_authorization_for_user(user_jane_doe, 200) + check_website_authorization_for_user(user_richard_roe, 403) + + +def test_is_authorized_custom_view(): + user_jane_doe_patched = user_jane_doe.copy() + user_jane_doe_patched["email"] = "jane@stackable.tech" + check_api_authorization_for_user( + user_jane_doe, + 200, + "PATCH", + "users/jane.doe?update_mask=email", + data=user_jane_doe_patched, + api="/auth/fab/v1", + ) + + user_richard_roe_patched = user_richard_roe.copy() + user_richard_roe_patched["email"] = "richard@stackable.tech" + check_api_authorization_for_user( + user_richard_roe, + 403, + "PATCH", + "users/richard.roe?update_mask=email", + data=user_richard_roe_patched, + api="/auth/fab/v1", + ) + + +# Create test users +create_user(user_jane_doe) +create_user(user_richard_roe) + +test_is_authorized_configuration() +test_is_authorized_connection() +test_is_authorized_dag() +test_is_authorized_dataset() +test_is_authorized_pool() +test_is_authorized_variable() +test_is_authorized_view() +test_is_authorized_custom_view() diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 6c987771..ab85b16c 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -17,6 +17,9 @@ dimensions: - 2.10.2 # To use a custom image, add a comma and the full name after the product version # - 2.8.1,oci.stackable.tech/sandbox/airflow:2.8.1-stackable0.0.0-dev + - name: opa-latest + values: + - 0.67.1 - name: ldap-authentication values: - no-tls @@ -55,6 +58,11 @@ tests: dimensions: - airflow - openshift + - name: opa + dimensions: + - airflow + - opa-latest + - openshift - name: resources dimensions: - airflow-latest