diff --git a/Jenkinsfile b/Jenkinsfile
index dea025c..eca84e1 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -11,7 +11,7 @@ pipeline {
stage('Build and Publish') {
environment {
SERVICE_NAME = "kubling-docs"
- VERSION = "24.5.1"
+ VERSION = "24.5.2"
DOCKERHUB_CREDS = credentials('kubling-dockerhub')
}
steps {
diff --git a/component/Tag.js b/component/Tag.js
new file mode 100644
index 0000000..5122770
--- /dev/null
+++ b/component/Tag.js
@@ -0,0 +1,11 @@
+// NOTE: the wrapping element below is a best-guess reconstruction; the original
+// markup was lost. The props suggest a simple inline, styled label element.
+function Tag({ description, bgColor, txtColor, txtFont }) {
+
+  return (
+    <span style={{ backgroundColor: bgColor, color: txtColor, fontFamily: txtFont }}>
+      {description}
+    </span>
+  );
+
+}
+export default Tag;
\ No newline at end of file
diff --git a/pages/CLI/overview.mdx b/pages/CLI/overview.mdx
index 7fe297f..55ffbb2 100644
--- a/pages/CLI/overview.mdx
+++ b/pages/CLI/overview.mdx
@@ -7,7 +7,7 @@ The main goal of the CLI is to facilitate common tasks required when configuring
It is designed to be easily embedded into pipelines, as well as into developers' environments.
### A note on its size
-Although Kubling is not a heavy application (its OCI Image is \<120MB), and its Community Edition (CE) version includes nearly all the features needed during
+Although Kubling is not a heavy application (its OCI Image is \~125MB), and its Community Edition (CE) version includes nearly all the features needed during
development—making it a perfect fit for local development environments—certain tasks like testing can be challenging and time-consuming
if every small change to a Script Module requires restarting a test container.
diff --git a/pages/Engine/_meta.json b/pages/Engine/_meta.json
index c0fdeae..c2afcda 100644
--- a/pages/Engine/_meta.json
+++ b/pages/Engine/_meta.json
@@ -5,6 +5,7 @@
"queries": "Queries",
"script-context-members": "",
"data-sources": "Data Sources",
+ "aggregators": "Aggregators",
"endpoints": "",
"properties": "",
"transactions": ""
diff --git a/pages/Engine/aggregators.mdx b/pages/Engine/aggregators.mdx
new file mode 100644
index 0000000..bbcf659
--- /dev/null
+++ b/pages/Engine/aggregators.mdx
@@ -0,0 +1,62 @@
+# Aggregators
+
+The **aggregator** is a special `schema` type that enables automatic view creation based on other data sources.
+This feature simplifies querying across multiple sources by aggregating data into views, which are easier to manage.
+
+Consider a scenario where we have several Kubernetes clusters, each defined as a data source within our VDB (Virtual Database).
+If we want to know, for example, which cluster a specific deployment is running on, one approach is to write a query that joins tables from all relevant schemas.
+However, this process becomes increasingly complex when managing dozens of clusters.
+
+Relational databases address such scenarios with `VIEWs`, and Kubling follows the same pattern. A `VIEW` is a virtual table constructed from the result of a query,
+which can be defined in DDL. But in our Kubernetes example, where each cluster has its own schema, it’s unclear where to define a `VIEW` that spans all clusters.
+
+This is where **aggregators** come in. They automatically generate views from tables across multiple `schemas`, streamlining multi-cluster data queries.
+
+Example:
+```yaml
+aggregatorSchemaModels:
+ - name: "k8s"
+ schemas:
+ - "k8s_1"
+ - "k8s_2"
+ tables:
+ - name: "DEPLOYMENT"
+ options:
+ updatable: false
+ - name: "NAMESPACE"
+ options:
+ updatable: false
+ - name: "DEPLOYMENT_CONDITIONS"
+ options:
+ updatable: false
+ - name: "DEPLOYMENT_CONTAINER"
+ options:
+ updatable: false
+ - name: "DEPLOYMENT_CONTAINER_VOLS"
+ options:
+ updatable: false
+```
+
+This configuration creates a new `SCHEMA` called `k8s` in the `VDB`.
+The tables in this schema, such as `DEPLOYMENT` and `NAMESPACE`, are actually `VIEWs` that aggregate data from the `k8s_1` and `k8s_2` schemas using a `UNION ALL`.
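+
+As a rough mental model, the generated `k8s.DEPLOYMENT` view behaves as if it were defined like this (an illustrative sketch, not the literal definition Kubling generates):
+```sql
+-- Conceptual shape of the aggregated view (sketch only)
+SELECT * FROM k8s_1.DEPLOYMENT
+UNION ALL
+SELECT * FROM k8s_2.DEPLOYMENT;
+```
+
+With the aggregated schema in place, the motivating question of which cluster a given deployment runs on becomes a single query. The example below assumes the built-in DDL exposes a `clusterName` column, as its primary-key definitions suggest:
+```sql
+SELECT clusterName, metadata__namespace
+FROM k8s.DEPLOYMENT
+WHERE metadata__name = 'my-deployment';
+```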
+
+## Primary Keys
+When using aggregators, be cautious when selecting the primary key for your `SCHEMA` definitions, as key duplication may occur.
+
+If a data source does not provide a globally unique key, you should use the Kubling `val_pk` directive while considering the aggregation implications.
+
+For instance, in Kubernetes, if you have only one cluster, it may not be necessary to create primary keys that concatenate the cluster name.
+However, if you plan to add more clusters in the future, you should construct the primary key to include a cluster identifier, as shown below:
+```sql
+...
+identifier string OPTIONS(val_pk 'clusterName+metadata__namespace+metadata__name+name'),
+PRIMARY KEY(identifier),
+...
+```
+
+## Errors or empty results
+Since `VIEWs` in the aggregator context rely on `UNION ALL` (which keeps duplicates) to merge results,
+the query planner (DQP) evaluates costs differently than for standard queries, which can occasionally surface as errors or empty results.
+In the case of Kubernetes data sources, this behavior might be influenced by the `blankNamespaceStrategy` configuration.
+
+For more information on `blankNamespaceStrategy`, check out the detailed explanation [here](/Engine/data-sources#the-importance-of-blank-namespace-strategy-).
diff --git a/pages/Engine/data-sources.mdx b/pages/Engine/data-sources.mdx
index f18e01a..4998890 100644
--- a/pages/Engine/data-sources.mdx
+++ b/pages/Engine/data-sources.mdx
@@ -1,4 +1,5 @@
import { Callout, Steps } from 'nextra/components'
+import Tag from '../../component/Tag'
# Data Sources
@@ -113,6 +114,41 @@ Sample config in VDB file:
[Check here](/schemas#kubernetes-data-source-configuration) the Kubernetes Data Source configuration spec.
+### The importance of blank namespace strategy
+
+In Kubernetes, resources can be divided into two main groups: namespaced and non-namespaced resources. For the first group, it is mandatory to place the resource within a namespace, whereas the second group consists of cluster-wide or node-wide resources.
+
+This type of design introduces certain challenges to Kubling's DQP. The first challenge is how to behave when querying namespaced resources without specifying a namespace.
+
+For example, the following query:
+```sql
+SELECT * from DEPLOYMENT
+```
+
+This could mean two things to the user:
+1. **Return all cluster deployments** or...
+2. **Return deployments in the default namespace**, because if I needed specific namespaces or all, I would explicitly specify that in the query.
+
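+For instance, when interpretation 2 is what you actually want, being explicit in the query removes the ambiguity altogether (a hypothetical example against the built-in `DEPLOYMENT` table):
+```sql
+-- Explicitly targeting a single namespace leaves nothing to interpretation
+SELECT * FROM DEPLOYMENT WHERE metadata__namespace = 'kube-system';
+```
+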
+The second challenge is how to weigh queries (often referred to as node cardinality) when option 2, mentioned above, is chosen.
+The query planner makes decisions based on internal statistics, which may sometimes yield unpredictable results when an API is designed the way Kubernetes' is.
+
+Let's use the following query as an example:
+```sql
+SELECT * from DEPLOYMENT dp
+JOIN NAMESPACE AS ns ON dp.metadata__namespace = ns.metadata__name
+```
+
+In certain circumstances, the DQP may decide, based on internal statistics (or the lack thereof), that fetching all deployments and all namespaces in parallel,
+then evaluating them in memory to return only the rows that match the criterion `dp.metadata__namespace = ns.metadata__name` (even rewriting it as an `IN` list instead of an equality),
+is cheaper than fetching all namespaces first and then iterating over them, fetching deployments one by one.
+
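+Conceptually, that in-memory evaluation behaves as if the planner had rewritten the join criterion into an `IN` list, along these lines (an illustrative sketch with hypothetical namespace names, not actual plan output):
+```sql
+-- All namespaces are fetched first, then pushed down as an IN list
+SELECT * FROM DEPLOYMENT
+WHERE metadata__namespace IN ('default', 'kube-system', 'team-a');
+```
+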
+If this happens and option 2 is selected, the result will likely include only deployments from the default namespace, which is clearly not what the query is trying to retrieve.
+
+In conclusion, if you plan to use `JOINs` and want to avoid unpredictable results, we suggest opting for **option 1**, which can be configured via `blankNamespaceStrategy` in
+the Kubernetes Data Source configuration.
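+
+A minimal sketch of how this could look in the Kubernetes Data Source configuration is shown below; the property names follow the configuration schema, but the exact placement within your VDB file may differ:
+```yaml
+# Hypothetical fragment of a Kubernetes Data Source configuration
+masterUrl: "https://k8s-1.example.local:6443"   # only used when configFile is not present
+blankNamespaceStrategy: "ALL"                   # one of DEFAULT, ALL, FAIL
+```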
+
+### DDL
+
Kubling comes with a built-in, statically compiled module for Kubernetes that uses, by default, a DDL file with all the supported tables.
Please note that it has only a few synthetic `TABLES`, therefore, in case you need a much more specialized or even reduced version, please use the
following as a starting point.
@@ -169,8 +205,8 @@ CREATE FOREIGN TABLE NODE_CONDITIONS
metadata__name string NOT NULL OPTIONS(synthetic_type 'parent'),
metadata__namespace string OPTIONS(synthetic_type 'parent'),
- lastTransitionTime string,
- lastUpdateTime string,
+ lastTransitionTime timestamp,
+ lastUpdateTime timestamp,
message string,
reason string,
status string,
@@ -272,6 +308,7 @@ CREATE FOREIGN TABLE DEPLOYMENT
schema string OPTIONS(val_constant '{{ schema.name }}'),
metadata__name string,
metadata__namespace string,
+ metadata__uid string,
metadata__labels json OPTIONS(parser_format 'asJsonPretty'),
spec__template__spec__containers json OPTIONS(parser_format 'asJsonPretty'),
spec__selector__matchLabels json OPTIONS(parser_format 'asJsonPretty'),
@@ -295,9 +332,10 @@ CREATE FOREIGN TABLE DEPLOYMENT_CONDITIONS
schema string OPTIONS(val_constant '{{ schema.name }}'),
metadata__name string NOT NULL OPTIONS(synthetic_type 'parent'),
metadata__namespace string OPTIONS(synthetic_type 'parent'),
+ metadata__uid string OPTIONS(synthetic_type 'parent'),
- lastTransitionTime string,
- lastUpdateTime string,
+ lastTransitionTime timestamp,
+ lastUpdateTime timestamp,
message string,
reason string,
status string,
@@ -315,6 +353,7 @@ CREATE FOREIGN TABLE DEPLOYMENT_CONTAINER
metadata__name string NOT NULL OPTIONS(synthetic_type 'parent'),
metadata__namespace string OPTIONS(synthetic_type 'parent'),
metadata__labels string OPTIONS(updatable false, synthetic_type 'parent'),
+ metadata__uid string OPTIONS(synthetic_type 'parent'),
image string NOT NULL,
name string NOT NULL,
@@ -337,6 +376,7 @@ CREATE FOREIGN TABLE DEPLOYMENT_CONTAINER_VOLS
metadata__name string NOT NULL OPTIONS(synthetic_type 'parent'),
metadata__namespace string OPTIONS(synthetic_type 'parent'),
metadata__labels string OPTIONS(synthetic_type 'parent'),
+ metadata__uid string OPTIONS(synthetic_type 'parent'),
containerName string OPTIONS(synthetic_type 'parent_array_key', synthetic_parent_field 'name'),
containerImage string OPTIONS(synthetic_type 'parent_array_key', synthetic_parent_field 'image'),
@@ -375,7 +415,7 @@ CREATE FOREIGN TABLE PERSISTENT_VOLUME
spec__volumeMode string,
spec__additionalProperties json OPTIONS(parser_format 'asJsonPretty'),
- status__lastPhaseTransitionTime string,
+ status__lastPhaseTransitionTime timestamp,
status__message string,
status__phase string,
status__reason string,
@@ -720,10 +760,10 @@ CREATE FOREIGN TABLE EVENT
action string,
deprecatedCount integer,
- deprecatedFirstTimestamp string,
- deprecatedLastTimestamp string,
+ deprecatedFirstTimestamp timestamp,
+ deprecatedLastTimestamp timestamp,
deprecatedSource json OPTIONS(parser_format 'asJsonPretty'),
- eventTime json OPTIONS(parser_format 'asJsonPretty'),
+ eventTime timestamp,
note string,
reason string,
regarding json OPTIONS(parser_format 'asJsonPretty'),
@@ -840,7 +880,7 @@ CREATE FOREIGN TABLE HORIZONTAL_POD_AUTOSCALER
status__currentMetrics json OPTIONS(parser_format 'asJsonPretty'),
status__currentReplicas integer,
status__desiredReplicas integer,
- status__lastScaleTime string,
+ status__lastScaleTime timestamp,
status__observedGeneration long,
identifier string NOT NULL OPTIONS(val_pk 'clusterName+metadata__namespace+metadata__name' ),
@@ -991,12 +1031,12 @@ CREATE FOREIGN TABLE JOB
status__active integer,
status__completedIndexes string,
- status__completionTime string,
+ status__completionTime timestamp,
status__conditions json OPTIONS(parser_format 'asJsonPretty'),
status__failed integer,
status__failedIndexes string,
status__ready integer,
- status__startTime string,
+ status__startTime timestamp,
status__succeeded integer,
status__terminating integer,
status__uncountedTerminatedPods json OPTIONS(parser_format 'asJsonPretty'),
@@ -1028,8 +1068,8 @@ CREATE FOREIGN TABLE CRON_JOB
spec__timeZone string,
status__active json OPTIONS(parser_format 'asJsonPretty'),
- status__lastScheduleTime string,
- status__lastSuccessfulTime string,
+ status__lastScheduleTime timestamp,
+ status__lastSuccessfulTime timestamp,
identifier string NOT NULL OPTIONS(val_pk 'clusterName+metadata__namespace+metadata__name' ),
PRIMARY KEY(identifier),
@@ -1241,11 +1281,11 @@ CREATE FOREIGN TABLE LEASE
metadata__namespace string,
metadata__labels json OPTIONS(parser_format 'asJsonPretty'),
- spec__acquireTime string,
+ spec__acquireTime timestamp,
spec__holderIdentity string,
spec__leaseDurationSeconds integer,
spec__leaseTransitions integer,
- spec__renewTime string,
+ spec__renewTime timestamp,
identifier string NOT NULL OPTIONS(val_pk 'clusterName+metadata__namespace+metadata__name' ),
PRIMARY KEY(identifier),
@@ -1283,7 +1323,6 @@ OPTIONS(updatable true,
supports_idempotency false,
tags 'kubernetes;{{ schema.properties.cluster_name }};crd;customresourcedefinition');
```
-
## Script Document Data Source (`SCRIPT_DOCUMENT_JS`)
diff --git a/pages/Engine/queries.mdx b/pages/Engine/queries.mdx
index 634ed60..f7c78ab 100644
--- a/pages/Engine/queries.mdx
+++ b/pages/Engine/queries.mdx
@@ -6,7 +6,6 @@ import { Callout, Steps } from 'nextra/components'
Kubling provides most of the functionality of SQL-92 DML.
The idea of this document is not to cover SQL exhaustively, but rather to highlight how SQL is used in Kubling.
-For details about the exact form of SQL that Kubling accepts, [see grammar.](/Engine/grammar)
## Identifiers
diff --git a/pages/Observability/metrics.mdx b/pages/Observability/metrics.mdx
index e3b0c17..70079e1 100644
--- a/pages/Observability/metrics.mdx
+++ b/pages/Observability/metrics.mdx
@@ -1,4 +1,5 @@
import { Callout } from 'nextra/components'
+import Tag from '../../component/Tag'
# Metrics
@@ -152,46 +153,46 @@ Provides information about currently **active** HTTP requests.
## Engine related metrics
These metrics come without any tags by default. You can specify yours in the `metricsCommonTags` attribute of the [Main application configuration](/schemas#main-application-configuration).
-### `kubling.engine.sql.sessions.active` Gauge
+### `kubling.engine.sql.sessions.active`
Provides the number of current active SQL Sessions (Native & PG transports).
-### `kubling.engine.sql.plan.active` Gauge
+### `kubling.engine.sql.plan.active`
Number of current active SQL atomic plans. Atomic plans are the effective work items within the DQP, created once the DQP has identified the Data Sources involved in the query.
-### `kubling.engine.sql.plan.enqueued.current` Gauge
+### `kubling.engine.sql.plan.enqueued.current`
Number of current Jobs (queries sent by any valid session) waiting for a free thread to process the plan.
-### `kubling.engine.sql.plan.enqueued_time.max` Gauge
+### `kubling.engine.sql.plan.enqueued_time.max`
The maximum time a plan spent waiting in the queue for a free thread during the current execution.
-### `kubling.engine.sql.threads.active` Gauge
+### `kubling.engine.sql.threads.active`
Number of current active SQL related threads, like DQP's plan processing. Take into account that one query may trigger multiple threads, one per atomic plan.
-### `kubling.engine.sql.jobs.total` Gauge
+### `kubling.engine.sql.jobs.total`
Number of total submitted SQL Jobs.
-### `kubling.engine.sql.jobs.completed` Gauge
+### `kubling.engine.sql.jobs.completed`
Number of total completed SQL Jobs.
-### `kubling.engine.sql.jobs.enqueued` Gauge
+### `kubling.engine.sql.jobs.enqueued`
Number of current enqueued SQL Jobs.
-### `kubling.engine.sql.jobs.max` Gauge
+### `kubling.engine.sql.jobs.max`
Maximum number of SQL jobs running in parallel during the current execution.
-### `kubling.engine.sql.jobs.enqueued.max` Gauge
+### `kubling.engine.sql.jobs.enqueued.max`
Maximum number of SQL jobs enqueued during the current execution.
-### `kubling.js.threads.active` Gauge
+### `kubling.js.threads.active`
Provides the number of current active JavaScript context threads.
### `kubling.js.executions` Counter
Provides the number of total JavaScript context threads claimed during current execution.
-### `kubling.js.auth.thread.active` Gauge
+### `kubling.js.auth.thread.active`
Provides the number of current active JavaScript Auth* context threads. Same measurement as `kubling.js.threads.active`.
-### `kubling.js.auth.executions` Counter
+### `kubling.js.auth.executions`
Provides the number of total JavaScript Auth* context threads claimed during current execution. Same measurement as `kubling.js.executions`.
## Prometheus
diff --git a/pages/_meta.json b/pages/_meta.json
index 941a03b..45fec72 100644
--- a/pages/_meta.json
+++ b/pages/_meta.json
@@ -10,6 +10,7 @@
"Observability": "",
"CLI": "",
"schemas": "Configuration files schemas",
+ "changelog": "Product Changelog",
"roadmap": "Product Roadmap",
"contact": {
"title": "Contact ↗",
diff --git a/pages/changelog.mdx b/pages/changelog.mdx
new file mode 100644
index 0000000..3a4a105
--- /dev/null
+++ b/pages/changelog.mdx
@@ -0,0 +1,7 @@
+# Product changelog
+
+## v24.5.2
+- Added `blankNamespaceStrategy` option to the Kubernetes Data Source Configuration.
+- Added a new aggregator schema button in the console, under Virtual Databases.
+- Fixed the JavaScript filesystem cache, which in some circumstances resulted in a false cache miss when the resource had been visited before, especially during `import`.
+- Added `metadata__uid` to all deployment-related `TABLEs` in the built-in Kubernetes DDL.
\ No newline at end of file
diff --git a/pages/roadmap.mdx b/pages/roadmap.mdx
index b27f5e9..5e4425a 100644
--- a/pages/roadmap.mdx
+++ b/pages/roadmap.mdx
@@ -1,3 +1,5 @@
+import Tag from '../component/Tag'
+
# Product roadmap
Adopting a new technology, especially foundational software like a database engine—even a virtual one like Kubling—is a long process that involves some degree of uncertainty.
@@ -6,28 +8,33 @@ This is why we’re sharing our high-level roadmap with you.
Please keep in mind that we only list features here that might directly impact you, and not all of them will necessarily be implemented.
-## Support all DDL options in non-document data sources AnalysisHigh Prio
+## Create a new GraphQL Data Source
+We are currently developing a new Data Source for GraphQL, which is expected to make a significant impact.
+Many of the existing Script modules that do not require manual interception will likely be transitioned to GraphQL.
+Since the new Data Source will be statically compiled, it will offer performance comparable to that of the Kubernetes Data Source, ensuring both efficiency and scalability.
+
+## Support all DDL options in non-document data sources
When we initially implemented support for traditional RDBMS and other NoSQL databases, we focused on a straightforward, 1:1 schema import.
However, we've recently identified use cases where users need to generate new schemas based on existing data source schemas, incorporating new fields and transformations.
We plan to develop a new Query Processor's interceptor meta-adapter, transparent to the user, to enhance DDL capabilities across all non-document data sources.
-## Support Couchbase Data Source Implemented
+## Support Couchbase Data Source
We will support Couchbase, possibly with some limitations, in the next release.
-## Support Cassandra Data Source Implemented
+## Support Cassandra Data Source
We will support Cassandra, possibly with some limitations, in the next release.
-## File Logs High Prio
+## File Logs
We plan to support saving logs in files, with the ability to define some rolling strategy.
-## Log Routers Analysis
+## Log Routers
We are analyzing whether to support routing logs, based on a predefined criteria, to custom script functions.
-## Cache AnalysisHigh Prio
+## Cache
We are analyzing how to implement configurable local cache strategy in data sources, with performant invalidation mechanism.
-## Transaction support on `SCRIPT_DOCUMENT_JS` and `KUBERNETES` Data Sources Analysis
+## Transaction support on `SCRIPT_DOCUMENT_JS` and `KUBERNETES` Data Sources
We are analyzing whether it is possible to implement transactions without breaking these DS principles.
## SQL functions defined in modules
@@ -37,9 +44,9 @@ in [Script Bundle Modules](/Modules/DataSource/Script).
## Smaller `kdv` CLI
We'll release a smaller version of the CLI without the `test` command, which requires lots of libraries to be added to the utility.
-## Load DDL from modules in Integration tests Analysis
+## Load DDL from modules in Integration tests
Loading DDL files defined inside modules is not yet supported. We are currently analyzing whether the integration tests should read the internal module metadata,
or if the `ddlFilePath` property should support schemes like `module:` and/or `bundle:`.
-## Upgrade Hibernate dialect to version 7 Low Prio
+## Upgrade Hibernate dialect to version 7
We currently support up to version `6.1.7`, and we are planning to jump directly to version `7`.
diff --git a/pages/schemas.mdx b/pages/schemas.mdx
index 7b5393e..f42cc9d 100644
--- a/pages/schemas.mdx
+++ b/pages/schemas.mdx
@@ -758,7 +758,7 @@ Applies to the following Data Source types:
```yaml
type: "object"
-id: "schema:kubling:model:vdb:sources:KubernetesSourceConfig"
+id: "schema:kubling:dbvirt:model:vdb:sources:KubernetesSourceConfig"
properties:
contextVariables:
type: "object"
@@ -772,6 +772,15 @@ properties:
masterUrl:
type: "string"
description: "Kubernetes cluster API URL. Only used when configFile is not present."
+ blankNamespaceStrategy:
+ type: "string"
+ description: "Sets how the module will behave in case of the query received does\
+ \ not specify a namespace.If none specified, the absence of a namespace in the\
+ \ query will fall to the 'default' namespace, as the kubectl does."
+ enum:
+ - "DEFAULT"
+ - "ALL"
+ - "FAIL"
```
## Script Document Data Source Configuration
diff --git a/public/img/favicon.png b/public/img/favicon.png
new file mode 100644
index 0000000..f80c502
Binary files /dev/null and b/public/img/favicon.png differ
diff --git a/theme.config.tsx b/theme.config.tsx
index 117373a..fbdae7d 100644
--- a/theme.config.tsx
+++ b/theme.config.tsx
@@ -4,6 +4,11 @@ import Image from "next/image";
import { useRouter } from 'next/router';
const config: DocsThemeConfig = {
+  head: (
+    <>
+      {/* Assumed favicon reference; the original element was lost, but it likely
+          points to the public/img/favicon.png added in this change. */}
+      <link rel="icon" type="image/png" href="/img/favicon.png" />
+    </>
+  ),
logo: (