diff --git a/.semaphore/push-images/goldmane.yml b/.semaphore/push-images/goldmane.yml new file mode 100644 index 00000000000..9d358bff4eb --- /dev/null +++ b/.semaphore/push-images/goldmane.yml @@ -0,0 +1,47 @@ +version: v1.0 +name: Publish goldmane images +agent: + machine: + type: f1-standard-2 + os_image: ubuntu2204 + +execution_time_limit: + minutes: 60 + +global_job_config: + env_vars: + - name: DEV_REGISTRIES + value: quay.io/calico docker.io/calico + secrets: + - name: docker + - name: quay-robot-calico+semaphoreci + prologue: + commands: + - checkout + # Semaphore is doing shallow clone on a commit without tags. + # unshallow it for GIT_VERSION:=$(shell git describe --tags --dirty --always) + - retry git fetch --unshallow + - echo $DOCKER_TOKEN | docker login --username "$DOCKER_USER" --password-stdin + - echo $QUAY_TOKEN | docker login --username "$QUAY_USER" --password-stdin quay.io + - export BRANCH_NAME=$SEMAPHORE_GIT_BRANCH + +blocks: + - name: Publish goldmane images + dependencies: [] + skip: + when: "branch !~ '.+'" + task: + jobs: + - name: Linux multi-arch + commands: + - if [ -z "${SEMAPHORE_GIT_PR_NUMBER}" ]; then make -C goldmane cd CONFIRM=true; fi + - name: Publish goldmane multi-arch manifests + dependencies: + - Publish goldmane images + skip: + when: "branch !~ '.+'" + task: + jobs: + - name: Linux multi-arch manifests + commands: + - if [ -z "${SEMAPHORE_GIT_PR_NUMBER}" ]; then make -C goldmane push-manifests-with-tag CONFIRM=true; fi diff --git a/.semaphore/semaphore-scheduled-builds.yml b/.semaphore/semaphore-scheduled-builds.yml index 4b30c98aff3..7c0cb65caeb 100644 --- a/.semaphore/semaphore-scheduled-builds.yml +++ b/.semaphore/semaphore-scheduled-builds.yml @@ -82,6 +82,10 @@ promotions: pipeline_file: push-images/typha.yml auto_promote: when: "branch =~ 'master|release-'" + - name: Push Goldmane images + pipeline_file: push-images/goldmane.yml + auto_promote: + when: "branch =~ 'master|release-'" - name: Publish openstack packages 
pipeline_file: push-images/packaging.yaml auto_promote: @@ -529,6 +533,30 @@ blocks: - ./.semaphore/clean-up-vms ${VM_PREFIX} secrets: - name: google-service-account-for-gce +- name: goldmane + run: + when: "true or change_in(['/*', '/goldmane/'], {exclude: ['/**/.gitignore', '/**/README.md', '/**/LICENSE']})" + execution_time_limit: + minutes: 30 + dependencies: + - Prerequisites + task: + prologue: + commands: + - cd goldmane + jobs: + - name: make ci + commands: + - ../.semaphore/run-and-monitor make-ci.log make ci + - name: Build binary + matrix: + - env_var: ARCH + values: + - arm64 + - ppc64le + - s390x + commands: + - ../.semaphore/run-and-monitor image-$ARCH.log make build ARCH=$ARCH - name: kube-controllers run: when: "true or change_in(['/*', '/api/', '/libcalico-go/', '/kube-controllers/', '/hack/test/certs/'], {exclude: ['/**/.gitignore', '/**/README.md', '/**/LICENSE']})" diff --git a/.semaphore/semaphore.yml b/.semaphore/semaphore.yml index f39943cad03..f64b6900108 100644 --- a/.semaphore/semaphore.yml +++ b/.semaphore/semaphore.yml @@ -82,6 +82,10 @@ promotions: pipeline_file: push-images/typha.yml auto_promote: when: "branch =~ 'master|release-'" + - name: Push Goldmane images + pipeline_file: push-images/goldmane.yml + auto_promote: + when: "branch =~ 'master|release-'" - name: Publish openstack packages pipeline_file: push-images/packaging.yaml auto_promote: @@ -529,6 +533,30 @@ blocks: - ./.semaphore/clean-up-vms ${VM_PREFIX} secrets: - name: google-service-account-for-gce +- name: goldmane + run: + when: "false or change_in(['/*', '/goldmane/'], {exclude: ['/**/.gitignore', '/**/README.md', '/**/LICENSE']})" + execution_time_limit: + minutes: 30 + dependencies: + - Prerequisites + task: + prologue: + commands: + - cd goldmane + jobs: + - name: make ci + commands: + - ../.semaphore/run-and-monitor make-ci.log make ci + - name: Build binary + matrix: + - env_var: ARCH + values: + - arm64 + - ppc64le + - s390x + commands: + - 
../.semaphore/run-and-monitor image-$ARCH.log make build ARCH=$ARCH - name: kube-controllers run: when: "false or change_in(['/*', '/api/', '/libcalico-go/', '/kube-controllers/', '/hack/test/certs/'], {exclude: ['/**/.gitignore', '/**/README.md', '/**/LICENSE']})" diff --git a/.semaphore/semaphore.yml.d/03-promotions.yml b/.semaphore/semaphore.yml.d/03-promotions.yml index d5f3343dbea..dc902d1aeaa 100644 --- a/.semaphore/semaphore.yml.d/03-promotions.yml +++ b/.semaphore/semaphore.yml.d/03-promotions.yml @@ -51,6 +51,10 @@ promotions: pipeline_file: push-images/typha.yml auto_promote: when: "branch =~ 'master|release-'" + - name: Push Goldmane images + pipeline_file: push-images/goldmane.yml + auto_promote: + when: "branch =~ 'master|release-'" - name: Publish openstack packages pipeline_file: push-images/packaging.yaml auto_promote: diff --git a/.semaphore/semaphore.yml.d/blocks/20-goldmane.yml b/.semaphore/semaphore.yml.d/blocks/20-goldmane.yml new file mode 100644 index 00000000000..bf43a888e24 --- /dev/null +++ b/.semaphore/semaphore.yml.d/blocks/20-goldmane.yml @@ -0,0 +1,24 @@ +- name: goldmane + run: + when: "${FORCE_RUN} or change_in(['/*', '/goldmane/'], {exclude: ['/**/.gitignore', '/**/README.md', '/**/LICENSE']})" + execution_time_limit: + minutes: 30 + dependencies: + - Prerequisites + task: + prologue: + commands: + - cd goldmane + jobs: + - name: make ci + commands: + - ../.semaphore/run-and-monitor make-ci.log make ci + - name: Build binary + matrix: + - env_var: ARCH + values: + - arm64 + - ppc64le + - s390x + commands: + - ../.semaphore/run-and-monitor image-$ARCH.log make build ARCH=$ARCH diff --git a/goldmane/Makefile b/goldmane/Makefile new file mode 100644 index 00000000000..734c869d712 --- /dev/null +++ b/goldmane/Makefile @@ -0,0 +1,80 @@ +include ../metadata.mk + +PACKAGE_NAME = github.com/projectcalico/calico/goldmane +IMAGE_BUILD_MARKER = goldmane_container-$(ARCH).created + 
+############################################################################### +# include ../lib.Makefile +# Additions to EXTRA_DOCKER_ARGS need to happen before the include since +# that variable is evaluated when we declare DOCKER_RUN and siblings. +############################################################################### +include ../lib.Makefile + +# Configure variables used by ci/cd common targets from lib.Makefile. +BUILD_IMAGES=goldmane + +.PHONY: image build +image: $(IMAGE_BUILD_MARKER) +build: bin/goldmane-$(ARCH) +clean: + rm -rf bin + rm -f $(IMAGE_BUILD_MARKER) + +image-all: $(addprefix sub-image-,$(VALIDARCHES)) +sub-image-%: + $(MAKE) image ARCH=$* + +# Build goldmane image. +calico/goldmane: $(IMAGE_BUILD_MARKER) +$(IMAGE_BUILD_MARKER): bin/goldmane-$(ARCH) + $(DOCKER_BUILD) --build-arg TARGETARCH=$(ARCH) -t goldmane:latest-$(ARCH) -f docker/Dockerfile . + $(MAKE) retag-build-images-with-registries BUILD_IMAGES=$(BUILD_IMAGES) VALIDARCHES=$(ARCH) IMAGETAG=latest + touch $@ + +bin/goldmane-$(ARCH): $(shell find . -name '*.go') + $(call build_binary, $(PACKAGE_NAME)/cmd/, $@) + +# Build the testserver image. +.PHONY: testserver +calico/flowgen: bin/flowgen + docker build -t calico/flowgen -f docker/flowgen/Dockerfile . + +bin/flowgen: $(shell find . -name '*.go') + $(call build_binary, $(PACKAGE_NAME)/cmd/flowgen, $@) + +# Update protobuf generation. +protobuf proto/api.pb.go: proto/api.proto + $(DOCKER_RUN) -v $(CURDIR):/code \ + $(CALICO_BUILD) sh -c 'protoc --proto_path=/code/proto --go_out=/code/proto --go-grpc_out=. --go_opt=paths=source_relative /code/proto/api.proto' + $(MAKE) fix-changed + +############################################################################### +# UTs +############################################################################### +ci: static-checks ut +ut: + $(DOCKER_GO_BUILD) go test ./... 
-cover -count 1 + +############################################################################### +# Release +############################################################################### +## Deploys images to registry +cd: image-all cd-common + +release-build: .release-$(VERSION).created +.release-$(VERSION).created: + $(MAKE) clean image-all RELEASE=true + $(MAKE) retag-build-images-with-registries RELEASE=true IMAGETAG=$(VERSION) + $(MAKE) retag-build-images-with-registries RELEASE=true IMAGETAG=latest + +release-verify: release-prereqs + @echo "Nothing to do" + +release-publish: release-prereqs release-verify .release-$(VERSION).published +.release-$(VERSION).published: + $(MAKE) push-images-to-registries push-manifests IMAGETAG=$(VERSION) RELEASE=$(RELEASE) CONFIRM=$(CONFIRM) + $(MAKE) FIPS=true push-images-to-registries push-manifests IMAGETAG=$(VERSION)-fips RELEASE=$(RELEASE) CONFIRM=$(CONFIRM) + + # Push Windows images. + $(MAKE) release-windows IMAGETAG=$(VERSION) CONFIRM=$(CONFIRM) + touch $@ diff --git a/goldmane/README.md b/goldmane/README.md new file mode 100644 index 00000000000..a3c73289ec9 --- /dev/null +++ b/goldmane/README.md @@ -0,0 +1,11 @@ +## Goldmane + +Goldmane is a flow aggregation service. It provides a central, aggregated view of network flows in a Kubernetes cluster. + +Some key packages: + +- **proto/** defines the Flow structure and gRPC services provided by Goldmane. +- **pkg/aggregator/** collects flow information from across the cluster and aggregates those flows across all nodes, building a cluster-wide view of network activity. +- **pkg/collector/** provides a gRPC API that allows each Calico node instance to stream network flow information to a central location for aggregation and consumption. +- **pkg/emitter/** periodically emits time-aggregated flow information to a configured endpoint. +- **pkg/server/** allows for filtered querying of aggregated flow information. 
diff --git a/goldmane/cmd/flowgen/main.go b/goldmane/cmd/flowgen/main.go new file mode 100644 index 00000000000..f6e4e5afdaa --- /dev/null +++ b/goldmane/cmd/flowgen/main.go @@ -0,0 +1,21 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import "github.com/projectcalico/calico/goldmane/pkg/flowgen" + +func main() { + flowgen.Start() +} diff --git a/goldmane/cmd/main.go b/goldmane/cmd/main.go new file mode 100644 index 00000000000..856f2668fae --- /dev/null +++ b/goldmane/cmd/main.go @@ -0,0 +1,21 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import "github.com/projectcalico/calico/goldmane/pkg/daemon" + +func main() { + daemon.Run() +} diff --git a/goldmane/config/testserver-daemonset.yaml b/goldmane/config/testserver-daemonset.yaml new file mode 100644 index 00000000000..947e70c1db8 --- /dev/null +++ b/goldmane/config/testserver-daemonset.yaml @@ -0,0 +1,24 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: flow-log-generator + namespace: default +spec: + selector: + matchLabels: + name: flow-log-generator + template: + metadata: + labels: + name: flow-log-generator + spec: + hostNetwork: true + tolerations: + - operator: Exists + effect: NoSchedule + containers: + - name: generator + image: caseydavenport/flowgen + env: + - name: SERVER + value: "goldmane.calico-system:443" diff --git a/goldmane/docker/Dockerfile b/goldmane/docker/Dockerfile new file mode 100644 index 00000000000..1a703d5076c --- /dev/null +++ b/goldmane/docker/Dockerfile @@ -0,0 +1,6 @@ +FROM scratch + +ARG TARGETARCH +COPY ./bin/goldmane-${TARGETARCH} /goldmane + +CMD ["/goldmane"] diff --git a/goldmane/docker/flowgen/Dockerfile b/goldmane/docker/flowgen/Dockerfile new file mode 100644 index 00000000000..a64d0626cd1 --- /dev/null +++ b/goldmane/docker/flowgen/Dockerfile @@ -0,0 +1,5 @@ +FROM scratch + +COPY ./bin/flowgen /flowgen + +CMD ["/flowgen"] diff --git a/goldmane/pkg/aggregator/aggregator.go b/goldmane/pkg/aggregator/aggregator.go new file mode 100644 index 00000000000..187aee23361 --- /dev/null +++ b/goldmane/pkg/aggregator/aggregator.go @@ -0,0 +1,364 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package aggregator + +import ( + "sort" + "time" + + "github.com/sirupsen/logrus" + + "github.com/projectcalico/calico/goldmane/pkg/internal/types" + "github.com/projectcalico/calico/goldmane/proto" +) + +const ( + // numBuckets is the number of buckets to keep in memory. + // We keep 240 buckets. Assuming a default window of 15s each, this + // gives us a total of 1hr of history. + numBuckets = 240 + + // channelDepth is the depth of the channel to use for flow updates. + channelDepth = 5000 +) + +// Sink is an interface that can receive aggregated flows. +type Sink interface { + Receive(*AggregationBucket) +} + +// flowRequest is an internal helper used to synchronously request matching flows from the aggregator. +type flowRequest struct { + respCh chan []*proto.Flow + req *proto.FlowRequest +} + +type LogAggregator struct { + buckets []AggregationBucket + + // aggregationWindow is the size of each aggregation bucket. + aggregationWindow time.Duration + + // Used to trigger goroutine shutdown. + done chan struct{} + + // Used to make requests for flows synchronously. + flowRequests chan flowRequest + + // sink is a sink to send aggregated flows to. + sink Sink + + // recvChan is the channel to receive flow updates on. + recvChan chan *proto.FlowUpdate + + // rolloverFunc allows manual control over the rollover timer, used in tests. + // In production, this will be time.After. + rolloverFunc func(time.Duration) <-chan time.Time + + // bucketsToAggregate is the number of internal buckets to aggregate when pushing flows to the sink. 
+ // This can be used to reduce the number of distinct flows that are sent to the sink, at the expense of + // delaying the emission of flows. + // 20 buckets of 15s provides a 5 minute aggregation. + bucketsToAggregate int + + // pushIndex is the index of the bucket to push to the emitter. We only push + // the bucket after it has been rolled over several times, to ensure that we have + // a complete view of the flows in the bucket. + // + // Increasing this value will increase the latency of the emitted flows. Decreasing it too much + // will cause the emitter to emit incomplete flows. + // + // Latency-to-emit is roughly (pushIndex * rolloverTime). + pushIndex int + + // nowFunc allows overriding the current time, used in tests. + nowFunc func() time.Time +} + +func NewLogAggregator(opts ...Option) *LogAggregator { + // Establish default aggregator configuration. Options can be used to override these. + a := &LogAggregator{ + aggregationWindow: 15 * time.Second, + done: make(chan struct{}), + flowRequests: make(chan flowRequest), + recvChan: make(chan *proto.FlowUpdate, channelDepth), + rolloverFunc: time.After, + bucketsToAggregate: 20, + pushIndex: 30, + nowFunc: time.Now, + } + + // Apply options. + for _, opt := range opts { + opt(a) + } + + // Log out some key information. + if a.sink != nil { + logrus.WithFields(logrus.Fields{ + // This is the soonest we will possibly emit a flow as part of an aggregation. + "emissionWindowLeftBound": time.Duration(a.pushIndex-a.bucketsToAggregate) * a.aggregationWindow, + + // This is the latest we will emit a flow as part of an aggregation. + "emissionWindowRightBound": time.Duration(a.pushIndex) * a.aggregationWindow, + + // This is the total time window that we will aggregate over when generating emitted flows. 
+ "emissionWindow": time.Duration(a.bucketsToAggregate) * a.aggregationWindow, + }).Info("Emission of aggregated flows configured") + } + + logrus.WithFields(logrus.Fields{ + // This is the size of each aggregation bucket. + "bucketSize": a.aggregationWindow, + + // This is the total amount of history that we will keep in memory. + "totalHistory": time.Duration(numBuckets) * a.aggregationWindow, + }).Info("Keeping bucketed flow history in memory") + + return a +} + +func (a *LogAggregator) Run(startTime int64) { + // Initialize the buckets. + a.buckets = InitialBuckets(numBuckets, int(a.aggregationWindow.Seconds()), startTime) + + // Schedule the first rollover one aggregation period from now. + rolloverCh := a.rolloverFunc(a.aggregationWindow) + + for { + select { + case upd := <-a.recvChan: + a.handleFlowUpdate(upd) + case <-rolloverCh: + rolloverCh = a.rolloverFunc(a.rollover()) + a.maybeEmitBucket() + case req := <-a.flowRequests: + logrus.Debug("Received flow request") + req.respCh <- a.queryFlows(req.req) + case <-a.done: + logrus.Warn("Aggregator shutting down") + return + } + } +} + +// Receive is used to send a flow update to the aggregator. +func (a *LogAggregator) Receive(f *proto.FlowUpdate) { + timeout := time.After(5 * time.Second) + + select { + case a.recvChan <- f: + case <-timeout: + logrus.Warn("Output channel full, dropping flow") + } +} + +func (a *LogAggregator) maybeEmitBucket() { + if a.sink == nil { + logrus.Debug("No sink configured, skip flow emission") + return + } + + if a.buckets[a.pushIndex].Pushed { + // We've already pushed this bucket, so we can skip it. We'll emit the next flow once + // bucketsToAggregate buckets have been rolled over. + logrus.WithFields(a.buckets[a.pushIndex].Fields()).Debug("Skipping already pushed bucket") + return + } + + // We should emit an aggregated flow of bucketsToAggregate buckets, starting from the pushIndex. 
+ b := NewAggregationBucket( + time.Unix(a.buckets[a.pushIndex].StartTime, 0), + time.Unix(a.buckets[a.pushIndex-a.bucketsToAggregate+1].EndTime, 0), + ) + for i := a.pushIndex; i > a.pushIndex-a.bucketsToAggregate; i-- { + logrus.WithField("idx", i).WithFields(a.buckets[i].Fields()).Debug("Merging bucket") + + // Merge the bucket into the aggregation bucket, and mark its contents as pushed. + b.merge(&a.buckets[i]) + a.buckets[i].Pushed = true + } + if len(b.Flows) > 0 { + logrus.WithFields(b.Fields()).Debug("Emitting aggregated bucket to receiver") + a.sink.Receive(b) + } +} + +// GetFlows returns a list of flows that match the given request. It uses a channel to +// synchronously request the flows from the aggregator. The request channel is unbuffered, so this blocks until the Run loop services it. +func (a *LogAggregator) GetFlows(req *proto.FlowRequest) []*proto.Flow { + respCh := make(chan []*proto.Flow) + defer close(respCh) + a.flowRequests <- flowRequest{respCh, req} + return <-respCh +} + +func (a *LogAggregator) queryFlows(req *proto.FlowRequest) []*proto.Flow { + // Collect all of the flows across all buckets that match the request. We will then + // combine matching flows together, returning an aggregated view across the time range. + flowsByKey := map[types.FlowKey]*types.Flow{} + + for i, bucket := range a.buckets { + // Ignore buckets that fall outside the time range. Once we hit a bucket + // whose end time comes before the start time of the request, we can stop. + if bucket.EndTime <= req.StartTimeGt { + // We've reached a bucket that doesn't fall within the request time range. + // We can stop checking the remaining buckets, and can mark the start time + // of the aggregated flows as the end time of this bucket. + logrus.WithField("index", i).Debug("No need to check remaining buckets") + break + } + + // If this bucket's start time is after the end time of the request, then we can + // skip this bucket and move on to the next one. 
+ if req.StartTimeLt > 0 && bucket.StartTime >= req.StartTimeLt { + logrus.WithField("index", i).Debug("Skipping bucket because it starts after the requested time window") + continue + } + + // Check each flow in the bucket to see if it matches the request. + for key, flow := range bucket.Flows { + if !flowMatches(flow, req) { + logrus.Debug("Skipping flow because it doesn't match the request") + continue + } + + if _, ok := flowsByKey[key]; !ok { + // Initialize the flow if it doesn't exist by making a copy. + cp := *flow + logrus.WithField("idx", i).WithFields(bucket.Fields()).Debug("Adding new flow to results") + + // Set the start and end times of this flow to match the bucket. + // Aggregated flows always align with bucket intervals for consistent rate calculation. + cp.StartTime = bucket.StartTime + cp.EndTime = bucket.EndTime + flowsByKey[key] = &cp + } else { + logrus.WithField("idx", i).WithFields(bucket.Fields()).Debug("Adding flow contribution from bucket to results") + + // Add this bucket's contribution to the flow. + mergeFlowInto(flowsByKey[key], flow) + + // Since this flow was present in a later (chronologically) bucket, we need to update the start time + // of the flow to the start time of this (earlier chronologically) bucket. + flowsByKey[key].StartTime = bucket.StartTime + } + } + } + + // Convert the map to a slice. + flows := []*proto.Flow{} + for _, flow := range flowsByKey { + flows = append(flows, types.FlowToProto(flow)) + } + // Sort the flows by start time, sorting newer flows first. NOTE(review): there is no tie-break and the input order comes from map iteration, so flows sharing a start time may be ordered differently between calls - confirm pagination tolerates this. + sort.Slice(flows, func(i, j int) bool { + return flows[i].StartTime > flows[j].StartTime + }) + + // If pagination was requested, apply it now after sorting. + // This is a bit inefficient - we collect more data than we need to return - + // but it's a simple way to implement basic pagination. 
+ if req.PageSize > 0 { + startIdx := (req.PageNumber) * req.PageSize + endIdx := startIdx + req.PageSize + if startIdx < 0 || startIdx >= int64(len(flows)) { // Negative (bad PageNumber or overflow) or past the end: return an empty page rather than hitting a slice-bounds panic. + return []*proto.Flow{} + } + if endIdx < startIdx || endIdx > int64(len(flows)) { // Clamp a short (or overflowed) final page to the available flows. + endIdx = int64(len(flows)) + } + logrus.WithFields(logrus.Fields{ // Log before slicing so that "total" is the full match count, not the page length. + "pageSize": req.PageSize, + "pageNumber": req.PageNumber, + "startIdx": startIdx, + "endIdx": endIdx, + "total": len(flows), + }).Debug("Returning paginated flows") + flows = flows[startIdx:endIdx] + } + return flows +} + +func (a *LogAggregator) Stop() { + close(a.done) // Signals the Run loop's select to return. +} + +func (a *LogAggregator) rollover() time.Duration { + currentBucketEnd := a.buckets[0].EndTime + + // Add a new bucket at the start and remove the last bucket. + start := time.Unix(a.buckets[0].EndTime, 0) + end := start.Add(a.aggregationWindow) + a.buckets = append([]AggregationBucket{*NewAggregationBucket(start, end)}, a.buckets[:len(a.buckets)-1]...) + logrus.WithFields(a.buckets[0].Fields()).Debug("Rolled over. New bucket") + + // Determine when we should next rollover. We don't just blindly use the rolloverTime, as this leaves us + // susceptible to slowly drifting over time. Instead, we calculate when the next bucket should start and + // calculate the difference between then and now. + // + // The next bucket should start at the end time of the current bucket. + nextBucketStart := time.Unix(currentBucketEnd, 0) + now := a.nowFunc() + + // If the next bucket start time is in the past, we've fallen behind and need to catch up. + // Schedule a rollover immediately. + if nextBucketStart.Before(now) { + logrus.WithFields(logrus.Fields{ + "now": now.Unix(), + "nextBucketStart": nextBucketStart.Unix(), + }).Warn("Falling behind, scheduling immediate rollover") + // We don't actually use 0 time, as it could starve the main routine. Use a small amount of delay. + return 10 * time.Millisecond + } + + // The time until the next rollover is the difference between the next bucket start time and now. 
+ rolloverIn := nextBucketStart.Sub(now) + logrus.WithFields(logrus.Fields{ + "nextBucketStart": nextBucketStart.Unix(), + "now": now.Unix(), + "rolloverIn": rolloverIn, + }).Debug("Scheduling next rollover") + return rolloverIn +} + +func (a *LogAggregator) handleFlowUpdate(upd *proto.FlowUpdate) { + logrus.WithField("update", upd).Debug("Received FlowUpdate") + + // Find the bucket whose time window contains this Flow's start time. + i, bucket := a.findBucket(upd.Flow.StartTime) + if bucket == nil { + logrus.WithFields(logrus.Fields{ + "time": upd.Flow.StartTime, + "oldest": a.buckets[len(a.buckets)-1].StartTime, + "newest": a.buckets[0].EndTime, + }).Warn("Failed to find bucket, unable to ingest flow") + return + } + + logrus.WithField("idx", i).WithFields(bucket.Fields()).Debug("Adding flow to bucket") + bucket.AddFlow(types.ProtoToFlow(upd.Flow)) +} + +func (a *LogAggregator) findBucket(time int64) (int, *AggregationBucket) { + // Find the bucket that contains the given time (inclusive bounds), returning a pointer to the element in a.buckets rather than to a loop-variable copy. + for i := range a.buckets { + if b := &a.buckets[i]; time >= b.StartTime && time <= b.EndTime { + return i, b + } + } + logrus.WithField("time", time).Warn("Failed to find bucket") + return 0, nil +} diff --git a/goldmane/pkg/aggregator/aggregator_test.go b/goldmane/pkg/aggregator/aggregator_test.go new file mode 100644 index 00000000000..f5964e761a9 --- /dev/null +++ b/goldmane/pkg/aggregator/aggregator_test.go @@ -0,0 +1,538 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package aggregator_test + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + googleproto "google.golang.org/protobuf/proto" + + "github.com/projectcalico/calico/goldmane/pkg/aggregator" + "github.com/projectcalico/calico/goldmane/pkg/internal/types" + "github.com/projectcalico/calico/goldmane/pkg/internal/utils" + "github.com/projectcalico/calico/goldmane/proto" + "github.com/projectcalico/calico/libcalico-go/lib/logutils" +) + +var ( + agg *aggregator.LogAggregator + c *clock +) + +func setupTest(t *testing.T, opts ...aggregator.Option) func() { + // Hook logrus into testing.T + utils.ConfigureLogging("DEBUG") + logCancel := logutils.RedirectLogrusToTestingT(t) + agg = aggregator.NewLogAggregator(opts...) + return func() { + agg.Stop() + agg = nil + c = nil + logCancel() + } +} + +func ExpectFlowsEqual(t *testing.T, expected, actual *proto.Flow) { + if !googleproto.Equal(expected, actual) { + t.Errorf("Expected %v, got %v", expected, actual) + } +} + +func TestIngestFlowLogs(t *testing.T) { + c := newClock(100) + now := c.Now().Unix() + opts := []aggregator.Option{ + aggregator.WithRolloverTime(1 * time.Second), + aggregator.WithNowFunc(c.Now), + } + defer setupTest(t, opts...)() + + // Start the aggregator. + go agg.Run(now) + + // Ingest a flow log. + fl := &proto.Flow{ + Key: &proto.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: now - 15, + EndTime: now, + BytesIn: 100, + BytesOut: 200, + PacketsIn: 10, + PacketsOut: 20, + NumConnectionsStarted: 1, + } + agg.Receive(&proto.FlowUpdate{Flow: fl}) + + // Expect the aggregator to have received it. 
+ var flows []*proto.Flow + require.Eventually(t, func() bool { + flows = agg.GetFlows(&proto.FlowRequest{}) + return len(flows) == 1 + }, 100*time.Millisecond, 10*time.Millisecond, "Didn't receive flow") + + // Expect aggregation to have happened. + exp := proto.Flow{ + Key: &proto.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: flows[0].StartTime, + EndTime: flows[0].EndTime, + BytesIn: 100, + BytesOut: 200, + PacketsIn: 10, + PacketsOut: 20, + NumConnectionsStarted: 1, + } + + ExpectFlowsEqual(t, &exp, flows[0]) + + // Send another copy of the flow log. + agg.Receive(&proto.FlowUpdate{Flow: fl}) + + // Expect the aggregator to have received it. + flows = agg.GetFlows(&proto.FlowRequest{}) + require.Len(t, flows, 1) + + // Expect aggregation to have happened. + exp.NumConnectionsStarted = 2 + exp.BytesIn = 200 + exp.BytesOut = 400 + exp.PacketsIn = 20 + exp.PacketsOut = 40 + ExpectFlowsEqual(t, &exp, flows[0]) + + // Wait for the aggregator to rollover. + time.Sleep(1001 * time.Millisecond) + + // Send another flow log. + agg.Receive(&proto.FlowUpdate{Flow: fl}) + + // Expect the aggregator to have received it. This should be added to a new bucket, + // but aggregated into the same flow on read. 
+ flows = agg.GetFlows(&proto.FlowRequest{}) + require.Len(t, flows, 1) + + exp2 := proto.Flow{ + Key: &proto.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: flows[0].StartTime, + EndTime: flows[0].EndTime, + BytesIn: 300, + BytesOut: 600, + PacketsIn: 30, + PacketsOut: 60, + NumConnectionsStarted: 3, + } + ExpectFlowsEqual(t, &exp2, flows[0]) +} + +func TestManyFlows(t *testing.T) { + c := newClock(100) + now := c.Now().Unix() + opts := []aggregator.Option{ + aggregator.WithRolloverTime(1 * time.Second), + aggregator.WithNowFunc(c.Now), + } + defer setupTest(t, opts...)() + go agg.Run(now) + + // Create 20k flows and send them as fast as we can. See how the aggregator handles it. + fl := &proto.Flow{ + Key: &proto.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: now - 15, + EndTime: now, + BytesIn: 100, + BytesOut: 200, + PacketsIn: 10, + PacketsOut: 20, + NumConnectionsStarted: 1, + } + for i := 0; i < 20000; i++ { + agg.Receive(&proto.FlowUpdate{Flow: fl}) + } + + // Query for the flow. + var flows []*proto.Flow + require.Eventually(t, func() bool { + flows = agg.GetFlows(&proto.FlowRequest{}) + if len(flows) != 1 { + return false + } + return flows[0].NumConnectionsStarted == 20000 + }, 1*time.Second, 20*time.Millisecond, "Didn't reach 20k flows: %d", len(flows)) +} + +func TestPagination(t *testing.T) { + c := newClock(100) + now := c.Now().Unix() + opts := []aggregator.Option{ + aggregator.WithRolloverTime(1 * time.Second), + aggregator.WithNowFunc(c.Now), + } + defer setupTest(t, opts...)() + go agg.Run(now) + + // Create 30 different flows. + for i := 0; i < 30; i++ { + fl := &proto.Flow{ + Key: &proto.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + + // Each flow is to a unique destination, thus making the flow unique. 
+ DestName: fmt.Sprintf("test-dst-%d", i), + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + + // Give each flow a unique time stamp, for deterministic ordering. + StartTime: now - int64(i), + EndTime: now - int64(i) + 1, + BytesIn: 100, + BytesOut: 200, + PacketsIn: 10, + PacketsOut: 20, + NumConnectionsStarted: 1, + } + agg.Receive(&proto.FlowUpdate{Flow: fl}) + } + + // Query without pagination. + var flows []*proto.Flow + require.Eventually(t, func() bool { + flows = agg.GetFlows(&proto.FlowRequest{}) + return len(flows) == 30 + }, 100*time.Millisecond, 10*time.Millisecond, "Didn't receive all flows") + + // Query with a page size of 5. + page1 := agg.GetFlows(&proto.FlowRequest{PageSize: 5}) + require.Len(t, page1, 5) + require.Equal(t, page1[0].StartTime, int64(100)) + require.Equal(t, page1[4].StartTime, int64(96)) + + // Query the third page - should be a different 5 flows (skipping page 2). + page3 := agg.GetFlows(&proto.FlowRequest{PageSize: 5, PageNumber: 2}) + require.Len(t, page3, 5) + require.Equal(t, page3[0].StartTime, int64(90)) + require.Equal(t, page3[4].StartTime, int64(86)) + + // Pages should not be equal. + require.NotEqual(t, page1, page3) + + // Query the third page again. It should be consistent (since no new data). + page2Again := agg.GetFlows(&proto.FlowRequest{PageSize: 5, PageNumber: 2}) + require.Equal(t, page3, page2Again) +} + +func TestTimeRanges(t *testing.T) { + c := newClock(100) + now := c.Now().Unix() + opts := []aggregator.Option{ + aggregator.WithRolloverTime(1 * time.Second), + aggregator.WithNowFunc(c.Now), + } + prepareFlows := func() { + // Create a flow spread across the full range of buckets within the aggregator. + // 60 buckets of 1s each means we want one flow per second for 60s. + for i := 0; i < 60; i++ { + flow := &proto.Flow{ + // Start one rollover period into the future, since that is how the aggregator works. 
+ Key: &proto.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: now + 1 - int64(i), + EndTime: now + 1 - int64(i-1), + BytesIn: 100, + BytesOut: 200, + PacketsIn: 10, + PacketsOut: 20, + NumConnectionsStarted: 1, + } + agg.Receive(&proto.FlowUpdate{Flow: flow}) + } + } + + type testCase struct { + name string + query *proto.FlowRequest + expectedNumConnectionsStarted int + expectNoMatch bool + } + + tests := []testCase{ + { + name: "All flows", + query: &proto.FlowRequest{}, + expectedNumConnectionsStarted: 60, + }, + { + name: "10s of flows", + query: &proto.FlowRequest{StartTimeGt: now - 10, StartTimeLt: now}, + expectedNumConnectionsStarted: 10, + }, + { + name: "10s of flows, starting in the future", + query: &proto.FlowRequest{StartTimeGt: now + 10, StartTimeLt: now + 20}, + // Should return no flows, since the query is in the future. + expectNoMatch: true, + }, + { + name: "5s of flows", + query: &proto.FlowRequest{StartTimeGt: now - 12, StartTimeLt: now - 7}, + expectedNumConnectionsStarted: 5, + }, + { + name: "end time before start time", + query: &proto.FlowRequest{StartTimeGt: now - 7, StartTimeLt: now - 12}, + // Should return no flows, since the query covers 0s. + expectNoMatch: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + defer setupTest(t, opts...)() + go agg.Run(now) + + // Create flows. + prepareFlows() + + // Run the query, and check how many flows we get back. + var flows []*proto.Flow + + if !test.expectNoMatch { + // Should return one aggregated flow that sums the component flows. 
+ require.Eventually(t, func() bool { + flows = agg.GetFlows(test.query) + return len(flows) == 1 + }, 100*time.Millisecond, 10*time.Millisecond, "Didn't receive flow") + require.Eventually(t, func() bool { + flows = agg.GetFlows(test.query) + return flows[0].NumConnectionsStarted == int64(test.expectedNumConnectionsStarted) + }, 100*time.Millisecond, 10*time.Millisecond, "Expected %d to equal %d", flows[0].NumConnectionsStarted, test.expectedNumConnectionsStarted) + + // Verify other fields are aggregated correctly. + exp := proto.Flow{ + Key: &proto.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: flows[0].StartTime, + EndTime: flows[0].EndTime, + BytesIn: 100 * int64(test.expectedNumConnectionsStarted), + BytesOut: 200 * int64(test.expectedNumConnectionsStarted), + PacketsIn: 10 * int64(test.expectedNumConnectionsStarted), + PacketsOut: 20 * int64(test.expectedNumConnectionsStarted), + NumConnectionsStarted: int64(test.expectedNumConnectionsStarted), + } + ExpectFlowsEqual(t, &exp, flows[0]) + } else { + // Should consistently return no flows. + for i := 0; i < 10; i++ { + flows := agg.GetFlows(test.query) + require.Len(t, flows, 0) + time.Sleep(10 * time.Millisecond) + } + } + }) + } +} + +func TestSink(t *testing.T) { + c := newClock(100) + now := c.Now().Unix() + + // Configure the aggregator with a test sink. + sink := &testSink{buckets: []*aggregator.AggregationBucket{}} + roller := &rolloverController{ + ch: make(chan time.Time), + aggregationWindowSecs: 1, + clock: c, + } + opts := []aggregator.Option{ + aggregator.WithRolloverTime(1 * time.Second), + aggregator.WithSink(sink), + aggregator.WithRolloverFunc(roller.After), + aggregator.WithNowFunc(c.Now), + } + defer setupTest(t, opts...)() + + // Start the aggregator, and trigger enough rollovers to trigger an emission. 
+ // We shouldn't see any buckets pushed to the sink, as we haven't sent any flows. + go agg.Run(now) + roller.rolloverAndAdvanceClock(35) + require.Len(t, sink.buckets, 0) + + // Place 5 new flow logs in the first 5 buckets of the aggregator. + for i := 0; i < 5; i++ { + fl := &proto.Flow{ + Key: &proto.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: roller.now() + 1 - int64(i), + EndTime: roller.now() + 2 - int64(i), + BytesIn: 100, + BytesOut: 200, + PacketsIn: 10, + PacketsOut: 20, + NumConnectionsStarted: 1, + } + agg.Receive(&proto.FlowUpdate{Flow: fl}) + } + + // Wait for all flows to be received. + time.Sleep(10 * time.Millisecond) + + // Rollover until index 4 is in the rollover location (idx 30). This will trigger + // a rollover of this batch of 5 buckets. + roller.rolloverAndAdvanceClock(25) + require.Len(t, sink.buckets, 0) + roller.rolloverAndAdvanceClock(1) + require.Len(t, sink.buckets, 1, "Expected 1 bucket to be pushed to the sink") + + // Bucket should be aggregated across 20 intervals, for a total of 20 seconds. + require.Equal(t, int64(20), sink.buckets[0].EndTime-sink.buckets[0].StartTime) + + // Expect the bucket to have aggregated to a single flow. + require.Len(t, sink.buckets[0].Flows, 1) + + // Statistics should be aggregated correctly. The flow time range should + // be updated to match the bucket time range, since the flow was present in + // each of the 5 intervals. + exp := proto.Flow{ + Key: &proto.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: sink.buckets[0].StartTime, + EndTime: sink.buckets[0].StartTime + 5, // 5 seconds of flow. 
+ BytesIn: 500, + BytesOut: 1000, + PacketsIn: 50, + PacketsOut: 100, + NumConnectionsStarted: 5, + } + flow := sink.buckets[0].Flows[*types.ProtoToFlowKey(exp.Key)] + require.NotNil(t, flow) + require.Equal(t, *types.ProtoToFlow(&exp), *flow) +} + +// TestBucketDrift makes sure that the aggregator is able to account for its internal array of +// aggregation buckets slowly drifting with respect to time.Now(). This can happen due to the time taken to process +// other operations on the shared main goroutine, and is accounted for by adjusting the the next rollover time. +func TestBucketDrift(t *testing.T) { + // Create a clock and rollover controller. + c := newClock(100) + aggregationWindowSecs := 10 + roller := &rolloverController{ + ch: make(chan time.Time), + aggregationWindowSecs: int64(aggregationWindowSecs), + clock: c, + } + + var rolloverScheduledAt time.Duration + rolloverFunc := func(d time.Duration) <-chan time.Time { + rolloverScheduledAt = d + return roller.After(d) + } + opts := []aggregator.Option{ + aggregator.WithRolloverTime(time.Duration(aggregationWindowSecs) * time.Second), + aggregator.WithRolloverFunc(rolloverFunc), + aggregator.WithNowFunc(c.Now), + } + defer setupTest(t, opts...)() + + // This can get a bit confusing, so let's walk through it: + // + // - The aggregator maintains an internal array of buckets. The most recent bucket actually starts one aggregation window in the future, to handle clock skew between nodes. + // - For this test, we want to simulate a rollover that happens slightly late. + // - Now() is mocked to 100, With an aggregation window of 10s. So buckets[0] will cover 110-120, bucket[1] will cover 100-110. + // - Normally, a rollover would occur at 110, adding a new bucket[0] covering 120-130. + // - For this test, we'll simulate a rollover at 113, which is 3 seconds late. 
+ // + // From there, we can expect the aggregator to notice that it has missed time somehow and accelerate the scheduling of the next rollover + // in order to compensate. + go agg.Run(c.Now().Unix()) + + // We want to simulate a rollover that happens at 113, which is 3 seconds late for the scheduled 110 rollover. + c.Set(time.Unix(113, 0)) + roller.rollover() + + // Assert that the rollover function was called with an expedited reschedule time of 7 seconds, compared to the + // expected rollover interval of 10 seconds. + require.Equal(t, 7, int(rolloverScheduledAt.Seconds()), "Expedited rollover should have been scheduled at 7s") + + // Advance the clock to 120, the expected time of the next rollover. + c.Set(time.Unix(120, 0)) + + // Trigger another rollover. This time, the aggregator should have caught up, so the rollover should be scheduled + // at the expected time of one aggregation window in the future (10s). + roller.rollover() + + require.Equal(t, aggregationWindowSecs, int(rolloverScheduledAt.Seconds()), "Expected rollover to be scheduled at 10s") + + // Now let's try the other dirction - simulate a rollover that happens 4 seconds early. + // We expect the next rollover to occur at 130, so trigger one at 126. + c.Set(time.Unix(126, 0)) + roller.rollover() + + // The aggregator should notice that it's ahead of schedule and delay the next rollover by 4 seconds. + require.Equal(t, 14, int(rolloverScheduledAt.Seconds()), "Delayed rollover should have been scheduled at 14s") + + // And check what happens if we're so far behind that the next bucket is already in the past. + // The next bucket should start at 140, so trigger a rollover at 155. + // This should trigger an immediate rollover. 
+ c.Set(time.Unix(155, 0)) + roller.rollover() + require.Equal(t, 10*time.Millisecond, rolloverScheduledAt, "Immediate rollover should have been scheduled for 10ms") +} diff --git a/goldmane/pkg/aggregator/flow.go b/goldmane/pkg/aggregator/flow.go new file mode 100644 index 00000000000..32e4a8a3c76 --- /dev/null +++ b/goldmane/pkg/aggregator/flow.go @@ -0,0 +1,85 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package aggregator + +import ( + "github.com/projectcalico/calico/goldmane/pkg/internal/types" + "github.com/projectcalico/calico/goldmane/proto" + "github.com/projectcalico/calico/libcalico-go/lib/set" +) + +// flowMatches returns true if the flow matches the request. +func flowMatches(f *types.Flow, req *proto.FlowRequest) bool { + // Check if the time range matches the flow's start time. + if req.StartTimeGt > 0 && f.StartTime < req.StartTimeGt { + return false + } + if req.StartTimeLt > 0 && f.StartTime > req.StartTimeLt { + return false + } + return true +} + +// mergeFlowInto merges flow b into flow a. +func mergeFlowInto(a, b *types.Flow) { + // Merge in statistics. 
+ a.PacketsIn += b.PacketsIn + a.PacketsOut += b.PacketsOut + a.BytesIn += b.BytesIn + a.BytesOut += b.BytesOut + a.NumConnectionsStarted += b.NumConnectionsStarted + a.NumConnectionsCompleted += b.NumConnectionsCompleted + a.NumConnectionsLive += b.NumConnectionsLive + + // Update Start/End times, to indicate the full duration across all of the + // component flows that have been merged into this aggregated one. + if a.StartTime > b.StartTime { + // The existing flow was present in a later (chronologically) bucket, we need to update the start time + // of the flow to the start time of this (earlier chronologically) bucket. + a.StartTime = b.StartTime + } + if a.EndTime < b.EndTime { + // The existing flow was present in an earlier (chronologically) bucket, we need to update the end time + // of the flow to the end time of this (later chronologically) bucket. + a.EndTime = b.EndTime + } + + // To merge labels, we include the intersection of the labels from both flows. + // This means the resulting aggregated flow will have all the labels common to + // its component flows. + a.SourceLabels = intersection(a.SourceLabels, b.SourceLabels) + a.DestLabels = intersection(a.DestLabels, b.DestLabels) +} + +// intersection returns the intersection of two slices of strings. i.e., all the values that +// exist in both input slices. +func intersection(a, b []string) []string { + labelsA := set.New[string]() + labelsB := set.New[string]() + intersection := set.New[string]() + for _, v := range a { + labelsA.Add(v) + } + for _, v := range b { + labelsB.Add(v) + } + labelsA.Iter(func(l string) error { + if labelsB.Contains(l) { + intersection.Add(l) + } + return nil + }) + return intersection.Slice() +} diff --git a/goldmane/pkg/aggregator/options.go b/goldmane/pkg/aggregator/options.go new file mode 100644 index 00000000000..359758dba38 --- /dev/null +++ b/goldmane/pkg/aggregator/options.go @@ -0,0 +1,63 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. 
+ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package aggregator + +import ( + "time" +) + +type Option func(*LogAggregator) + +func WithSink(e Sink) Option { + return func(a *LogAggregator) { + a.sink = e + } +} + +// WithRolloverTime sets the rollover time for the aggregator. This configures the bucket size used +// to aggregate flows across nodes in the cluster. +func WithRolloverTime(rollover time.Duration) Option { + return func(a *LogAggregator) { + a.aggregationWindow = rollover + } +} + +// WithRolloverFunc allows manual control over the rollover timer, used in tests. +func WithRolloverFunc(f func(time.Duration) <-chan time.Time) Option { + return func(a *LogAggregator) { + a.rolloverFunc = f + } +} + +// WithBucketsToCombine sets the number of buckets to combine when pushing flows to the sink. +// This controls time-based aggregation when emiting flows. +func WithBucketsToCombine(numBuckets int) Option { + return func(a *LogAggregator) { + a.bucketsToAggregate = numBuckets + } +} + +// WithPushIndex sets the index of the bucket which triggers pushing to the emitter. 
+func WithPushIndex(index int) Option { + return func(a *LogAggregator) { + a.pushIndex = index + } +} + +func WithNowFunc(f func() time.Time) Option { + return func(a *LogAggregator) { + a.nowFunc = f + } +} diff --git a/goldmane/pkg/aggregator/types.go b/goldmane/pkg/aggregator/types.go new file mode 100644 index 00000000000..aea1c483829 --- /dev/null +++ b/goldmane/pkg/aggregator/types.go @@ -0,0 +1,164 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package aggregator + +import ( + "time" + + "github.com/sirupsen/logrus" + + "github.com/projectcalico/calico/goldmane/pkg/internal/types" + "github.com/projectcalico/calico/libcalico-go/lib/set" +) + +// An aggregation bucket represents a bucket of aggregated flows across a time range. +type AggregationBucket struct { + // The start and end time of the bucket. + StartTime int64 + EndTime int64 + + // Pushed indicates whether this bucket has been pushed to the emitter. + Pushed bool + + // Flows contains the aggregated flows for this bucket. + Flows map[types.FlowKey]*types.Flow + + // Index flows by policy rule. This allows us to quickly generate per-rule statistics + // for a given time range. 
+ RuleIndex map[string]set.Set[types.FlowKey] +} + +func (b *AggregationBucket) AddFlow(flow *types.Flow) { + if b.Pushed { + logrus.WithField("flow", flow).Warn("Adding flow to already published bucket") + } + + // Check if there is a FlowKey entry for this Flow within this bucket. + f, ok := b.Flows[*flow.Key] + if !ok { + cp := *flow + f = &cp + } else { + // Update flow stats based on the flowlog. + mergeFlowInto(f, flow) + } + + // Update the flow in the bucket. + b.Flows[*flow.Key] = f + + // Update the rule index. + if flow.Key.Policies != nil { + for _, rule := range flow.Key.Policies.AllPolicies { + if _, ok := b.RuleIndex[rule]; !ok { + b.RuleIndex[rule] = set.New[types.FlowKey]() + } + b.RuleIndex[rule].Add(*flow.Key) + } + } +} + +func (b *AggregationBucket) DeepCopy() *AggregationBucket { + newBucket := NewAggregationBucket(time.Unix(b.StartTime, 0), time.Unix(b.EndTime, 0)) + newBucket.Pushed = b.Pushed + + // Copy over the flows. + newBucket.Flows = make(map[types.FlowKey]*types.Flow) + for k, v := range b.Flows { + cp := *v + newBucket.Flows[k] = &cp + } + + // Copy the rule index. + newBucket.RuleIndex = make(map[string]set.Set[types.FlowKey]) + for k, v := range b.RuleIndex { + newBucket.RuleIndex[k] = v.Copy() + } + + return newBucket +} + +func NewAggregationBucket(start, end time.Time) *AggregationBucket { + return &AggregationBucket{ + StartTime: start.Unix(), + EndTime: end.Unix(), + Flows: make(map[types.FlowKey]*types.Flow), + RuleIndex: make(map[string]set.Set[types.FlowKey]), + } +} + +// merge merges the flows from b2 into b. 
+func (b *AggregationBucket) merge(b2 *AggregationBucket) { + for k, v := range b2.Flows { + f, ok := b.Flows[k] + if !ok { + logrus.WithFields(b2.Fields()).Debug("Adding new flow contribution from bucket") + b.Flows[k] = v + } else { + logrus.WithFields(b2.Fields()).Debug("Updating flow contribution from bucket") + mergeFlowInto(f, v) + } + } +} + +func (b *AggregationBucket) Fields() logrus.Fields { + return logrus.Fields{ + "start_time": b.StartTime, + "end_time": b.EndTime, + "flows": len(b.Flows), + } +} + +func GetStartTime(interval int) int64 { + // Start time should always align to interval boundaries so that on restart + // we can deterministically create a consistent set of buckets. e.g., if the interval is 30s, + // then the start time should be a multiple of 30s. + var startTime int64 + for { + startTime = time.Now().Unix() + int64(interval) + if startTime%int64(interval) == 0 { + // We found a multiple - break out of the loop. + break + } + logrus.WithField("start_time", startTime).Debug("Waiting for start time to align to interval") + time.Sleep(1 * time.Second) + } + return startTime +} + +func InitialBuckets(n int, interval int, startTime int64) []AggregationBucket { + logrus.WithFields(logrus.Fields{ + "num": n, + "bucketSize": time.Duration(interval) * time.Second, + }).Debug("Initializing aggregation buckets") + + // Generate an array of N buckets of interval seconds each. + buckets := make([]AggregationBucket, n) + + // First bucket start time / end time. To account for some amount of clock drift, + // we'll start the first bucket one interval into the future and work backwards in + // time from that. This helps ensure that we don't miss any flows that come from nodes + // with a clock that's slightly ahead of ours. + startTime = startTime + int64(interval) + endTime := startTime + int64(interval) + + for i := 0; i < n; i++ { + // Each bucket is i*interval seconds further back in time. 
+ buckets[i] = *NewAggregationBucket( + time.Unix(startTime-int64(i*interval), 0), + time.Unix(endTime-int64(i*interval), 0), + ) + } + return buckets +} diff --git a/goldmane/pkg/aggregator/utils_test.go b/goldmane/pkg/aggregator/utils_test.go new file mode 100644 index 00000000000..f78fb729248 --- /dev/null +++ b/goldmane/pkg/aggregator/utils_test.go @@ -0,0 +1,84 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package aggregator_test + +import ( + "time" + + "github.com/projectcalico/calico/goldmane/pkg/aggregator" +) + +// testSink implements the Sink interface for testing. +type testSink struct { + buckets []*aggregator.AggregationBucket +} + +func (t *testSink) Receive(b *aggregator.AggregationBucket) { + t.buckets = append(t.buckets, b) +} + +// rolloverController is a helper struct to control when rollovers occur. +type rolloverController struct { + ch chan time.Time + clock *clock + aggregationWindowSecs int64 +} + +func (r *rolloverController) After(_ time.Duration) <-chan time.Time { + return r.ch +} + +// rollover triggers a rollover without advancing the internal clock. Clock manipulation is left to the caller. +func (r *rolloverController) rollover() { + r.ch <- r.clock.Now() + + // Wait for rollover to complete. + time.Sleep(10 * time.Millisecond) +} + +// rolloverAndAdvanceClock triggers n rollovers, advancing the internal clock by the aggregation window each time. 
+func (r *rolloverController) rolloverAndAdvanceClock(n int) { + for i := 0; i < n; i++ { + r.ch <- r.clock.Now() + r.clock.Advance(time.Duration(r.aggregationWindowSecs) * time.Second) + } + // Wait for rollovers to complete. + time.Sleep(10 * time.Millisecond) +} + +func (r *rolloverController) now() int64 { + return r.clock.Now().Unix() +} + +func newClock(t int64) *clock { + return &clock{t: t} +} + +// clock is a helper structure for tests that need control over time. +type clock struct { + t int64 +} + +func (c *clock) Now() time.Time { + return time.Unix(c.t, 0) +} + +func (c *clock) Advance(d time.Duration) { + c.t += int64(d.Seconds()) +} + +func (c *clock) Set(t time.Time) { + c.t = t.Unix() +} diff --git a/goldmane/pkg/client/client.go b/goldmane/pkg/client/client.go new file mode 100644 index 00000000000..6e8be00983a --- /dev/null +++ b/goldmane/pkg/client/client.go @@ -0,0 +1,164 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package client + +import ( + "context" + "time" + + "github.com/sirupsen/logrus" + "google.golang.org/grpc" + + "github.com/projectcalico/calico/goldmane/pkg/internal/flowcache" + "github.com/projectcalico/calico/goldmane/proto" +) + +const ( + FlowCacheExpiry = 5 * time.Minute + FlowCacheCleanup = 30 * time.Second +) + +func NewFlowClient(server string) *FlowClient { + return &FlowClient{ + inChan: make(chan *proto.Flow, 5000), + cache: flowcache.NewExpiringFlowCache(FlowCacheExpiry), + } +} + +// FlowClient pushes flow updates to the flow server. +type FlowClient struct { + inChan chan *proto.Flow + cache *flowcache.ExpiringFlowCache +} + +func (c *FlowClient) Run(ctx context.Context, grpcClient grpc.ClientConnInterface) { + logrus.Info("Starting flow client") + defer func() { + logrus.Info("Stopping flow client") + }() + + // Start the cache cleanup task. + go c.cache.Run(FlowCacheCleanup) + + // Create a new client to push flows to the server. + cli := proto.NewFlowCollectorClient(grpcClient) + + // Create a backoff helper. + b := newBackoff(1*time.Second, 10*time.Second) + + for { + // Check if the parent context has been canceled. + if err := ctx.Err(); err != nil { + logrus.WithError(err).Warn("Parent context canceled") + return + } + + // Connect to the flow server. This establishes a streaming connection over which + // we can send flow updates. + rc, err := cli.Connect(ctx) + if err != nil { + logrus.WithError(err).Warn("Failed to connect to flow server") + b.Wait() + continue + } + + logrus.Info("Connected to flow server") + b.Reset() + + // On a new connection, send all of the flows that we have cached. We're assuming + // this indicates a restart of the flow server. The flow server will handle deuplication + // if we happen to send the same flow twice. + err = c.cache.Iter(func(f *proto.Flow) error { + // Send. 
+ if err := rc.Send(&proto.FlowUpdate{Flow: f}); err != nil { + logrus.WithError(err).Warn("Failed to send flow") + return err + } + // Get receipt. + if _, err := rc.Recv(); err != nil { + logrus.WithError(err).Warn("Failed to receive receipt") + return err + } + return nil + }) + if err != nil { + b.Wait() + continue + } + + // Send new Flows as they are received. + for flog := range c.inChan { + // Add the flow to our cache. It will automatically be expired in the background. + // We don't need to pass in a value for scope, since the client is intrinsically scoped + // to a particular node. + c.cache.Add(flog, "") + + // Send the flow. + if err := rc.Send(&proto.FlowUpdate{Flow: flog}); err != nil { + logrus.WithError(err).Warn("Failed to send flow") + break + } + + // Receive a receipt. + if _, err := rc.Recv(); err != nil { + logrus.WithError(err).Warn("Failed to receive receipt") + break + } + } + + if err := rc.CloseSend(); err != nil { + logrus.WithError(err).Warn("Failed to close connection") + } + b.Wait() + } +} + +func (c *FlowClient) Push(f *proto.Flow) { + // Make a copy of the flow to decouple the caller from the client. + cp := f + select { + case c.inChan <- cp: + default: + logrus.Warn("Flow client buffer full, dropping flow") + } +} + +// backoff is a small helper to implement exponential backoff. 
+func newBackoff(base, maxBackoff time.Duration) *backoff { + return &backoff{ + base: base, + interval: base, + maxBackoff: maxBackoff, + } +} + +type backoff struct { + base time.Duration + interval time.Duration + maxBackoff time.Duration +} + +func (b *backoff) Wait() { + logrus.WithField("duration", b.interval).Info("Waiting before next connection attempt") + time.Sleep(b.interval) + b.interval *= 2 + if b.interval > b.maxBackoff { + b.interval = b.maxBackoff + } +} + +func (b *backoff) Reset() { + b.interval = b.base +} diff --git a/goldmane/pkg/collector/collector.go b/goldmane/pkg/collector/collector.go new file mode 100644 index 00000000000..9a587577792 --- /dev/null +++ b/goldmane/pkg/collector/collector.go @@ -0,0 +1,113 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "io" + + "github.com/sirupsen/logrus" + "google.golang.org/grpc" + "google.golang.org/grpc/peer" + + "github.com/projectcalico/calico/goldmane/pkg/client" + "github.com/projectcalico/calico/goldmane/pkg/internal/flowcache" + "github.com/projectcalico/calico/goldmane/proto" +) + +type Sink interface { + Receive(*proto.FlowUpdate) +} + +// NewFlowCollector returns a new push collector, which handles incoming flow streams from nodes in the cluster. 
+func NewFlowCollector(sink Sink) *collector { + return &collector{ + sink: sink, + deduplicator: flowcache.NewExpiringFlowCache(client.FlowCacheExpiry), + } +} + +type collector struct { + proto.UnimplementedFlowCollectorServer + + // sink is where we will send flows upon receipt. + sink Sink + + // deduplicator is used to deduplicate flows received from clients upon connection resets. + deduplicator *flowcache.ExpiringFlowCache +} + +func (p *collector) Run() { + logrus.Info("Starting flow collector") + p.deduplicator.Run(client.FlowCacheCleanup) +} + +func (p *collector) RegisterWith(srv *grpc.Server) { + // Register the collector with the gRPC server. + proto.RegisterFlowCollectorServer(srv, p) + logrus.Info("Registered FlowCollector Server") +} + +func (p *collector) Connect(srv proto.FlowCollector_ConnectServer) error { + return p.handleClient(srv) +} + +func (p *collector) handleClient(srv proto.FlowCollector_ConnectServer) error { + scope := "unknown" + pr, ok := peer.FromContext(srv.Context()) + if ok { + scope = pr.Addr.String() + } + logCtx := logrus.WithField("who", scope) + logCtx.Info("Connection from client") + + num := 0 + defer func() { + logCtx.WithField("numFlows", num).Info("Connection from client completed.") + }() + + for { + upd, err := srv.Recv() + if err == io.EOF { + logCtx.Info("Client closed connection") + return nil + } + if err != nil { + logCtx.WithError(err).Error("Failed to receive flow") + return err + } + + // Skip flows that we have already received from this node. This is a simple deduplication + // mechanism to avoid processing the same flow if the connection is reset for some reason. + // Should this happen, the client will resend all its flows and we must ensure we don't process + // the same flow twice. + if !p.deduplicator.Has(upd.Flow, scope) { + + // Add it to the deduplicator, scoped to the client's address (i.e., per-node). 
+ // The cache will automatically time out this flow in the background when it is no longer + // relevant. + p.deduplicator.Add(upd.Flow, scope) + + // Send the flow to the configured Sink. + p.sink.Receive(upd) + } + num++ + + // Tell the client we have received the flow. + if err = srv.Send(&proto.FlowReceipt{}); err != nil { + logCtx.WithError(err).Error("Failed to send receipt") + return err + } + } +} diff --git a/goldmane/pkg/daemon/daemon.go b/goldmane/pkg/daemon/daemon.go new file mode 100644 index 00000000000..a87982d763a --- /dev/null +++ b/goldmane/pkg/daemon/daemon.go @@ -0,0 +1,143 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package daemon + +import ( + "fmt" + "log" + "net" + "os" + "time" + + "github.com/kelseyhightower/envconfig" + "github.com/sirupsen/logrus" + "google.golang.org/grpc" + "k8s.io/client-go/tools/clientcmd" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/projectcalico/calico/goldmane/pkg/aggregator" + "github.com/projectcalico/calico/goldmane/pkg/collector" + "github.com/projectcalico/calico/goldmane/pkg/emitter" + "github.com/projectcalico/calico/goldmane/pkg/internal/utils" + "github.com/projectcalico/calico/goldmane/pkg/server" +) + +type Config struct { + // LogLevel is the log level to use. + LogLevel string `json:"log_level" envconfig:"LOG_LEVEL" default:"info"` + + // PushURL is the URL to push logs to, if set. 
Logs will be pushed + // periodically in a bulk format. + PushURL string `json:"push_url" envconfig:"PUSH_URL"` + + // Port is the port to listen on for gRPC connections. + Port int `json:"port" envconfig:"PORT" default:"443"` + + // ClientKeyPath, ClientCertPath, and CACertPath are paths to the client key, client cert, and CA cert + // used when publishing logs to an HTTPS endpoint. + ClientCertPath string `json:"ca_client_cert_path" envconfig:"CLIENT_CERT_PATH"` + ClientKeyPath string `json:"client_key_path" envconfig:"CLIENT_KEY_PATH"` + CACertPath string `json:"ca_cert_path" envconfig:"CA_CERT_PATH"` + ServerName string `json:"server_name" envconfig:"SERVER_NAME" default:"tigera-linseed.tigera-elasticsearch.svc"` + + // AggregationWindow is the size in seconds of each bucket used when aggregating flows received + // from each node in the cluster. + AggregationWindow time.Duration `json:"rollover_time" envconfig:"ROLLOVER_TIME" default:"15s"` + + // The number of buckets to combine when pushing flows to the sink. This can be used to reduce the number + // buckets combined into time-aggregated flows that are sent to the sink. + NumBucketsToCombine int `json:"num_buckets_to_combine" envconfig:"NUM_BUCKETS_TO_COMBINE" default:"20"` + + // PushIndex is the index of the bucket which triggers pushing to the emitter. A larger value + // will increase the latency of emitted flows, while a smaller value will cause the emitter to emit + // potentially incomplete flows. + PushIndex int `json:"push_index" envconfig:"PUSH_INDEX" default:"30"` +} + +func Run() { + // Load configuration from environment variables. + var cfg Config + if err := envconfig.Process("", &cfg); err != nil { + logrus.WithError(err).Fatal("Failed to load configuration from environment") + } + + utils.ConfigureLogging(cfg.LogLevel) + logrus.WithField("cfg", cfg).Info("Loaded configuration") + + // Create a stop channel. + stopCh := make(chan struct{}) + + // Create a Kubenetes client. 
If we fail to create the client, we will log a warning and continue, + // but we will not be able to use the client to e.g., cache emitter progress. + var kclient client.Client + cliCfg, err := clientcmd.BuildConfigFromFlags("", os.Getenv("KUBECONFIG")) + if err != nil { + logrus.WithError(err).Warn("Failed to load Kubernetes client configuration") + } else { + kclient, err = client.New(cliCfg, client.Options{}) + if err != nil { + logrus.WithError(err).Warn("Failed to create Kubernetes client") + } + } + + // Create the shared gRPC server. + grpcServer := grpc.NewServer() + + // Track options for log aggregator. + aggOpts := []aggregator.Option{ + aggregator.WithRolloverTime(cfg.AggregationWindow), + aggregator.WithBucketsToCombine(cfg.NumBucketsToCombine), + aggregator.WithPushIndex(cfg.PushIndex), + } + + if cfg.PushURL != "" { + // Create an emitter, which forwards flows to an upstream HTTP endpoint. + logEmitter := emitter.NewEmitter( + emitter.WithKubeClient(kclient), + emitter.WithURL(cfg.PushURL), + emitter.WithCACertPath(cfg.CACertPath), + emitter.WithClientKeyPath(cfg.ClientKeyPath), + emitter.WithClientCertPath(cfg.ClientCertPath), + emitter.WithServerName(cfg.ServerName), + ) + aggOpts = append(aggOpts, aggregator.WithSink(logEmitter)) + go logEmitter.Run(stopCh) + } + + // Create an aggregator and collector, and connect the collector to the aggregator. + agg := aggregator.NewLogAggregator(aggOpts...) + collector := collector.NewFlowCollector(agg) + collector.RegisterWith(grpcServer) + go collector.Run() + + // Start the aggregator. + go agg.Run(aggregator.GetStartTime(int(cfg.AggregationWindow.Seconds()))) + + // Start a flow server, serving from the aggregator. + flowServer := server.NewServer(agg) + flowServer.RegisterWith(grpcServer) + + // Start the gRPC server. 
+ lis, err := net.Listen("tcp", fmt.Sprintf(":%d", cfg.Port)) + if err != nil { + log.Fatalf("failed to listen: %v", err) + } + logrus.Info("Listening on ", cfg.Port) + + if err := grpcServer.Serve(lis); err != nil { + log.Fatalf("failed to serve: %v", err) + } + <-stopCh +} diff --git a/goldmane/pkg/emitter/bucket_cache.go b/goldmane/pkg/emitter/bucket_cache.go new file mode 100644 index 00000000000..b9e769271bc --- /dev/null +++ b/goldmane/pkg/emitter/bucket_cache.go @@ -0,0 +1,65 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package emitter + +import ( + "sync" + + "github.com/sirupsen/logrus" + + "github.com/projectcalico/calico/goldmane/pkg/aggregator" +) + +type bucketKey struct { + startTime int64 + endTime int64 +} + +// bucketCache is a thread-safe cache of aggregation buckets. +type bucketCache struct { + sync.Mutex + buckets map[bucketKey]*aggregator.AggregationBucket +} + +func newBucketCache() *bucketCache { + return &bucketCache{ + buckets: map[bucketKey]*aggregator.AggregationBucket{}, + } +} + +func (b *bucketCache) add(k bucketKey, bucket *aggregator.AggregationBucket) { + b.Lock() + defer b.Unlock() + if _, exists := b.buckets[k]; exists { + // This should never happen, but log an error if it does. This prevents + // us from overwriting a bucket that's already in the map, which indicates an upstream bug. 
+ logrus.WithField("bucket", k).Error("Duplicate bucket received.") + return + } + b.buckets[k] = bucket +} + +func (b *bucketCache) get(k bucketKey) (*aggregator.AggregationBucket, bool) { + b.Lock() + defer b.Unlock() + bucket, exists := b.buckets[k] + return bucket, exists +} + +func (b *bucketCache) remove(k bucketKey) { + b.Lock() + defer b.Unlock() + delete(b.buckets, k) +} diff --git a/goldmane/pkg/emitter/emitter.go b/goldmane/pkg/emitter/emitter.go new file mode 100644 index 00000000000..c0de916221b --- /dev/null +++ b/goldmane/pkg/emitter/emitter.go @@ -0,0 +1,277 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package emitter + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "strconv" + "time" + + "github.com/sirupsen/logrus" + "golang.org/x/time/rate" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/workqueue" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/projectcalico/calico/goldmane/pkg/aggregator" +) + +var ( + maxRetries = 15 + configMapKey = types.NamespacedName{Name: "flow-emitter-state", Namespace: "calico-system"} +) + +// Emitter is a type that emits aggregated Flow objects to an HTTP endpoint. +type Emitter struct { + client *emitterClient + + kcli client.Client + + // Configuration for emitter endpoint. 
+ url string + caCert string + clientKey string + clientCert string + serverName string + + // Use a rate limited workqueue to manage bucket emission. + buckets *bucketCache + q workqueue.TypedRateLimitingInterface[bucketKey] + + // Track the latest timestamp of emitted flows. This helps us avoid emitting the same flow multiple times + // on restart. + latestTimestamp int64 +} + +// Make sure Emitter implements the Receiver interface to be able to receive aggregated Flows. +var _ aggregator.Sink = &Emitter{} + +func NewEmitter(opts ...Option) *Emitter { + e := &Emitter{ + buckets: newBucketCache(), + q: workqueue.NewTypedRateLimitingQueue( + workqueue.NewTypedMaxOfRateLimiter( + workqueue.NewTypedItemExponentialFailureRateLimiter[bucketKey](1*time.Second, 30*time.Second), + &workqueue.TypedBucketRateLimiter[bucketKey]{Limiter: rate.NewLimiter(rate.Limit(10), 100)}, + )), + } + + for _, opt := range opts { + opt(e) + } + + var err error + e.client, err = newEmitterClient(e.url, e.caCert, e.clientKey, e.clientCert, e.serverName) + if err != nil { + logrus.Fatalf("Error creating emitter client: %v", err) + } + logrus.WithField("url", e.url).Info("Created emitter client.") + + if e.kcli == nil { + logrus.Warn("No k8s client provided, will not be able to cache state.") + } + + return e +} + +func (e *Emitter) Run(stopCh chan struct{}) { + // Start by loading any state cached in our configmap, which will allow us to better pick up where we left off + // in the event of a restart. + if err := e.loadCachedState(); err != nil { + logrus.Errorf("Error loading cached state: %v", err) + } + + // This is the main loop for the emitter. It listens for new batches of flows to emit and emits them. + for { + // Get pending work from the queue. 
+ key, quit := e.q.Get() + if quit { + logrus.WithField("cm", configMapKey).Info("Emitter shutting down.") + return + } + e.q.Done(key) + + bucket, ok := e.buckets.get(key) + if !ok { + logrus.WithField("bucket", key).Error("Bucket not found in cache.") + e.q.Forget(key) + continue + } + + // Emit the bucket. + if err := e.emit(bucket); err != nil { + logrus.Errorf("Error emitting flows to %s: %v", e.url, err) + e.retry(key) + continue + } + + // Success. Remove the bucket from our internal map, and + // clear it from the workqueue. + e.forget(key) + } +} + +func (e *Emitter) Receive(bucket *aggregator.AggregationBucket) { + // Add the bucket to our internal map so we can retry it if needed. + // We'll remove it from the map once it's successfully emitted. + k := bucketKey{startTime: bucket.StartTime, endTime: bucket.EndTime} + e.buckets.add(k, bucket) + e.q.Add(k) +} + +func (e *Emitter) retry(k bucketKey) { + if e.q.NumRequeues(k) < maxRetries { + logrus.WithField("bucket", k).Debug("Queueing retry for bucket.") + e.q.AddRateLimited(k) + } else { + logrus.WithField("bucket", k).Error("Max retries exceeded, dropping bucket.") + e.forget(k) + } +} + +// forget removes a bucket from the internal cache and the workqueue, and can be called safely +// from any goroutine after a bucket has been successfully emitted, or has reached the maximum +// maximum number of retries. +func (e *Emitter) forget(k bucketKey) { + e.buckets.remove(k) + e.q.Forget(k) +} + +func (e *Emitter) emit(bucket *aggregator.AggregationBucket) error { + // Check if we have already emitted this batch. If it pre-dates + // the latest timestamp we've emitted, skip it. This can happen, for example, on restart when + // we learn already emitted flows from the cache. + if bucket.EndTime <= e.latestTimestamp { + logrus.WithField("bucketEndTime", bucket.EndTime).Debug("Skipping already emitted flows.") + return nil + } + + // Marshal the flows to JSON and send them to the emitter. 
+ rdr, err := e.bucketToReader(bucket) + if err != nil { + return err + } + if err := e.client.Post(rdr); err != nil { + return err + } + + // Update the timestamp of the latest bucket emitted. + e.latestTimestamp = bucket.EndTime + + // Update our configmap with the latest published timestamp. + if err = e.saveState(); err != nil { + logrus.WithError(err).Warn("Error saving state.") + } + return nil +} + +func (e *Emitter) bucketToReader(bucket *aggregator.AggregationBucket) (*bytes.Reader, error) { + body := []byte{} + for _, flow := range bucket.Flows { + if len(body) != 0 { + // Include a separator between logs. + body = append(body, []byte("\n")...) + } + + flowJSON, err := json.Marshal(flow) + if err != nil { + return nil, fmt.Errorf("Error marshalling flow: %v", err) + } + body = append(body, flowJSON...) + } + return bytes.NewReader(body), nil +} + +// saveState updates cached metadata stored across restart. We use a configmap to +// track the latest timestamp of emitted flows so we can pick up where we left off on reboot. +func (e *Emitter) saveState() error { + if e.kcli == nil { + return nil + } + if e.latestTimestamp == 0 { + return nil + } + + // Query the latest configmap. + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + cm := &corev1.ConfigMap{} + if err := e.kcli.Get(ctx, configMapKey, cm); err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("error getting configmap: %v", err) + } else if errors.IsNotFound(err) { + // Configmap doesn't exist, create it. + cm.Name = configMapKey.Name + cm.Namespace = configMapKey.Namespace + cm.Data = map[string]string{} + } + + // Update the timestamp in the configmap. + cm.Data["latestTimestamp"] = fmt.Sprintf("%d", e.latestTimestamp) + logCtx := logrus.WithFields(logrus.Fields{ + "cm": configMapKey, + "latestTimestamp": cm.Data["latestTimestamp"], + }) + + if cm.ResourceVersion == "" { + // Create the configmap. 
+ if err := e.kcli.Create(context.Background(), cm); err != nil { + return fmt.Errorf("error creating configmap: %v", err) + } + logCtx.Debug("Created configmap") + } else { + // Update the configmap. + if err := e.kcli.Update(context.Background(), cm); err != nil { + return fmt.Errorf("error updating configmap: %v", err) + } + logCtx.Debug("Updated configmap") + } + return nil +} + +func (e *Emitter) loadCachedState() error { + if e.kcli == nil { + return nil + } + + // Query the latest configmap. + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + cm := &corev1.ConfigMap{} + if err := e.kcli.Get(ctx, configMapKey, cm); err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("error getting configmap: %v", err) + } else if errors.IsNotFound(err) { + logrus.WithField("cm", configMapKey).Debug("Configmap not found") + return nil + } + + raw, ok := cm.Data["latestTimestamp"] + if !ok { + return nil + } + + // Parse the timestamp from the configmap. + ts, err := strconv.ParseInt(raw, 10, 64) + if err != nil { + return fmt.Errorf("error parsing timestamp: %v", err) + } + e.latestTimestamp = ts + return nil +} diff --git a/goldmane/pkg/emitter/emitter_test.go b/goldmane/pkg/emitter/emitter_test.go new file mode 100644 index 00000000000..0682c941d5c --- /dev/null +++ b/goldmane/pkg/emitter/emitter_test.go @@ -0,0 +1,312 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package emitter_test + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ktypes "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/projectcalico/calico/goldmane/pkg/aggregator" + "github.com/projectcalico/calico/goldmane/pkg/emitter" + "github.com/projectcalico/calico/goldmane/pkg/internal/types" + "github.com/projectcalico/calico/goldmane/pkg/internal/utils" + "github.com/projectcalico/calico/libcalico-go/lib/logutils" +) + +var emt *emitter.Emitter + +var configMapKey = ktypes.NamespacedName{Name: "flow-emitter-state", Namespace: "calico-system"} + +func setupTest(t *testing.T, opts ...emitter.Option) func() { + // Hook logrus into testing.T + utils.ConfigureLogging("DEBUG") + logCancel := logutils.RedirectLogrusToTestingT(t) + + // Run the emitter. + stopCh := make(chan struct{}) + emt = emitter.NewEmitter(opts...) + go emt.Run(stopCh) + + return func() { + close(stopCh) + emt = nil + logCancel() + } +} + +func TestEmitterMainline(t *testing.T) { + // Create a flow to send. + flow := types.Flow{ + Key: &types.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: 18, + EndTime: 28, + BytesIn: 100, + BytesOut: 200, + PacketsIn: 10, + PacketsOut: 20, + NumConnectionsStarted: 1, + } + expectedBody, err := json.Marshal(flow) + require.NoError(t, err) + + // Creat a mock HTTP server to use as our sink. + numBucketsEmitted := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Verify the request. 
+ require.Equal(t, "/path/to/flows", r.URL.Path) + require.Equal(t, "POST", r.Method) + + // Read the body and assert it matches the expected flow. + buf := new(bytes.Buffer) + _, err := buf.ReadFrom(r.Body) + require.NoError(t, err) + require.Equal(t, buf.String(), string(expectedBody)) + w.WriteHeader(http.StatusOK) + + numBucketsEmitted++ + })) + + kcli := fake.NewFakeClient() + opts := []emitter.Option{ + emitter.WithURL(fmt.Sprintf("%s/path/to/flows", server.URL)), + emitter.WithServerName("test-server"), + emitter.WithKubeClient(kcli), + } + defer server.Close() + + // Set up the test. + defer setupTest(t, opts...)() + + // Send a bucket with a single flow. + b := aggregator.NewAggregationBucket(time.Unix(15, 0), time.Unix(30, 0)) + b.AddFlow(&flow) + emt.Receive(b) + + // Wait for the emitter to process the bucket. It should emit the flow to the mock server. + require.Eventually(t, func() bool { + return numBucketsEmitted == 1 + }, 5*time.Second, 500*time.Millisecond) + + // Verify that the emitter saved its state in a configmap. + cm := &corev1.ConfigMap{} + err = kcli.Get(context.Background(), configMapKey, cm) + require.NoError(t, err) + require.Equal(t, fmt.Sprintf("%d", b.EndTime), cm.Data["latestTimestamp"]) +} + +func TestEmitterRetry(t *testing.T) { + // Create a flow to send. + flow := types.Flow{ + Key: &types.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: 18, + EndTime: 28, + BytesIn: 100, + BytesOut: 200, + PacketsIn: 10, + PacketsOut: 20, + NumConnectionsStarted: 1, + } + expectedBody, err := json.Marshal(flow) + require.NoError(t, err) + + // Creat a mock HTTP server to use as our sink. + numBucketsEmitted := 0 + numRequests := 0 + + // For this test, we configure the server to fail the first request with a 500 error + // and then succeed on subsequent requests. This verifies that the emitter retries in the case + // of a failure. 
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if numRequests < 1 { + w.WriteHeader(500) + numRequests++ + return + } + numRequests++ + + // Verify the request. + require.Equal(t, "/path/to/flows", r.URL.Path) + require.Equal(t, "POST", r.Method) + + // Read the body and assert it matches the expected flow. + buf := new(bytes.Buffer) + _, err := buf.ReadFrom(r.Body) + require.NoError(t, err) + require.Equal(t, buf.String(), string(expectedBody)) + w.WriteHeader(http.StatusOK) + + numBucketsEmitted++ + })) + opts := []emitter.Option{ + emitter.WithURL(fmt.Sprintf("%s/path/to/flows", server.URL)), + emitter.WithServerName("test-server"), + } + defer server.Close() + + // Set up the test. + defer setupTest(t, opts...)() + + // Send a bucket with a single flow. + b := aggregator.NewAggregationBucket(time.Unix(15, 0), time.Unix(30, 0)) + b.AddFlow(&flow) + emt.Receive(b) + + // Wait for the emitter to process the bucket. It should emit the flow to the mock server. + require.Eventually(t, func() bool { + return numRequests >= 2 + }, 5*time.Second, 500*time.Millisecond, "Didn't retry the request?") + require.Eventually(t, func() bool { + return numBucketsEmitted == 1 + }, 5*time.Second, 500*time.Millisecond, "Didn't emit the flow?") +} + +// TestStaleBuckets tests that the emitter can properly skip emission of buckets that predate its latest +// saved timestamp. This can happen, for example, when goldmane restarts and learns about already emitted +// flows. +func TestStaleBuckets(t *testing.T) { + // Create a configmap which represents the latest timestamp emitted. This will be loaded by the emitter + // to determine which buckets to skip. + kcli := fake.NewFakeClient() + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "flow-emitter-state", + Namespace: "calico-system", + }, + Data: map[string]string{ + // latestTimestamp is AFTER the start / end times of the flow and bucket below. 
+ "latestTimestamp": "45", + }, + } + err := kcli.Create(context.Background(), cm) + require.NoError(t, err) + + // Two flows to send - one before the latest timestamp, and one after. + flow := types.Flow{ + Key: &types.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: 18, + EndTime: 28, + BytesIn: 100, + BytesOut: 200, + PacketsIn: 10, + PacketsOut: 20, + NumConnectionsStarted: 1, + } + flowOK := types.Flow{ + Key: &types.FlowKey{ + SourceName: "test-src", + SourceNamespace: "test-ns", + DestName: "test-dst", + DestNamespace: "test-dst-ns", + Proto: "tcp", + }, + StartTime: 61, + EndTime: 65, + BytesIn: 100, + BytesOut: 200, + PacketsIn: 10, + PacketsOut: 20, + NumConnectionsStarted: 1, + } + unexpectedBody, err := json.Marshal(flow) + require.NoError(t, err) + okBody, err := json.Marshal(flowOK) + require.NoError(t, err) + + // Creat a mock HTTP server to use as our sink. We don't expect any requests to be made. + numBucketsEmitted := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Check the body. We don't expect the first flow to be sent. + buf := new(bytes.Buffer) + _, err := buf.ReadFrom(r.Body) + require.NoError(t, err) + if buf.String() == string(unexpectedBody) { + require.Fail(t, "Unexpected flow sent to server") + } + require.Equal(t, buf.String(), string(okBody)) + + numBucketsEmitted++ + w.WriteHeader(http.StatusOK) + })) + + opts := []emitter.Option{ + emitter.WithURL(fmt.Sprintf("%s/path/to/flows", server.URL)), + emitter.WithServerName("test-server"), + emitter.WithKubeClient(kcli), + } + defer server.Close() + + // Set up the test. + defer setupTest(t, opts...)() + + // Send a bucket with a single flow. + b := aggregator.NewAggregationBucket(time.Unix(15, 0), time.Unix(30, 0)) + // Create a flow to send. 
+ b.AddFlow(&flow) + emt.Receive(b) + + // The emitter should skip emitting the bucket, and the flow should not be sent to the server. + // Wait a couple of seconds to confirm. + time.Sleep(2 * time.Second) + + // The timestamp should not be updated. + err = kcli.Get(context.Background(), configMapKey, cm) + require.NoError(t, err) + require.Equal(t, "45", cm.Data["latestTimestamp"]) + + // Send a new bucket that is after the latest timestamp. This one should be sent. + bOK := aggregator.NewAggregationBucket(time.Unix(60, 0), time.Unix(70, 0)) + bOK.AddFlow(&flowOK) + emt.Receive(bOK) + + // Expect the flow to be sent to the server. + require.Eventually(t, func() bool { + return numBucketsEmitted == 1 + }, 5*time.Second, 500*time.Millisecond) + + // The timestamp should be updated. + err = kcli.Get(context.Background(), configMapKey, cm) + require.NoError(t, err) + require.Equal(t, "70", cm.Data["latestTimestamp"]) +} diff --git a/goldmane/pkg/emitter/http_client.go b/goldmane/pkg/emitter/http_client.go new file mode 100644 index 00000000000..e70f124581c --- /dev/null +++ b/goldmane/pkg/emitter/http_client.go @@ -0,0 +1,97 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package emitter + +import ( + "crypto/tls" + "crypto/x509" + "fmt" + "io" + "net" + "net/http" + "os" + "time" + + "github.com/sirupsen/logrus" +) + +const ContentTypeMultilineJSON = "application/x-ndjson" + +func newHTTPClient(caCert, clientKey, clientCert, serverName string) (*http.Client, error) { + // Create a new HTTP client. + tlsConfig := &tls.Config{ServerName: serverName} + if caCert != "" { + caCertPool := x509.NewCertPool() + caCert, err := os.ReadFile(caCert) + if err != nil { + return nil, fmt.Errorf("error reading CA file: %s", err) + } + ok := caCertPool.AppendCertsFromPEM(caCert) + if !ok { + return nil, fmt.Errorf("failed to parse root certificate") + } + tlsConfig.RootCAs = caCertPool + } + + // Create a custom dialer so that we can configure a dial timeout. + // If we can't connect to the server within 10 seconds, something is up. + // Note: this is not the same as the request timeout, which is handled via the + // provided context on a per-request basis. + dialWithTimeout := func(network, addr string) (net.Conn, error) { + return net.DialTimeout(network, addr, 10*time.Second) + } + httpTransport := &http.Transport{ + Dial: dialWithTimeout, + TLSClientConfig: tlsConfig, + } + + if clientKey != "" && clientCert != "" { + clientCert, err := tls.LoadX509KeyPair(clientCert, clientKey) + if err != nil { + return nil, fmt.Errorf("error load cert key pair for emitter client: %s", err) + } + httpTransport.TLSClientConfig.Certificates = []tls.Certificate{clientCert} + logrus.Info("Using provided client certificates for mTLS") + } + return &http.Client{ + Transport: httpTransport, + }, nil +} + +func newEmitterClient(url, caCert, clientKey, clientCert, serverName string) (*emitterClient, error) { + client, err := newHTTPClient(caCert, clientKey, clientCert, serverName) + if err != nil { + return nil, err + } + return &emitterClient{url: url, client: client}, nil +} + +type emitterClient struct { + url string + client *http.Client +} + +func (e 
*emitterClient) Post(body io.Reader) error { + resp, err := e.client.Post(e.url, ContentTypeMultilineJSON, body) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("unexpected status code: %s", resp.Status) + } + logrus.WithField("body", resp.Body).Debug("Successfully posted flows") + return nil +} diff --git a/goldmane/pkg/emitter/options.go b/goldmane/pkg/emitter/options.go new file mode 100644 index 00000000000..bf84cf564f1 --- /dev/null +++ b/goldmane/pkg/emitter/options.go @@ -0,0 +1,55 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package emitter + +import "sigs.k8s.io/controller-runtime/pkg/client" + +type Option func(*Emitter) + +func WithURL(url string) Option { + return func(e *Emitter) { + e.url = url + } +} + +func WithCACertPath(path string) Option { + return func(e *Emitter) { + e.caCert = path + } +} + +func WithClientKeyPath(path string) Option { + return func(e *Emitter) { + e.clientKey = path + } +} + +func WithClientCertPath(path string) Option { + return func(e *Emitter) { + e.clientCert = path + } +} + +func WithKubeClient(kcli client.Client) Option { + return func(e *Emitter) { + e.kcli = kcli + } +} + +func WithServerName(name string) Option { + return func(e *Emitter) { + e.serverName = name + } +} diff --git a/goldmane/pkg/flowgen/testserver.go b/goldmane/pkg/flowgen/testserver.go new file mode 100644 index 00000000000..eb68fb048b8 --- /dev/null +++ b/goldmane/pkg/flowgen/testserver.go @@ -0,0 +1,155 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flowgen + +import ( + "context" + "os" + "sync" + "time" + + "github.com/sirupsen/logrus" + "golang.org/x/exp/rand" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + + "github.com/projectcalico/calico/goldmane/pkg/client" + "github.com/projectcalico/calico/goldmane/proto" +) + +func Start() { + logrus.Info("Starting flow generator") + defer func() { + logrus.Info("Stopping flow generator") + }() + + // Create a flow client. 
+ server := "127.0.0.1" + if s := os.Getenv("SERVER"); s != "" { + server = s + } + logrus.WithField("server", server).Info("Connecting to server") + flowClient := client.NewFlowClient(server) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Create a gRPC client conn. + cc, err := grpc.NewClient(server, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + logrus.WithError(err).Fatal("Failed to dial server") + } + go flowClient.Run(ctx, cc) + + // Create a new test gen. + gen := &flowGenerator{ + flogsByIndex: make(map[int]*proto.Flow), + outChan: make(chan *proto.Flow, 10000), + } + + // Start a goroutine to generate flows. + go gen.generateFlogs() + + // Send new logs as they are generated. + for flog := range gen.outChan { + flowClient.Push(flog) + } +} + +// flowGenerator implements a basic FlowLogAPI implementation for testing and developing purposes. +type flowGenerator struct { + sync.Mutex + flogsByIndex map[int]*proto.Flow + outChan chan *proto.Flow +} + +func (t *flowGenerator) generateFlogs() { + srcNames := map[int]string{ + 0: "client-aggr-1", + 1: "client-aggr-2", + 2: "client-aggr-3", + 3: "client-aggr-4", + } + dstNames := map[int]string{ + 0: "server-aggr-1", + 1: "server-aggr-2", + 2: "server-aggr-3", + 3: "server-aggr-4", + } + actions := map[int]string{ + 0: "allow", + 1: "deny", + } + reporters := map[int]string{ + 0: "src", + 1: "dst", + } + services := map[int]string{ + 0: "frontend-service", + 1: "backend-service", + 2: "db-service", + } + + // Periodically add flows to the server for testing, incrementing the index each time. + index := 0 + for { + // Use a 15 second aggregation interval for each flow. + startTime := time.Now() + endTime := time.Now().Add(15 * time.Second) + + wait := time.After(15 * time.Second) + + // Generate Several flows during this interval. + num := rand.Intn(30) + for i := 0; i < num; i++ { + t.Lock() + // Use some randomness to simulate different flows. 
+ t.outChan <- &proto.Flow{ + Key: &proto.FlowKey{ + Proto: "TCP", + SourceName: randomFrommap(srcNames), + SourceNamespace: "default", + SourceType: "wep", + DestName: randomFrommap(dstNames), + DestNamespace: "default", + DestType: "wep", + DestServiceName: randomFrommap(services), + DestServicePort: 443, + DestServicePortName: "https", + DestServiceNamespace: "default", + Reporter: randomFrommap(reporters), + Action: randomFrommap(actions), + }, + StartTime: int64(startTime.Unix()), + EndTime: int64(endTime.Unix()), + BytesIn: int64(rand.Intn(1000)), + BytesOut: int64(rand.Intn(1000)), + PacketsIn: int64(rand.Intn(100)), + PacketsOut: int64(rand.Intn(100)), + } + index++ + t.Unlock() + wait := 13 * time.Second / time.Duration(num) + time.Sleep(wait) + } + + <-wait + + } +} + +func randomFrommap(m map[int]string) string { + // Generate a random number within the size of the map. + return m[rand.Intn(len(m))] +} diff --git a/goldmane/pkg/internal/flowcache/timed_cache.go b/goldmane/pkg/internal/flowcache/timed_cache.go new file mode 100644 index 00000000000..734ab36784e --- /dev/null +++ b/goldmane/pkg/internal/flowcache/timed_cache.go @@ -0,0 +1,110 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package flowcache + +import ( + "sync" + "time" + + "github.com/projectcalico/calico/goldmane/pkg/internal/types" + "github.com/projectcalico/calico/goldmane/proto" +) + +// cacheKey wraps the canonical FlowKey type with a start and end time, as well as a scope (typically +// set to be the the originating node) since this cache stores flows across multiple +// sources and aggregation intervals. +type cacheKey struct { + fk types.FlowKey + startTime int64 + endTime int64 + scope string +} + +type expiringCacheEntry struct { + Flow *proto.Flow + ExpireAt time.Time +} + +// ExpiringFlowCache implements a cache of flow entries that expire after a configurable duration. +type ExpiringFlowCache struct { + sync.Mutex + flows map[cacheKey]*expiringCacheEntry + duration time.Duration +} + +func NewExpiringFlowCache(d time.Duration) *ExpiringFlowCache { + return &ExpiringFlowCache{ + flows: make(map[cacheKey]*expiringCacheEntry), + duration: d, + } +} + +func (c *ExpiringFlowCache) Add(f *proto.Flow, scope string) { + key := cacheKey{ + startTime: f.StartTime, + endTime: f.EndTime, + fk: *types.ProtoToFlowKey(f.Key), + scope: scope, + } + c.Lock() + defer c.Unlock() + c.flows[key] = &expiringCacheEntry{ + Flow: f, + ExpireAt: time.Now().Add(c.duration), + } +} + +func (c *ExpiringFlowCache) Has(f *proto.Flow, scope string) bool { + key := cacheKey{ + startTime: f.StartTime, + endTime: f.EndTime, + fk: *types.ProtoToFlowKey(f.Key), + scope: scope, + } + c.Lock() + defer c.Unlock() + _, ok := c.flows[key] + return ok +} + +func (c *ExpiringFlowCache) Iter(f func(f *proto.Flow) error) error { + c.Lock() + defer c.Unlock() + for _, v := range c.flows { + if err := f(v.Flow); err != nil { + return err + } + } + return nil +} + +func (c *ExpiringFlowCache) Run(interval time.Duration) { + for { + <-time.After(interval) + c.DeleteExpired() + } +} + +func (c *ExpiringFlowCache) DeleteExpired() { + c.Lock() + defer c.Unlock() + + now := time.Now() + for k, v := range c.flows { + if 
v.ExpireAt.Before(now) { + delete(c.flows, k) + } + } +} diff --git a/goldmane/pkg/internal/types/flow.go b/goldmane/pkg/internal/types/flow.go new file mode 100644 index 00000000000..f650cd533fa --- /dev/null +++ b/goldmane/pkg/internal/types/flow.go @@ -0,0 +1,210 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +import "github.com/projectcalico/calico/goldmane/proto" + +// FlowKey is a unique key for a flow. It matches the protobuf API exactly. Unfortunately, +// we cannot use the protobuf API structures as map keys due to private fields that are inserted +// by the protobuf Go code generation. So we need a copy of the struct here. +// +// This struct should be an exact copy of the proto.FlowKey structure, but without the private fields. +type FlowKey struct { + // SourceName is the name of the source for this Flow. It represents one or more + // source pods that share a GenerateName. + SourceName string `protobuf:"bytes,3,opt,name=source_name,json=sourceName,proto3" json:"source_name,omitempty"` + // SourceNamespace is the namespace of the source pods for this flow. + SourceNamespace string `protobuf:"bytes,4,opt,name=source_namespace,json=sourceNamespace,proto3" json:"source_namespace,omitempty"` + // SourceType is the type of the source, used to contextualize the source + // name and namespace fields. 
+ // + // This can be one of: + // + // - wep: WorkloadEndpoint (i.e., Pod) + // - hep: HostEndpoint + // - ns: NetworkSet + // - pub/pvt: External network (source name omitted) + SourceType string `protobuf:"bytes,5,opt,name=source_type,json=sourceType,proto3" json:"source_type,omitempty"` + // DestName is the name of the destination for this Flow. It represents one or more + // destination pods that share a GenerateName. + DestName string `protobuf:"bytes,7,opt,name=dest_name,json=destName,proto3" json:"dest_name,omitempty"` + // DestNamespace is the namespace of the destination pods for this flow. + DestNamespace string `protobuf:"bytes,8,opt,name=dest_namespace,json=destNamespace,proto3" json:"dest_namespace,omitempty"` + // DestType is the type of the destination, used to contextualize the dest + // name and namespace fields. + // + // This can be one of: + // + // - wep: WorkloadEndpoint (i.e., Pod) + // - hep: HostEndpoint + // - ns: NetworkSet + // - pub/pvt: External network (dest name omitted) + DestType string `protobuf:"bytes,9,opt,name=dest_type,json=destType,proto3" json:"dest_type,omitempty"` + // DestPort is the destination port on the specified protocol accessed by this flow. + DestPort int64 `protobuf:"varint,10,opt,name=dest_port,json=destPort,proto3" json:"dest_port,omitempty"` + // DestServiceName is the name of the destination service, if any. + DestServiceName string `protobuf:"bytes,11,opt,name=dest_service_name,json=destServiceName,proto3" json:"dest_service_name,omitempty"` + // DestServiceNamespace is the namespace of the destination service, if any. + DestServiceNamespace string `protobuf:"bytes,12,opt,name=dest_service_namespace,json=destServiceNamespace,proto3" json:"dest_service_namespace,omitempty"` + // DestServicePortName is the name of the port on the destination service, if any. 
+ DestServicePortName string `protobuf:"bytes,13,opt,name=dest_service_port_name,json=destServicePortName,proto3" json:"dest_service_port_name,omitempty"` + // DestServicePort is the port number on the destination service. + DestServicePort int64 `protobuf:"varint,14,opt,name=dest_service_port,json=destServicePort,proto3" json:"dest_service_port,omitempty"` + // Proto is the L4 protocol for this flow. Either TCP or UDP. + Proto string `protobuf:"bytes,15,opt,name=proto,proto3" json:"proto,omitempty"` + // Reporter is either "src" or "dst", depending on whether this flow was generated + // at the initating or terminating end of the connection attempt. + Reporter string `protobuf:"bytes,16,opt,name=reporter,proto3" json:"reporter,omitempty"` + // Action is the ultimate action taken on the flow. Either Allow or Drop. + Action string `protobuf:"bytes,17,opt,name=action,proto3" json:"action,omitempty"` + // Policies includes an entry for each policy rule that took an action on the connections + // aggregated into this flow. + Policies *FlowLogPolicy `protobuf:"bytes,14,opt,name=policies,proto3" json:"policies,omitempty"` +} + +// This struct should be an exact copy of the proto.Flow structure, but without the private fields. +type Flow struct { + // Key includes the identifying fields for this flow. + Key *FlowKey `protobuf:"bytes,1,opt,name=Key,proto3" json:"Key,omitempty"` + // StartTime is the start time for this flow. It is represented as the number of + // seconds since the UNIX epoch. + StartTime int64 `protobuf:"varint,2,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` + // EndTime is the end time for this flow. It is always exactly one aggregation + // interval after the start time. + EndTime int64 `protobuf:"varint,3,opt,name=end_time,json=endTime,proto3" json:"end_time,omitempty"` + // SourceLabels contains the intersection of labels that appear on all source + // pods that contributed to this flow. 
+ SourceLabels []string `protobuf:"bytes,4,rep,name=source_labels,json=sourceLabels,proto3" json:"source_labels,omitempty"` + // SourceLabels contains the intersection of labels that appear on all destination + // pods that contributed to this flow. + DestLabels []string `protobuf:"bytes,5,rep,name=dest_labels,json=destLabels,proto3" json:"dest_labels,omitempty"` + // Statistics. + PacketsIn int64 `protobuf:"varint,6,opt,name=packets_in,json=packetsIn,proto3" json:"packets_in,omitempty"` + PacketsOut int64 `protobuf:"varint,7,opt,name=packets_out,json=packetsOut,proto3" json:"packets_out,omitempty"` + BytesIn int64 `protobuf:"varint,8,opt,name=bytes_in,json=bytesIn,proto3" json:"bytes_in,omitempty"` + BytesOut int64 `protobuf:"varint,9,opt,name=bytes_out,json=bytesOut,proto3" json:"bytes_out,omitempty"` + // NumConnectionsStarted tracks the total number of new connections recorded for this Flow. It counts each + // connection attempt that matches the FlowKey that was made between this Flow's StartTime and EndTime. + NumConnectionsStarted int64 `protobuf:"varint,11,opt,name=num_connections_started,json=numConnectionsStarted,proto3" json:"num_connections_started,omitempty"` + // NumConnectionsCompleted tracks the total number of completed connections recorded for this Flow. It counts each + // connection that matches the FlowKey that was completed between this Flow's StartTime and EndTime. + NumConnectionsCompleted int64 `protobuf:"varint,12,opt,name=num_connections_completed,json=numConnectionsCompleted,proto3" json:"num_connections_completed,omitempty"` + // NumConnectionsLive tracks the total number of still active connections recorded for this Flow. It counts each + // connection that matches the FlowKey that was active at this Flow's EndTime. 
+ NumConnectionsLive int64 `protobuf:"varint,13,opt,name=num_connections_live,json=numConnectionsLive,proto3" json:"num_connections_live,omitempty"` +} + +type FlowLogPolicy struct { + // AllPolicies is a list of strings containing policy rule information. + AllPolicies []string `protobuf:"bytes,1,rep,name=all_policies,json=allPolicies,proto3" json:"all_policies,omitempty"` +} + +func ProtoToFlow(p *proto.Flow) *Flow { + return &Flow{ + Key: ProtoToFlowKey(p.Key), + StartTime: p.StartTime, + EndTime: p.EndTime, + SourceLabels: p.SourceLabels, + DestLabels: p.DestLabels, + PacketsIn: p.PacketsIn, + PacketsOut: p.PacketsOut, + BytesIn: p.BytesIn, + BytesOut: p.BytesOut, + NumConnectionsStarted: p.NumConnectionsStarted, + NumConnectionsCompleted: p.NumConnectionsCompleted, + NumConnectionsLive: p.NumConnectionsLive, + } +} + +func ProtoToFlowKey(p *proto.FlowKey) *FlowKey { + if p == nil { + return nil + } + return &FlowKey{ + SourceName: p.SourceName, + SourceNamespace: p.SourceNamespace, + SourceType: p.SourceType, + DestName: p.DestName, + DestNamespace: p.DestNamespace, + DestType: p.DestType, + DestPort: p.DestPort, + DestServiceName: p.DestServiceName, + DestServiceNamespace: p.DestServiceNamespace, + DestServicePortName: p.DestServicePortName, + DestServicePort: p.DestServicePort, + Proto: p.Proto, + Reporter: p.Reporter, + Action: p.Action, + Policies: ProtoToFlowLogPolicy(p.Policies), + } +} + +func ProtoToFlowLogPolicy(p *proto.FlowLogPolicy) *FlowLogPolicy { + if p == nil { + return nil + } + return &FlowLogPolicy{ + AllPolicies: p.AllPolicies, + } +} + +func FlowToProto(f *Flow) *proto.Flow { + return &proto.Flow{ + Key: FlowKeyToProto(f.Key), + StartTime: f.StartTime, + EndTime: f.EndTime, + SourceLabels: f.SourceLabels, + DestLabels: f.DestLabels, + PacketsIn: f.PacketsIn, + PacketsOut: f.PacketsOut, + BytesIn: f.BytesIn, + BytesOut: f.BytesOut, + NumConnectionsStarted: f.NumConnectionsStarted, + NumConnectionsCompleted: f.NumConnectionsCompleted, + 
NumConnectionsLive: f.NumConnectionsLive, + } +} + +func FlowKeyToProto(f *FlowKey) *proto.FlowKey { + if f == nil { + return nil + } + return &proto.FlowKey{ + SourceName: f.SourceName, + SourceNamespace: f.SourceNamespace, + SourceType: f.SourceType, + DestName: f.DestName, + DestNamespace: f.DestNamespace, + DestType: f.DestType, + DestPort: f.DestPort, + DestServiceName: f.DestServiceName, + DestServiceNamespace: f.DestServiceNamespace, + DestServicePortName: f.DestServicePortName, + DestServicePort: f.DestServicePort, + Proto: f.Proto, + Reporter: f.Reporter, + Action: f.Action, + Policies: FlowLogPolicyToProto(f.Policies), + } +} + +func FlowLogPolicyToProto(f *FlowLogPolicy) *proto.FlowLogPolicy { + if f == nil { + return nil + } + return &proto.FlowLogPolicy{ + AllPolicies: f.AllPolicies, + } +} diff --git a/goldmane/pkg/internal/types/flow_test.go b/goldmane/pkg/internal/types/flow_test.go new file mode 100644 index 00000000000..f16ada8f37f --- /dev/null +++ b/goldmane/pkg/internal/types/flow_test.go @@ -0,0 +1,123 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package types_test + +import ( + "reflect" + "testing" + + "github.com/stretchr/testify/require" + googleproto "google.golang.org/protobuf/proto" + + "github.com/projectcalico/calico/goldmane/pkg/internal/types" + "github.com/projectcalico/calico/goldmane/proto" +) + +type fromProtoTest struct { + name string + proto proto.Flow +} + +func TestTranslation(t *testing.T) { + // Assert that bidirection translation works. + tests := []*fromProtoTest{ + { + name: "empty proto.Flow", + proto: proto.Flow{}, + }, + { + name: "proto.Flow with all fields set", + proto: proto.Flow{ + Key: &proto.FlowKey{ + SourceName: "source-name", + SourceNamespace: "source-namespace", + SourceType: "source-type", + DestName: "dest-name", + DestNamespace: "dest-namespace", + DestType: "dest-type", + DestPort: 1234, + DestServiceName: "dest-service-name", + DestServiceNamespace: "dest-service-namespace", + DestServicePortName: "dest-service-port-name", + DestServicePort: 5678, + Proto: "proto", + Reporter: "reporter", + Action: "action", + Policies: &proto.FlowLogPolicy{ + AllPolicies: []string{"policy-1", "policy-2"}, + }, + }, + StartTime: 1234567890, + EndTime: 1234567891, + SourceLabels: []string{"source-label-1", "source-label-2"}, + DestLabels: []string{"dest-label-1", "dest-label-2"}, + PacketsIn: 123, + PacketsOut: 456, + BytesIn: 789, + BytesOut: 101112, + NumConnectionsStarted: 131415, + NumConnectionsCompleted: 161718, + NumConnectionsLive: 192021, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + f := types.ProtoToFlow(&test.proto) + p := types.FlowToProto(f) + if !googleproto.Equal(&test.proto, p) { + t.Fatalf("translated proto.Flow does not match the original proto.Flow: %v != %v", p, test.proto.String()) + } + }) + } +} + +// TestIdentical verifies that the exported fields on types.Flow and proto.Flow are identical. This ensures +// we don't accidentally add new fields to one type and forget to add them to the other. 
+func TestIdentical(t *testing.T) { + p := reflect.ValueOf(proto.Flow{}) + f := reflect.ValueOf(types.Flow{}) + + // Check each field in types.Flow is present in proto.Flow. + for _, fField := range reflect.VisibleFields(f.Type()) { + found := false + if !fField.IsExported() { + continue + } + for _, pField := range reflect.VisibleFields(p.Type()) { + if fField.Name == pField.Name { + found = true + break + } + } + require.True(t, found, "field %s not found in proto.Flow", fField.Name) + } + + // Check each field in proto.Flow is present in types.Flow. + for _, pField := range reflect.VisibleFields(p.Type()) { + found := false + if !pField.IsExported() { + continue + } + for _, fField := range reflect.VisibleFields(f.Type()) { + if pField.Name == fField.Name { + found = true + break + } + } + require.True(t, found, "field %s not found in types.Flow", pField.Name) + } +} diff --git a/goldmane/pkg/internal/utils/logging.go b/goldmane/pkg/internal/utils/logging.go new file mode 100644 index 00000000000..9f7d4edb1a1 --- /dev/null +++ b/goldmane/pkg/internal/utils/logging.go @@ -0,0 +1,41 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package utils + +import ( + "os" + + "github.com/sirupsen/logrus" + + "github.com/projectcalico/calico/libcalico-go/lib/logutils" +) + +// ConfigureLogging configures the logging framework. The logging level that will +// be used is passed in as a parameter. 
Otherwise, it will default to WARN +// The output will be set to STDOUT and the format is TextFormat +func ConfigureLogging(logLevel string) { + // Install a hook that adds file/line number information. + logutils.ConfigureFormatter("goldmane") + logrus.SetOutput(os.Stdout) + + // Override with desired log level + level, err := logrus.ParseLevel(logLevel) + if err != nil { + logrus.Error("Invalid logging level passed in. Will use default level set to WARN") + // Setting default to WARN + level = logrus.WarnLevel + } + + logrus.SetLevel(level) +} diff --git a/goldmane/pkg/server/server.go b/goldmane/pkg/server/server.go new file mode 100644 index 00000000000..ab14226bb45 --- /dev/null +++ b/goldmane/pkg/server/server.go @@ -0,0 +1,54 @@ +// Copyright (c) 2025 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package server + +import ( + "github.com/sirupsen/logrus" + "google.golang.org/grpc" + + "github.com/projectcalico/calico/goldmane/pkg/aggregator" + "github.com/projectcalico/calico/goldmane/proto" +) + +func NewServer(aggr *aggregator.LogAggregator) *FlowServer { + return &FlowServer{ + aggr: aggr, + } +} + +type FlowServer struct { + proto.UnimplementedFlowAPIServer + + aggr *aggregator.LogAggregator +} + +func (s *FlowServer) RegisterWith(srv *grpc.Server) { + // Register the server with the gRPC server. 
+ proto.RegisterFlowAPIServer(srv, s) + logrus.Info("Registered FlowAPI Server") +} + +func (s *FlowServer) List(req *proto.FlowRequest, server grpc.ServerStreamingServer[proto.Flow]) error { + // Get flows. + flows := s.aggr.GetFlows(req) + + // Send flows. + for _, flow := range flows { + if err := server.Send(flow); err != nil { + return err + } + } + return nil +} diff --git a/goldmane/proto/api.pb.go b/goldmane/proto/api.pb.go new file mode 100644 index 00000000000..2dfed6b0827 --- /dev/null +++ b/goldmane/proto/api.pb.go @@ -0,0 +1,731 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.1 +// protoc v3.5.0 +// source: api.proto + +package proto + +import ( + reflect "reflect" + sync "sync" + + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// FlowReceipt is a response from the server to a client after publishing a Flow. 
+type FlowReceipt struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *FlowReceipt) Reset() { + *x = FlowReceipt{} + mi := &file_api_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FlowReceipt) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FlowReceipt) ProtoMessage() {} + +func (x *FlowReceipt) ProtoReflect() protoreflect.Message { + mi := &file_api_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FlowReceipt.ProtoReflect.Descriptor instead. +func (*FlowReceipt) Descriptor() ([]byte, []int) { + return file_api_proto_rawDescGZIP(), []int{0} +} + +// FlowRequest defines a message to request a particular selection of aggregated Flow objects. +type FlowRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + // StartTimeGt specifies the beginning of a time window with which to filter Flows. Flows + // will be returned only if their start time occurs after the requested time. + StartTimeGt int64 `protobuf:"varint,1,opt,name=start_time_gt,json=startTimeGt,proto3" json:"start_time_gt,omitempty"` + // StartTimeLt specifies the end of a time window with which to filter flows. Flows will + // be returned only if their start time occurs before the requested time. + StartTimeLt int64 `protobuf:"varint,2,opt,name=start_time_lt,json=startTimeLt,proto3" json:"start_time_lt,omitempty"` + // PageNumber specifies the page number to return. It requires that PageSize is also specified in order + // to determine page boundaries. Note that pages may change over time as new flow data is collected or expired. + // Querying the same page at different points in time may return different results. 
+ PageNumber int64 `protobuf:"varint,3,opt,name=page_number,json=pageNumber,proto3" json:"page_number,omitempty"` + // PageSize configures the maximum number of results to return as part of this query. + PageSize int64 `protobuf:"varint,4,opt,name=page_size,json=pageSize,proto3" json:"page_size,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *FlowRequest) Reset() { + *x = FlowRequest{} + mi := &file_api_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FlowRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FlowRequest) ProtoMessage() {} + +func (x *FlowRequest) ProtoReflect() protoreflect.Message { + mi := &file_api_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FlowRequest.ProtoReflect.Descriptor instead. +func (*FlowRequest) Descriptor() ([]byte, []int) { + return file_api_proto_rawDescGZIP(), []int{1} +} + +func (x *FlowRequest) GetStartTimeGt() int64 { + if x != nil { + return x.StartTimeGt + } + return 0 +} + +func (x *FlowRequest) GetStartTimeLt() int64 { + if x != nil { + return x.StartTimeLt + } + return 0 +} + +func (x *FlowRequest) GetPageNumber() int64 { + if x != nil { + return x.PageNumber + } + return 0 +} + +func (x *FlowRequest) GetPageSize() int64 { + if x != nil { + return x.PageSize + } + return 0 +} + +// FlowUpdate wraps a Flow with additional metadata. +type FlowUpdate struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Flow contains the actual flow being sent. 
+ Flow *Flow `protobuf:"bytes,1,opt,name=flow,proto3" json:"flow,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *FlowUpdate) Reset() { + *x = FlowUpdate{} + mi := &file_api_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FlowUpdate) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FlowUpdate) ProtoMessage() {} + +func (x *FlowUpdate) ProtoReflect() protoreflect.Message { + mi := &file_api_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FlowUpdate.ProtoReflect.Descriptor instead. +func (*FlowUpdate) Descriptor() ([]byte, []int) { + return file_api_proto_rawDescGZIP(), []int{2} +} + +func (x *FlowUpdate) GetFlow() *Flow { + if x != nil { + return x.Flow + } + return nil +} + +// FlowKey includes the identifying fields for a Flow. +// - Source: Name, namespace, type, and labels. +// - Destination: Name, namespace, type, labels and port +// - Action taken on the connection. +// - Reporter (i.e., measured at source or destination). +// - Protocol of the connection (TCP, UDP, etc.). +type FlowKey struct { + state protoimpl.MessageState `protogen:"open.v1"` + // SourceName is the name of the source for this Flow. It represents one or more + // source pods that share a GenerateName. + SourceName string `protobuf:"bytes,1,opt,name=source_name,json=sourceName,proto3" json:"source_name,omitempty"` + // SourceNamespace is the namespace of the source pods for this flow. + SourceNamespace string `protobuf:"bytes,2,opt,name=source_namespace,json=sourceNamespace,proto3" json:"source_namespace,omitempty"` + // SourceType is the type of the source, used to contextualize the source + // name and namespace fields. 
+ // + // This can be one of: + // + // - wep: WorkloadEndpoint (i.e., Pod) + // - hep: HostEndpoint + // - ns: NetworkSet + // - pub/pvt: External network (source name omitted) + SourceType string `protobuf:"bytes,3,opt,name=source_type,json=sourceType,proto3" json:"source_type,omitempty"` + // DestName is the name of the destination for this Flow. It represents one or more + // destination pods that share a GenerateName. + DestName string `protobuf:"bytes,4,opt,name=dest_name,json=destName,proto3" json:"dest_name,omitempty"` + // DestNamespace is the namespace of the destination pods for this flow. + DestNamespace string `protobuf:"bytes,5,opt,name=dest_namespace,json=destNamespace,proto3" json:"dest_namespace,omitempty"` + // DestType is the type of the destination, used to contextualize the dest + // name and namespace fields. + // + // This can be one of: + // + // - wep: WorkloadEndpoint (i.e., Pod) + // - hep: HostEndpoint + // - ns: NetworkSet + // - pub/pvt: External network (dest name omitted) + DestType string `protobuf:"bytes,6,opt,name=dest_type,json=destType,proto3" json:"dest_type,omitempty"` + // DestPort is the destination port on the specified protocol accessed by this flow. + DestPort int64 `protobuf:"varint,7,opt,name=dest_port,json=destPort,proto3" json:"dest_port,omitempty"` + // DestServiceName is the name of the destination service, if any. + DestServiceName string `protobuf:"bytes,8,opt,name=dest_service_name,json=destServiceName,proto3" json:"dest_service_name,omitempty"` + // DestServiceNamespace is the namespace of the destination service, if any. + DestServiceNamespace string `protobuf:"bytes,9,opt,name=dest_service_namespace,json=destServiceNamespace,proto3" json:"dest_service_namespace,omitempty"` + // DestServicePortName is the name of the port on the destination service, if any. 
+ DestServicePortName string `protobuf:"bytes,10,opt,name=dest_service_port_name,json=destServicePortName,proto3" json:"dest_service_port_name,omitempty"` + // DestServicePort is the port number on the destination service. + DestServicePort int64 `protobuf:"varint,11,opt,name=dest_service_port,json=destServicePort,proto3" json:"dest_service_port,omitempty"` + // Proto is the L4 protocol for this flow. Either TCP or UDP. + Proto string `protobuf:"bytes,12,opt,name=proto,proto3" json:"proto,omitempty"` + // Reporter is either "src" or "dst", depending on whether this flow was generated + // at the initating or terminating end of the connection attempt. + Reporter string `protobuf:"bytes,13,opt,name=reporter,proto3" json:"reporter,omitempty"` + // Action is the ultimate action taken on the flow. Either Allow or Drop. + Action string `protobuf:"bytes,14,opt,name=action,proto3" json:"action,omitempty"` + // Policies includes an entry for each policy rule that took an action on the connections + // aggregated into this flow. + Policies *FlowLogPolicy `protobuf:"bytes,15,opt,name=policies,proto3" json:"policies,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *FlowKey) Reset() { + *x = FlowKey{} + mi := &file_api_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FlowKey) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FlowKey) ProtoMessage() {} + +func (x *FlowKey) ProtoReflect() protoreflect.Message { + mi := &file_api_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FlowKey.ProtoReflect.Descriptor instead. 
+func (*FlowKey) Descriptor() ([]byte, []int) { + return file_api_proto_rawDescGZIP(), []int{3} +} + +func (x *FlowKey) GetSourceName() string { + if x != nil { + return x.SourceName + } + return "" +} + +func (x *FlowKey) GetSourceNamespace() string { + if x != nil { + return x.SourceNamespace + } + return "" +} + +func (x *FlowKey) GetSourceType() string { + if x != nil { + return x.SourceType + } + return "" +} + +func (x *FlowKey) GetDestName() string { + if x != nil { + return x.DestName + } + return "" +} + +func (x *FlowKey) GetDestNamespace() string { + if x != nil { + return x.DestNamespace + } + return "" +} + +func (x *FlowKey) GetDestType() string { + if x != nil { + return x.DestType + } + return "" +} + +func (x *FlowKey) GetDestPort() int64 { + if x != nil { + return x.DestPort + } + return 0 +} + +func (x *FlowKey) GetDestServiceName() string { + if x != nil { + return x.DestServiceName + } + return "" +} + +func (x *FlowKey) GetDestServiceNamespace() string { + if x != nil { + return x.DestServiceNamespace + } + return "" +} + +func (x *FlowKey) GetDestServicePortName() string { + if x != nil { + return x.DestServicePortName + } + return "" +} + +func (x *FlowKey) GetDestServicePort() int64 { + if x != nil { + return x.DestServicePort + } + return 0 +} + +func (x *FlowKey) GetProto() string { + if x != nil { + return x.Proto + } + return "" +} + +func (x *FlowKey) GetReporter() string { + if x != nil { + return x.Reporter + } + return "" +} + +func (x *FlowKey) GetAction() string { + if x != nil { + return x.Action + } + return "" +} + +func (x *FlowKey) GetPolicies() *FlowLogPolicy { + if x != nil { + return x.Policies + } + return nil +} + +// Flow is a message representing statistics gathered about connections that share common fields, +// aggregated across either time, nodes, or both. +type Flow struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Key includes the identifying fields for this flow. 
+ Key *FlowKey `protobuf:"bytes,1,opt,name=Key,proto3" json:"Key,omitempty"` + // StartTime is the start time for this flow. It is represented as the number of + // seconds since the UNIX epoch. + StartTime int64 `protobuf:"varint,2,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` + // EndTime is the end time for this flow. It is always exactly one aggregation + // interval after the start time. + EndTime int64 `protobuf:"varint,3,opt,name=end_time,json=endTime,proto3" json:"end_time,omitempty"` + // SourceLabels contains the intersection of labels that appear on all source + // pods that contributed to this flow. + SourceLabels []string `protobuf:"bytes,4,rep,name=source_labels,json=sourceLabels,proto3" json:"source_labels,omitempty"` + // SourceLabels contains the intersection of labels that appear on all destination + // pods that contributed to this flow. + DestLabels []string `protobuf:"bytes,5,rep,name=dest_labels,json=destLabels,proto3" json:"dest_labels,omitempty"` + // Statistics. + PacketsIn int64 `protobuf:"varint,6,opt,name=packets_in,json=packetsIn,proto3" json:"packets_in,omitempty"` + PacketsOut int64 `protobuf:"varint,7,opt,name=packets_out,json=packetsOut,proto3" json:"packets_out,omitempty"` + BytesIn int64 `protobuf:"varint,8,opt,name=bytes_in,json=bytesIn,proto3" json:"bytes_in,omitempty"` + BytesOut int64 `protobuf:"varint,9,opt,name=bytes_out,json=bytesOut,proto3" json:"bytes_out,omitempty"` + // NumConnectionsStarted tracks the total number of new connections recorded for this Flow. It counts each + // connection attempt that matches the FlowKey that was made between this Flow's StartTime and EndTime. + NumConnectionsStarted int64 `protobuf:"varint,10,opt,name=num_connections_started,json=numConnectionsStarted,proto3" json:"num_connections_started,omitempty"` + // NumConnectionsCompleted tracks the total number of completed TCP connections recorded for this Flow. 
It counts each + // connection that matches the FlowKey that was completed between this Flow's StartTime and EndTime. + NumConnectionsCompleted int64 `protobuf:"varint,11,opt,name=num_connections_completed,json=numConnectionsCompleted,proto3" json:"num_connections_completed,omitempty"` + // NumConnectionsLive tracks the total number of still active connections recorded for this Flow. It counts each + // connection that matches the FlowKey that was active at this Flow's EndTime. + NumConnectionsLive int64 `protobuf:"varint,12,opt,name=num_connections_live,json=numConnectionsLive,proto3" json:"num_connections_live,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Flow) Reset() { + *x = Flow{} + mi := &file_api_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Flow) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Flow) ProtoMessage() {} + +func (x *Flow) ProtoReflect() protoreflect.Message { + mi := &file_api_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Flow.ProtoReflect.Descriptor instead. 
+func (*Flow) Descriptor() ([]byte, []int) { + return file_api_proto_rawDescGZIP(), []int{4} +} + +func (x *Flow) GetKey() *FlowKey { + if x != nil { + return x.Key + } + return nil +} + +func (x *Flow) GetStartTime() int64 { + if x != nil { + return x.StartTime + } + return 0 +} + +func (x *Flow) GetEndTime() int64 { + if x != nil { + return x.EndTime + } + return 0 +} + +func (x *Flow) GetSourceLabels() []string { + if x != nil { + return x.SourceLabels + } + return nil +} + +func (x *Flow) GetDestLabels() []string { + if x != nil { + return x.DestLabels + } + return nil +} + +func (x *Flow) GetPacketsIn() int64 { + if x != nil { + return x.PacketsIn + } + return 0 +} + +func (x *Flow) GetPacketsOut() int64 { + if x != nil { + return x.PacketsOut + } + return 0 +} + +func (x *Flow) GetBytesIn() int64 { + if x != nil { + return x.BytesIn + } + return 0 +} + +func (x *Flow) GetBytesOut() int64 { + if x != nil { + return x.BytesOut + } + return 0 +} + +func (x *Flow) GetNumConnectionsStarted() int64 { + if x != nil { + return x.NumConnectionsStarted + } + return 0 +} + +func (x *Flow) GetNumConnectionsCompleted() int64 { + if x != nil { + return x.NumConnectionsCompleted + } + return 0 +} + +func (x *Flow) GetNumConnectionsLive() int64 { + if x != nil { + return x.NumConnectionsLive + } + return 0 +} + +type FlowLogPolicy struct { + state protoimpl.MessageState `protogen:"open.v1"` + // AllPolicies is a list of strings containing policy rule information. 
+ AllPolicies []string `protobuf:"bytes,1,rep,name=all_policies,json=allPolicies,proto3" json:"all_policies,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *FlowLogPolicy) Reset() { + *x = FlowLogPolicy{} + mi := &file_api_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FlowLogPolicy) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FlowLogPolicy) ProtoMessage() {} + +func (x *FlowLogPolicy) ProtoReflect() protoreflect.Message { + mi := &file_api_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FlowLogPolicy.ProtoReflect.Descriptor instead. +func (*FlowLogPolicy) Descriptor() ([]byte, []int) { + return file_api_proto_rawDescGZIP(), []int{5} +} + +func (x *FlowLogPolicy) GetAllPolicies() []string { + if x != nil { + return x.AllPolicies + } + return nil +} + +var File_api_proto protoreflect.FileDescriptor + +var file_api_proto_rawDesc = []byte{ + 0x0a, 0x09, 0x61, 0x70, 0x69, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x05, 0x66, 0x65, 0x6c, + 0x69, 0x78, 0x22, 0x0d, 0x0a, 0x0b, 0x46, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x63, 0x65, 0x69, 0x70, + 0x74, 0x22, 0x93, 0x01, 0x0a, 0x0b, 0x46, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, + 0x67, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x73, 0x74, 0x61, 0x72, 0x74, 0x54, + 0x69, 0x6d, 0x65, 0x47, 0x74, 0x12, 0x22, 0x0a, 0x0d, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x74, + 0x69, 0x6d, 0x65, 0x5f, 0x6c, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x73, 0x74, + 0x61, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x4c, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x70, 0x61, 0x67, + 0x65, 0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, 
0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, + 0x70, 0x61, 0x67, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x1b, 0x0a, 0x09, 0x70, 0x61, + 0x67, 0x65, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x08, 0x70, + 0x61, 0x67, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x22, 0x2d, 0x0a, 0x0a, 0x46, 0x6c, 0x6f, 0x77, 0x55, + 0x70, 0x64, 0x61, 0x74, 0x65, 0x12, 0x1f, 0x0a, 0x04, 0x66, 0x6c, 0x6f, 0x77, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x0b, 0x2e, 0x66, 0x65, 0x6c, 0x69, 0x78, 0x2e, 0x46, 0x6c, 0x6f, 0x77, + 0x52, 0x04, 0x66, 0x6c, 0x6f, 0x77, 0x22, 0xb3, 0x04, 0x0a, 0x07, 0x46, 0x6c, 0x6f, 0x77, 0x4b, + 0x65, 0x79, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x6e, 0x61, 0x6d, + 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x4e, + 0x61, 0x6d, 0x65, 0x12, 0x29, 0x0a, 0x10, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x6e, 0x61, + 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x73, + 0x6f, 0x75, 0x72, 0x63, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x1f, + 0x0a, 0x0b, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, + 0x1b, 0x0a, 0x09, 0x64, 0x65, 0x73, 0x74, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x08, 0x64, 0x65, 0x73, 0x74, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x25, 0x0a, 0x0e, + 0x64, 0x65, 0x73, 0x74, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x64, 0x65, 0x73, 0x74, 0x4e, 0x61, 0x6d, 0x65, 0x73, 0x70, + 0x61, 0x63, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x64, 0x65, 0x73, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x64, 0x65, 0x73, 0x74, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x1b, 0x0a, 0x09, 0x64, 0x65, 0x73, 0x74, 0x5f, 0x70, 0x6f, 0x72, 0x74, 
0x18, 0x07, 0x20, + 0x01, 0x28, 0x03, 0x52, 0x08, 0x64, 0x65, 0x73, 0x74, 0x50, 0x6f, 0x72, 0x74, 0x12, 0x2a, 0x0a, + 0x11, 0x64, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x5f, 0x6e, 0x61, + 0x6d, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x64, 0x65, 0x73, 0x74, 0x53, 0x65, + 0x72, 0x76, 0x69, 0x63, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x34, 0x0a, 0x16, 0x64, 0x65, 0x73, + 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, + 0x61, 0x63, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x14, 0x64, 0x65, 0x73, 0x74, 0x53, + 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, + 0x33, 0x0a, 0x16, 0x64, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x5f, + 0x70, 0x6f, 0x72, 0x74, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x13, 0x64, 0x65, 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x50, 0x6f, 0x72, 0x74, + 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2a, 0x0a, 0x11, 0x64, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x72, + 0x76, 0x69, 0x63, 0x65, 0x5f, 0x70, 0x6f, 0x72, 0x74, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x03, 0x52, + 0x0f, 0x64, 0x65, 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x50, 0x6f, 0x72, 0x74, + 0x12, 0x14, 0x0a, 0x05, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x05, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6f, 0x72, 0x74, + 0x65, 0x72, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x72, 0x65, 0x70, 0x6f, 0x72, 0x74, + 0x65, 0x72, 0x12, 0x16, 0x0a, 0x06, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x0e, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x06, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x30, 0x0a, 0x08, 0x70, 0x6f, + 0x6c, 0x69, 0x63, 0x69, 0x65, 0x73, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x66, + 0x65, 0x6c, 0x69, 0x78, 0x2e, 0x46, 0x6c, 0x6f, 0x77, 0x4c, 0x6f, 0x67, 0x50, 0x6f, 0x6c, 0x69, + 0x63, 0x79, 0x52, 
0x08, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x69, 0x65, 0x73, 0x22, 0xc6, 0x03, 0x0a, + 0x04, 0x46, 0x6c, 0x6f, 0x77, 0x12, 0x20, 0x0a, 0x03, 0x4b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x0e, 0x2e, 0x66, 0x65, 0x6c, 0x69, 0x78, 0x2e, 0x46, 0x6c, 0x6f, 0x77, 0x4b, + 0x65, 0x79, 0x52, 0x03, 0x4b, 0x65, 0x79, 0x12, 0x1d, 0x0a, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, + 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x73, 0x74, 0x61, + 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x19, 0x0a, 0x08, 0x65, 0x6e, 0x64, 0x5f, 0x74, 0x69, + 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x65, 0x6e, 0x64, 0x54, 0x69, 0x6d, + 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x6c, 0x61, 0x62, 0x65, + 0x6c, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, + 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x74, 0x5f, 0x6c, + 0x61, 0x62, 0x65, 0x6c, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0a, 0x64, 0x65, 0x73, + 0x74, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x61, 0x63, 0x6b, 0x65, + 0x74, 0x73, 0x5f, 0x69, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x70, 0x61, 0x63, + 0x6b, 0x65, 0x74, 0x73, 0x49, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x74, + 0x73, 0x5f, 0x6f, 0x75, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x70, 0x61, 0x63, + 0x6b, 0x65, 0x74, 0x73, 0x4f, 0x75, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x62, 0x79, 0x74, 0x65, 0x73, + 0x5f, 0x69, 0x6e, 0x18, 0x08, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x62, 0x79, 0x74, 0x65, 0x73, + 0x49, 0x6e, 0x12, 0x1b, 0x0a, 0x09, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5f, 0x6f, 0x75, 0x74, 0x18, + 0x09, 0x20, 0x01, 0x28, 0x03, 0x52, 0x08, 0x62, 0x79, 0x74, 0x65, 0x73, 0x4f, 0x75, 0x74, 0x12, + 0x36, 0x0a, 0x17, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x65, 
0x64, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x15, 0x6e, 0x75, 0x6d, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x53, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, 0x12, 0x3a, 0x0a, 0x19, 0x6e, 0x75, 0x6d, 0x5f, 0x63, + 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x5f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, + 0x65, 0x74, 0x65, 0x64, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x03, 0x52, 0x17, 0x6e, 0x75, 0x6d, 0x43, + 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, + 0x74, 0x65, 0x64, 0x12, 0x30, 0x0a, 0x14, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x6f, 0x6e, 0x6e, 0x65, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x5f, 0x6c, 0x69, 0x76, 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x12, 0x6e, 0x75, 0x6d, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x4c, 0x69, 0x76, 0x65, 0x22, 0x32, 0x0a, 0x0d, 0x46, 0x6c, 0x6f, 0x77, 0x4c, 0x6f, 0x67, + 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x6c, 0x6c, 0x5f, 0x70, 0x6f, + 0x6c, 0x69, 0x63, 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x61, 0x6c, + 0x6c, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x69, 0x65, 0x73, 0x32, 0x34, 0x0a, 0x07, 0x46, 0x6c, 0x6f, + 0x77, 0x41, 0x50, 0x49, 0x12, 0x29, 0x0a, 0x04, 0x4c, 0x69, 0x73, 0x74, 0x12, 0x12, 0x2e, 0x66, + 0x65, 0x6c, 0x69, 0x78, 0x2e, 0x46, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x1a, 0x0b, 0x2e, 0x66, 0x65, 0x6c, 0x69, 0x78, 0x2e, 0x46, 0x6c, 0x6f, 0x77, 0x30, 0x01, 0x32, + 0x45, 0x0a, 0x0d, 0x46, 0x6c, 0x6f, 0x77, 0x43, 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x12, 0x34, 0x0a, 0x07, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x12, 0x11, 0x2e, 0x66, 0x65, + 0x6c, 0x69, 0x78, 0x2e, 0x46, 0x6c, 0x6f, 0x77, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x1a, 0x12, + 0x2e, 0x66, 0x65, 0x6c, 0x69, 0x78, 0x2e, 0x46, 0x6c, 0x6f, 0x77, 0x52, 0x65, 0x63, 0x65, 0x69, + 0x70, 0x74, 0x28, 0x01, 0x30, 0x01, 0x42, 0x09, 0x5a, 0x07, 0x2e, 0x2f, 0x70, 0x72, 0x6f, 
0x74, + 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_api_proto_rawDescOnce sync.Once + file_api_proto_rawDescData = file_api_proto_rawDesc +) + +func file_api_proto_rawDescGZIP() []byte { + file_api_proto_rawDescOnce.Do(func() { + file_api_proto_rawDescData = protoimpl.X.CompressGZIP(file_api_proto_rawDescData) + }) + return file_api_proto_rawDescData +} + +var file_api_proto_msgTypes = make([]protoimpl.MessageInfo, 6) +var file_api_proto_goTypes = []any{ + (*FlowReceipt)(nil), // 0: felix.FlowReceipt + (*FlowRequest)(nil), // 1: felix.FlowRequest + (*FlowUpdate)(nil), // 2: felix.FlowUpdate + (*FlowKey)(nil), // 3: felix.FlowKey + (*Flow)(nil), // 4: felix.Flow + (*FlowLogPolicy)(nil), // 5: felix.FlowLogPolicy +} +var file_api_proto_depIdxs = []int32{ + 4, // 0: felix.FlowUpdate.flow:type_name -> felix.Flow + 5, // 1: felix.FlowKey.policies:type_name -> felix.FlowLogPolicy + 3, // 2: felix.Flow.Key:type_name -> felix.FlowKey + 1, // 3: felix.FlowAPI.List:input_type -> felix.FlowRequest + 2, // 4: felix.FlowCollector.Connect:input_type -> felix.FlowUpdate + 4, // 5: felix.FlowAPI.List:output_type -> felix.Flow + 0, // 6: felix.FlowCollector.Connect:output_type -> felix.FlowReceipt + 5, // [5:7] is the sub-list for method output_type + 3, // [3:5] is the sub-list for method input_type + 3, // [3:3] is the sub-list for extension type_name + 3, // [3:3] is the sub-list for extension extendee + 0, // [0:3] is the sub-list for field type_name +} + +func init() { file_api_proto_init() } +func file_api_proto_init() { + if File_api_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_api_proto_rawDesc, + NumEnums: 0, + NumMessages: 6, + NumExtensions: 0, + NumServices: 2, + }, + GoTypes: file_api_proto_goTypes, + DependencyIndexes: file_api_proto_depIdxs, + MessageInfos: file_api_proto_msgTypes, + }.Build() + 
File_api_proto = out.File + file_api_proto_rawDesc = nil + file_api_proto_goTypes = nil + file_api_proto_depIdxs = nil +} diff --git a/goldmane/proto/api.proto b/goldmane/proto/api.proto new file mode 100644 index 00000000000..ed3d9e0e506 --- /dev/null +++ b/goldmane/proto/api.proto @@ -0,0 +1,170 @@ +syntax = "proto3"; + +package felix; + +option go_package = "./proto"; + +service FlowAPI { + // List is an API call to query for one or more Flows. + // Matching Flows are streamed back to the caller. + rpc List(FlowRequest) returns (stream Flow); +} + +// FlowCollector represents an API capable of receiving streams of Flow data +// from cluster nodes. +service FlowCollector { + // Connect receives a connection that may stream one or more FlowUpdates. A FlowReceipt is returned + // to the client by the server after each FlowUpdate. + // + // Following a connection or reconnection to the server, clients should send duplicates of previously transmitted FlowUpdates + // in order to allow the server to rebuild its cache, as well as any new FlowUpdates that have not previously been transmitted. + // The server is responsible for deduplicating where needed. + rpc Connect(stream FlowUpdate) returns (stream FlowReceipt); +} + +// FlowReceipt is a response from the server to a client after publishing a Flow. +message FlowReceipt {} + +// FlowRequest defines a message to request a particular selection of aggregated Flow objects. +message FlowRequest { + // StartTimeGt specifies the beginning of a time window with which to filter Flows. Flows + // will be returned only if their start time occurs after the requested time. + int64 start_time_gt = 1; + + // StartTimeLt specifies the end of a time window with which to filter flows. Flows will + // be returned only if their start time occurs before the requested time. + int64 start_time_lt = 2; + + // PageNumber specifies the page number to return. It requires that PageSize is also specified in order + // to determine page boundaries.
Note that pages may change over time as new flow data is collected or expired. + // Querying the same page at different points in time may return different results. + int64 page_number = 3; + + // PageSize configures the maximum number of results to return as part of this query. + int64 page_size = 4; +} + +// FlowUpdate wraps a Flow with additional metadata. +message FlowUpdate { + // Flow contains the actual flow being sent. + Flow flow = 1; +} + +// FlowKey includes the identifying fields for a Flow. +// - Source: Name, namespace, type, and labels. +// - Destination: Name, namespace, type, labels and port +// - Action taken on the connection. +// - Reporter (i.e., measured at source or destination). +// - Protocol of the connection (TCP, UDP, etc.). +message FlowKey { + // SourceName is the name of the source for this Flow. It represents one or more + // source pods that share a GenerateName. + string source_name = 1; + + // SourceNamespace is the namespace of the source pods for this flow. + string source_namespace = 2; + + // SourceType is the type of the source, used to contextualize the source + // name and namespace fields. + // + // This can be one of: + // + // - wep: WorkloadEndpoint (i.e., Pod) + // - hep: HostEndpoint + // - ns: NetworkSet + // - pub/pvt: External network (source name omitted) + string source_type = 3; + + // DestName is the name of the destination for this Flow. It represents one or more + // destination pods that share a GenerateName. + string dest_name = 4; + + // DestNamespace is the namespace of the destination pods for this flow. + string dest_namespace = 5; + + // DestType is the type of the destination, used to contextualize the dest + // name and namespace fields. 
+ // + // This can be one of: + // + // - wep: WorkloadEndpoint (i.e., Pod) + // - hep: HostEndpoint + // - ns: NetworkSet + // - pub/pvt: External network (dest name omitted) + string dest_type = 6; + + // DestPort is the destination port on the specified protocol accessed by this flow. + int64 dest_port = 7; + + // DestServiceName is the name of the destination service, if any. + string dest_service_name = 8; + + // DestServiceNamespace is the namespace of the destination service, if any. + string dest_service_namespace = 9; + + // DestServicePortName is the name of the port on the destination service, if any. + string dest_service_port_name = 10; + + // DestServicePort is the port number on the destination service. + int64 dest_service_port = 11; + + // Proto is the L4 protocol for this flow. Either TCP or UDP. + string proto = 12; + + // Reporter is either "src" or "dst", depending on whether this flow was generated + // at the initiating or terminating end of the connection attempt. + string reporter = 13; + + // Action is the ultimate action taken on the flow. Either Allow or Drop. + string action = 14; + + // Policies includes an entry for each policy rule that took an action on the connections + // aggregated into this flow. + FlowLogPolicy policies = 15; +} + +// Flow is a message representing statistics gathered about connections that share common fields, +// aggregated across either time, nodes, or both. +message Flow { + // Key includes the identifying fields for this flow. + FlowKey Key = 1; + + // StartTime is the start time for this flow. It is represented as the number of + // seconds since the UNIX epoch. + int64 start_time = 2; + + // EndTime is the end time for this flow. It is always exactly one aggregation + // interval after the start time. + int64 end_time = 3; + + // SourceLabels contains the intersection of labels that appear on all source + // pods that contributed to this flow.
+ repeated string source_labels = 4; + + // DestLabels contains the intersection of labels that appear on all destination + // pods that contributed to this flow. + repeated string dest_labels = 5; + + // Statistics. + int64 packets_in = 6; + int64 packets_out = 7; + int64 bytes_in = 8; + int64 bytes_out = 9; + + // NumConnectionsStarted tracks the total number of new connections recorded for this Flow. It counts each + // connection attempt that matches the FlowKey that was made between this Flow's StartTime and EndTime. + int64 num_connections_started = 10; + + // NumConnectionsCompleted tracks the total number of completed TCP connections recorded for this Flow. It counts each + // connection that matches the FlowKey that was completed between this Flow's StartTime and EndTime. + int64 num_connections_completed = 11; + + // NumConnectionsLive tracks the total number of still active connections recorded for this Flow. It counts each + // connection that matches the FlowKey that was active at this Flow's EndTime. + int64 num_connections_live = 12; +} + +message FlowLogPolicy { + // AllPolicies is a list of strings containing policy rule information. + repeated string all_policies = 1; +} diff --git a/goldmane/proto/api_grpc.pb.go b/goldmane/proto/api_grpc.pb.go new file mode 100644 index 00000000000..5ca60a2371e --- /dev/null +++ b/goldmane/proto/api_grpc.pb.go @@ -0,0 +1,243 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.5.1 +// - protoc v3.5.0 +// source: api.proto + +package proto + +import ( + context "context" + + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.64.0 or later.
+const _ = grpc.SupportPackageIsVersion9 + +const ( + FlowAPI_List_FullMethodName = "/felix.FlowAPI/List" +) + +// FlowAPIClient is the client API for FlowAPI service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type FlowAPIClient interface { + // List is an API call to query for one or more Flows. + // Matching Flows are streamed back to the caller. + List(ctx context.Context, in *FlowRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[Flow], error) +} + +type flowAPIClient struct { + cc grpc.ClientConnInterface +} + +func NewFlowAPIClient(cc grpc.ClientConnInterface) FlowAPIClient { + return &flowAPIClient{cc} +} + +func (c *flowAPIClient) List(ctx context.Context, in *FlowRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[Flow], error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + stream, err := c.cc.NewStream(ctx, &FlowAPI_ServiceDesc.Streams[0], FlowAPI_List_FullMethodName, cOpts...) + if err != nil { + return nil, err + } + x := &grpc.GenericClientStream[FlowRequest, Flow]{ClientStream: stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. +type FlowAPI_ListClient = grpc.ServerStreamingClient[Flow] + +// FlowAPIServer is the server API for FlowAPI service. +// All implementations must embed UnimplementedFlowAPIServer +// for forward compatibility. +type FlowAPIServer interface { + // List is an API call to query for one or more Flows. + // Matching Flows are streamed back to the caller. 
+ List(*FlowRequest, grpc.ServerStreamingServer[Flow]) error + mustEmbedUnimplementedFlowAPIServer() +} + +// UnimplementedFlowAPIServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. +type UnimplementedFlowAPIServer struct{} + +func (UnimplementedFlowAPIServer) List(*FlowRequest, grpc.ServerStreamingServer[Flow]) error { + return status.Errorf(codes.Unimplemented, "method List not implemented") +} +func (UnimplementedFlowAPIServer) mustEmbedUnimplementedFlowAPIServer() {} +func (UnimplementedFlowAPIServer) testEmbeddedByValue() {} + +// UnsafeFlowAPIServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to FlowAPIServer will +// result in compilation errors. +type UnsafeFlowAPIServer interface { + mustEmbedUnimplementedFlowAPIServer() +} + +func RegisterFlowAPIServer(s grpc.ServiceRegistrar, srv FlowAPIServer) { + // If the following call pancis, it indicates UnimplementedFlowAPIServer was + // embedded by pointer and is nil. This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. + if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } + s.RegisterService(&FlowAPI_ServiceDesc, srv) +} + +func _FlowAPI_List_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(FlowRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(FlowAPIServer).List(m, &grpc.GenericServerStream[FlowRequest, Flow]{ServerStream: stream}) +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. 
+type FlowAPI_ListServer = grpc.ServerStreamingServer[Flow] + +// FlowAPI_ServiceDesc is the grpc.ServiceDesc for FlowAPI service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var FlowAPI_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "felix.FlowAPI", + HandlerType: (*FlowAPIServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{ + { + StreamName: "List", + Handler: _FlowAPI_List_Handler, + ServerStreams: true, + }, + }, + Metadata: "api.proto", +} + +const ( + FlowCollector_Connect_FullMethodName = "/felix.FlowCollector/Connect" +) + +// FlowCollectorClient is the client API for FlowCollector service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +// +// FlowCollector represents an API capable of receiving streams of Flow data +// from cluster nodes. +type FlowCollectorClient interface { + // Connect receives a connection that may stream one or more FlowUpdates. A FlowReceipt is returned + // to the client by the server after each FlowUpdate. + // + // Following a connection or reconnection to the server, clients should duplicates of previously transmitted FlowsUpdates + // in order to allow the server to rebuild its cache, as well as any new FlowUpdates that have not previously been transmitted. + // The server is responsible for deduplicating where needed. 
+ Connect(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[FlowUpdate, FlowReceipt], error) +} + +type flowCollectorClient struct { + cc grpc.ClientConnInterface +} + +func NewFlowCollectorClient(cc grpc.ClientConnInterface) FlowCollectorClient { + return &flowCollectorClient{cc} +} + +func (c *flowCollectorClient) Connect(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[FlowUpdate, FlowReceipt], error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + stream, err := c.cc.NewStream(ctx, &FlowCollector_ServiceDesc.Streams[0], FlowCollector_Connect_FullMethodName, cOpts...) + if err != nil { + return nil, err + } + x := &grpc.GenericClientStream[FlowUpdate, FlowReceipt]{ClientStream: stream} + return x, nil +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. +type FlowCollector_ConnectClient = grpc.BidiStreamingClient[FlowUpdate, FlowReceipt] + +// FlowCollectorServer is the server API for FlowCollector service. +// All implementations must embed UnimplementedFlowCollectorServer +// for forward compatibility. +// +// FlowCollector represents an API capable of receiving streams of Flow data +// from cluster nodes. +type FlowCollectorServer interface { + // Connect receives a connection that may stream one or more FlowUpdates. A FlowReceipt is returned + // to the client by the server after each FlowUpdate. + // + // Following a connection or reconnection to the server, clients should duplicates of previously transmitted FlowsUpdates + // in order to allow the server to rebuild its cache, as well as any new FlowUpdates that have not previously been transmitted. + // The server is responsible for deduplicating where needed. 
+ Connect(grpc.BidiStreamingServer[FlowUpdate, FlowReceipt]) error + mustEmbedUnimplementedFlowCollectorServer() +} + +// UnimplementedFlowCollectorServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. +type UnimplementedFlowCollectorServer struct{} + +func (UnimplementedFlowCollectorServer) Connect(grpc.BidiStreamingServer[FlowUpdate, FlowReceipt]) error { + return status.Errorf(codes.Unimplemented, "method Connect not implemented") +} +func (UnimplementedFlowCollectorServer) mustEmbedUnimplementedFlowCollectorServer() {} +func (UnimplementedFlowCollectorServer) testEmbeddedByValue() {} + +// UnsafeFlowCollectorServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to FlowCollectorServer will +// result in compilation errors. +type UnsafeFlowCollectorServer interface { + mustEmbedUnimplementedFlowCollectorServer() +} + +func RegisterFlowCollectorServer(s grpc.ServiceRegistrar, srv FlowCollectorServer) { + // If the following call pancis, it indicates UnimplementedFlowCollectorServer was + // embedded by pointer and is nil. This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. + if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } + s.RegisterService(&FlowCollector_ServiceDesc, srv) +} + +func _FlowCollector_Connect_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(FlowCollectorServer).Connect(&grpc.GenericServerStream[FlowUpdate, FlowReceipt]{ServerStream: stream}) +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. 
+type FlowCollector_ConnectServer = grpc.BidiStreamingServer[FlowUpdate, FlowReceipt] + +// FlowCollector_ServiceDesc is the grpc.ServiceDesc for FlowCollector service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var FlowCollector_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "felix.FlowCollector", + HandlerType: (*FlowCollectorServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{ + { + StreamName: "Connect", + Handler: _FlowCollector_Connect_Handler, + ServerStreams: true, + ClientStreams: true, + }, + }, + Metadata: "api.proto", +} diff --git a/release/internal/pinnedversion/templates/calico-versions.yaml.gotmpl b/release/internal/pinnedversion/templates/calico-versions.yaml.gotmpl index 810ed8fdc17..e5d2b2853c3 100644 --- a/release/internal/pinnedversion/templates/calico-versions.yaml.gotmpl +++ b/release/internal/pinnedversion/templates/calico-versions.yaml.gotmpl @@ -25,6 +25,8 @@ version: {{.ProductVersion}} calico/api: version: {{.ProductVersion}} + calico/goldmane: + version: {{.ProductVersion}} networking-calico: version: {{.ReleaseBranch}} flannel: diff --git a/release/pkg/manager/calico/manager.go b/release/pkg/manager/calico/manager.go index 01effa5e38d..ebb8ecdaa00 100644 --- a/release/pkg/manager/calico/manager.go +++ b/release/pkg/manager/calico/manager.go @@ -58,6 +58,7 @@ var ( "node", "pod2daemon", "typha", + "goldmane", } // Directories for Windows. @@ -83,6 +84,7 @@ var ( "calico/pod2daemon-flexvol", "calico/test-signer", "calico/typha", + "calico/goldmane", } windowsImages = []string{ "calico/cni-windows", @@ -733,6 +735,8 @@ func (r *CalicoManager) assertImageVersions() error { return fmt.Errorf("version does not match for image %s/%s:%s", reg, imageName, r.calicoVersion) } } + case "calico/goldmane": + // goldmane does not have version information in the image. 
default: return fmt.Errorf("unknown image: %s, update assertion to include validating image", img) }