Compare commits

20 Commits

| Author | SHA1 | Date |
|---|---|---|
| | 0d93c9ea67 | |
| | 587290f1fb | |
| | 24904ef084 | |
| | 966985dc3e | |
| | d3cb92eb0f | |
| | 4cce553441 | |
| | a0ecc5c11a | |
| | 5fa41a6aad | |
| | 49fb881f24 | |
| | e54a02ad8c | |
| | 0a3249f30b | |
| | 1eeb4b20df | |
| | 0c490e95d2 | |
| | 81b771d1ee | |
| | 3e21f978ee | |
| | 458b786ff4 | |
| | 96be13a23c | |
| | 8d51afc24e | |
| | a2d57908f6 | |
| | 4298031a2d | |
@@ -19,7 +19,16 @@ jobs:
       - name: Set up Helm
         uses: azure/setup-helm@v3
         with:
-          version: v3.10.0
+          version: v3.10.0 # Using a specific version, can be updated
+
+      - name: Add Helm repositories
+        run: |
+          helm repo add bjw-s https://bjw-s-labs.github.io/helm-charts/ --force-update
+          helm repo add prometheus-community https://prometheus-community.github.io/helm-charts --force-update
+          helm repo update
+
+      - name: Build Helm chart dependencies
+        run: helm dependency build ./charts/iperf3-monitor
+
       - name: Helm Lint
         run: helm lint ./charts/iperf3-monitor
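The new lint steps can be reproduced locally before opening a PR; a minimal sketch using the same commands the workflow runs:

```bash
# Mirror the CI lint job locally
helm repo add bjw-s https://bjw-s-labs.github.io/helm-charts/ --force-update
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts --force-update
helm repo update
helm dependency build ./charts/iperf3-monitor   # fetches subcharts into charts/iperf3-monitor/charts/
helm lint ./charts/iperf3-monitor
```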
@@ -27,23 +36,49 @@ jobs:
   build:
     name: Build Docker Image
     runs-on: ubuntu-latest
+    permissions:
+      contents: read # Needed to checkout the repository
+      packages: write # Needed to push Docker images to GHCR
     steps:
       - name: Check out code
         uses: actions/checkout@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
       - name: Extract metadata (tags, labels) for Docker
         id: meta
         uses: docker/metadata-action@v4
         with:
           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            # Tag with the PR number if it's a pull request event
+            type=match,pattern=pull_request,value=pr-{{number}}
+            # Tag with the git SHA
+            type=sha,prefix=
+            # Tag with 'latest' if on the main branch (though this workflow only runs on PRs to main)
+            type=ref,event=branch,pattern=main,value=latest
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Build Docker image
         uses: docker/build-push-action@v4
         with:
           context: ./exporter
-          push: false # Do not push on PRs
+          # Push the image if the event is a pull request.
+          # The workflow currently only triggers on pull_request events.
+          push: ${{ github.event_name == 'pull_request' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
+          platforms: linux/amd64,linux/arm64

   test:
     name: Run Tests
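With QEMU and Buildx set up, the job can cross-build both declared platforms. A rough local equivalent, assuming Docker with the buildx plugin (the image tag is illustrative):

```bash
# Emulate the CI multi-arch build locally; QEMU provides arm64 emulation on an amd64 host
docker run --privileged --rm tonistiigi/binfmt --install arm64   # same image setup-qemu-action uses
docker buildx create --name multiarch --use                      # builder with multi-platform support
docker buildx build --platform linux/amd64,linux/arm64 \
  -t ghcr.io/example/iperf3-monitor:dev ./exporter               # illustrative tag
```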
@@ -22,6 +22,15 @@ jobs:
         with:
           version: v3.10.0
+
+      - name: Add Helm repositories
+        run: |
+          helm repo add bjw-s https://bjw-s-labs.github.io/helm-charts/ --force-update
+          helm repo add prometheus-community https://prometheus-community.github.io/helm-charts --force-update
+          helm repo update
+
+      - name: Build Helm chart dependencies
+        run: helm dependency build ./charts/iperf3-monitor
+
       - name: Helm Lint
         run: helm lint ./charts/iperf3-monitor

@@ -36,6 +45,12 @@ jobs:
       - name: Check out code
         uses: actions/checkout@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
       - name: Log in to GitHub Container Registry
         uses: docker/login-action@v2
         with:

@@ -48,6 +63,11 @@ jobs:
         uses: docker/metadata-action@v4
         with:
           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=semver,pattern={{version}}
+            # This ensures that for a git tag like "v0.1.0",
+            # an image tag "0.1.0" is generated.
+            # It will also generate "latest" for the most recent semver tag.

       - name: Build and push Docker image
         uses: docker/build-push-action@v4

@@ -56,6 +76,7 @@ jobs:
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
+          platforms: linux/amd64,linux/arm64

   package-and-publish-chart:
     name: Package and Publish Helm Chart

@@ -79,11 +100,20 @@ jobs:
           sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq &&\
           sudo chmod +x /usr/bin/yq
+
+      - name: Add Helm repositories
+        run: |
+          helm repo add bjw-s https://bjw-s-labs.github.io/helm-charts/ --force-update
+          helm repo add prometheus-community https://prometheus-community.github.io/helm-charts --force-update
+          helm repo update
+
+      - name: Build Helm chart dependencies
+        run: helm dependency build ./charts/iperf3-monitor
+
       - name: Set Chart Version from Tag
         run: |
           VERSION=$(echo "${{ github.ref_name }}" | sed 's/^v//')
-          yq e -i '.version = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
-          yq e -i '.appVersion = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
+          VERSION=$VERSION yq e -i '.version = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
+          VERSION=$VERSION yq e -i '.appVersion = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
           cat ./charts/iperf3-monitor/Chart.yaml # Optional: print updated Chart.yaml

       - name: Publish Helm chart
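The `VERSION=$VERSION` prefix in the new lines is the substantive fix: yq's `strenv()` reads the process environment, and an unexported shell variable is invisible to the yq child process. A small demonstration of the difference:

```bash
VERSION=0.1.0   # plain shell variable, not exported
yq e '.version = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
# -> fails: strenv(VERSION) cannot see the unexported variable

VERSION=$VERSION yq e -i '.version = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
# -> works: the variable is placed in the environment of this one yq invocation
```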
@@ -36,4 +36,8 @@ Thumbs.db

 # Helm
 !charts/iperf3-monitor/.helmignore
-charts/*.tgz # Ignore packaged chart files
+charts/iperf3-monitor/charts/
+
+# Rendered Kubernetes manifests (for local testing)
+rendered-manifests.yaml
+rendered-manifests-updated.yaml
README.md (118 changes): file diff suppressed because it is too large.
@@ -74,37 +74,123 @@ nameOverride: ""
 # -- Override the fully qualified app name.
 fullnameOverride: ""
+
+# Exporter Configuration (`controllers.exporter`)
+# The iperf3 exporter is managed under the `controllers.exporter` section,
+# leveraging the `bjw-s/common-library` for robust workload management.
+controllers:
   exporter:
-  # -- Configuration for the exporter container image.
-  image:
-    # -- The container image repository for the exporter.
-    repository: ghcr.io/malarinv/iperf3-monitor
-    # -- The container image tag for the exporter. If not set, the chart's appVersion is used.
-    tag: ""
-    # -- The image pull policy for the exporter container.
-    pullPolicy: IfNotPresent
-  # -- Number of exporter pod replicas. Typically 1 is sufficient.
-  replicaCount: 1
+    # -- Enable the exporter controller.
+    enabled: true
+    # -- Set the controller type for the exporter.
+    # Valid options are "deployment" or "daemonset".
+    # Use "daemonset" for N-to-N node monitoring where an exporter runs on each node (or selected nodes).
+    # Use "deployment" for a centralized exporter (typically with replicaCount: 1).
+    # @default -- "deployment"
+    type: deployment
+    # -- Number of desired exporter pods. Only used if type is "deployment".
+    # @default -- 1
+    replicas: 1
+
+    # -- Application-specific configuration for the iperf3 exporter.
+    # These values are used to populate environment variables for the exporter container.
+    appConfig:
       # -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
       testInterval: 300
+      # -- Log level for the iperf3 exporter (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL).
+      logLevel: INFO
       # -- Timeout in seconds for a single iperf3 test run.
       testTimeout: 10

       # -- Protocol to use for testing (tcp or udp).
       testProtocol: tcp
+      # -- iperf3 server port to connect to. Should match the server's listening port.
+      serverPort: "5201"
+      # -- Label selector to find iperf3 server pods.
+      # This is templated. Default: 'app.kubernetes.io/name=<chart-name>,app.kubernetes.io/instance=<release-name>,app.kubernetes.io/component=server'
+      serverLabelSelector: 'app.kubernetes.io/name={{ include "iperf3-monitor.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server'

-  # -- CPU and memory resource requests and limits for the exporter pod.
-  # @default -- A small default is provided if commented out.
-  resources: {}
+    # -- Pod-level configurations for the exporter.
+    pod:
+      # -- Annotations for the exporter pod.
+      annotations: {}
+      # -- Labels for the exporter pod (the common library adds its own defaults too).
+      labels: {}
+      # -- Node selector for scheduling exporter pods. Useful for DaemonSet or specific scheduling with Deployments.
+      # Example:
+      # nodeSelector:
+      #   kubernetes.io/os: linux
+      nodeSelector: {}
+      # -- Tolerations for scheduling exporter pods.
+      # Example:
+      # tolerations:
+      #   - key: "node-role.kubernetes.io/control-plane"
+      #     operator: "Exists"
+      #     effect: "NoSchedule"
+      tolerations: []
+      # -- Affinity rules for scheduling exporter pods.
+      # Example:
+      # affinity:
+      #   nodeAffinity:
+      #     requiredDuringSchedulingIgnoredDuringExecution:
+      #       nodeSelectorTerms:
+      #         - matchExpressions:
+      #             - key: "kubernetes.io/arch"
+      #               operator: In
+      #               values:
+      #                 - amd64
+      affinity: {}
+      # -- Security context for the exporter pod.
+      # securityContext:
+      #   fsGroup: 65534
+      #   runAsUser: 65534
+      #   runAsGroup: 65534
+      #   runAsNonRoot: true
+      securityContext: {}
+      # -- Automount service account token for the pod.
+      automountServiceAccountToken: true
+
+    # -- Container-level configurations for the main exporter container.
+    containers:
+      exporter: # Name of the primary container
+        image:
+          repository: ghcr.io/malarinv/iperf3-monitor
+          tag: "" # Defaults to .Chart.AppVersion
+          pullPolicy: IfNotPresent
+        # -- Custom environment variables for the exporter container.
+        # These are merged with the ones generated from appConfig.
+        # env:
+        #   MY_CUSTOM_VAR: "my_value"
+        env: {}
+        # -- Ports for the exporter container.
+        ports:
+          metrics: # Name of the port
+            port: 9876 # Container port for metrics
+            protocol: TCP
+            enabled: true
+        # -- CPU and memory resource requests and limits.
+        # resources:
         #   requests:
         #     cpu: "100m"
         #     memory: "128Mi"
         #   limits:
         #     cpu: "500m"
         #     memory: "256Mi"
+        resources: {}
+        # -- Probes configuration for the exporter container.
+        # probes:
+        #   liveness:
+        #     enabled: true # Example: enable liveness probe
+        #     spec: # Customize probe spec if needed
+        #       initialDelaySeconds: 30
+        #       periodSeconds: 15
+        #       timeoutSeconds: 5
+        #       failureThreshold: 3
+        probes:
+          liveness:
+            enabled: false
+          readiness:
+            enabled: false
+          startup:
+            enabled: false
+
 server:
   # -- Configuration for the iperf3 server container image (DaemonSet).
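A sketch of how a user might consume the restructured values, for example switching the exporter to one pod per node (release name and namespace are illustrative):

```bash
# Run one exporter per node instead of a single Deployment replica
helm upgrade --install iperf3-monitor ./charts/iperf3-monitor \
  --namespace monitoring --create-namespace \
  --set controllers.exporter.type=daemonset
```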
@@ -1,6 +1,12 @@
 dependencies:
 - name: kube-prometheus-stack
   repository: https://prometheus-community.github.io/helm-charts
-  version: 75.3.6
-digest: sha256:d15acd48bfc0b842654ae025e1bd1969e636a66508020312d555db84f381c379
-generated: "2025-06-19T20:40:53.415529365Z"
+  version: 75.7.0
+- name: prometheus-operator
+  repository: oci://tccr.io/truecharts
+  version: 11.5.1
+- name: common
+  repository: https://bjw-s-labs.github.io/helm-charts/
+  version: 4.1.2
+digest: sha256:68485b4e158a6a405073e9c59966d251b62971846cdc9871e41fde46f19aabfe
+generated: "2025-07-01T20:32:00.061995907Z"
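This lock file is the output of dependency resolution rather than hand edits; a sketch of regenerating it after changing the Chart.yaml dependencies below:

```bash
# Re-resolve dependencies and rewrite Chart.lock (also refreshes the vendored subcharts)
helm dependency update ./charts/iperf3-monitor
git diff charts/iperf3-monitor/Chart.lock   # inspect the new versions and digest
```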
@@ -27,4 +27,11 @@ dependencies:
 - name: kube-prometheus-stack # Example dependency if you package the whole stack
   version: ">=30.0.0" # Specify a compatible version range
   repository: https://prometheus-community.github.io/helm-charts
-  condition: serviceMonitor.enabled # Only include if ServiceMonitor is enabled (assuming Prometheus Operator)
+  condition: "dependencies.install, serviceMonitor.enabled, !dependencies.useTrueChartsPrometheusOperator"
+- name: prometheus-operator
+  version: ">=8.11.1"
+  repository: "oci://tccr.io/truecharts"
+  condition: "dependencies.install, serviceMonitor.enabled, dependencies.useTrueChartsPrometheusOperator"
+- name: common
+  version: "4.1.2"
+  repository: "https://bjw-s-labs.github.io/helm-charts/"
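These conditions gate the optional operator subcharts; Helm enables a dependency only when the comma-separated value paths evaluate to true (stock Helm documents conditions as plain boolean value paths, so whether the `!` negation in the first entry is honored is worth verifying against the Helm version in use). A sketch of opting in to the bundled kube-prometheus-stack:

```bash
# Pull in kube-prometheus-stack as a subchart instead of relying on an existing operator
helm upgrade --install iperf3-monitor ./charts/iperf3-monitor \
  --set dependencies.install=true \
  --set serviceMonitor.enabled=true \
  --set dependencies.useTrueChartsPrometheusOperator=false
```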
Binary file not shown.
@@ -0,0 +1,194 @@
+{
+  "__inputs": [],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "8.0.0"
+    },
+    {
+      "type": "datasource",
+      "id": "prometheus",
+      "name": "Prometheus",
+      "version": "1.0.0"
+    }
+  ],
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "gnetId": null,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "gridPos": {
+        "h": 9,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 2,
+      "targets": [
+        {
+          "expr": "avg(iperf_network_bandwidth_mbps) by (source_node, destination_node)",
+          "format": "heatmap",
+          "legendFormat": "{{source_node}} -> {{destination_node}}",
+          "refId": "A"
+        }
+      ],
+      "cards": { "cardPadding": null, "cardRound": null },
+      "color": {
+        "mode": "spectrum",
+        "scheme": "red-yellow-green",
+        "exponent": 0.5,
+        "reverse": false
+      },
+      "dataFormat": "tsbuckets",
+      "yAxis": { "show": true, "format": "short" },
+      "xAxis": { "show": true }
+    },
+    {
+      "title": "Bandwidth Over Time (Source: $source_node, Dest: $destination_node)",
+      "type": "timeseries",
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 9
+      },
+      "targets": [
+        {
+          "expr": "iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\", destination_node=~\"^$destination_node$\", protocol=~\"^$protocol$\"}",
+          "legendFormat": "Bandwidth",
+          "refId": "A"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "mbps"
+        }
+      }
+    },
+    {
+      "title": "Jitter Over Time (Source: $source_node, Dest: $destination_node)",
+      "type": "timeseries",
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 9
+      },
+      "targets": [
+        {
+          "expr": "iperf_network_jitter_ms{source_node=~\"^$source_node$\", destination_node=~\"^$destination_node$\", protocol=\"udp\"}",
+          "legendFormat": "Jitter",
+          "refId": "A"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms"
+        }
+      }
+    }
+  ],
+  "refresh": "30s",
+  "schemaVersion": 36,
+  "style": "dark",
+  "tags": ["iperf3", "network", "kubernetes"],
+  "templating": {
+    "list": [
+      {
+        "current": {},
+        "datasource": {
+          "type": "prometheus",
+          "uid": "prometheus"
+        },
+        "definition": "label_values(iperf_network_bandwidth_mbps, source_node)",
+        "hide": 0,
+        "includeAll": false,
+        "multi": false,
+        "name": "source_node",
+        "options": [],
+        "query": "label_values(iperf_network_bandwidth_mbps, source_node)",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "type": "query"
+      },
+      {
+        "current": {},
+        "datasource": {
+          "type": "prometheus",
+          "uid": "prometheus"
+        },
+        "definition": "label_values(iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\"}, destination_node)",
+        "hide": 0,
+        "includeAll": false,
+        "multi": false,
+        "name": "destination_node",
+        "options": [],
+        "query": "label_values(iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\"}, destination_node)",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "type": "query"
+      },
+      {
+        "current": { "selected": true, "text": "tcp", "value": "tcp" },
+        "hide": 0,
+        "includeAll": false,
+        "multi": false,
+        "name": "protocol",
+        "options": [
+          { "selected": true, "text": "tcp", "value": "tcp" },
+          { "selected": false, "text": "udp", "value": "udp" }
+        ],
+        "query": "tcp,udp",
+        "skipUrlSync": false,
+        "type": "custom"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Kubernetes iperf3 Network Performance",
+  "uid": "k8s-iperf3-dashboard",
+  "version": 1,
+  "weekStart": ""
+}
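A quick sanity check of the new dashboard file (the path is assumed from the ConfigMap template further down, which reads it via `.Files.Get "grafana/iperf3-dashboard.json"`):

```bash
# Validate that the dashboard JSON parses, then list its panel titles
jq -e . charts/iperf3-monitor/grafana/iperf3-dashboard.json > /dev/null && echo "valid JSON"
jq -r '.panels[].title // "untitled"' charts/iperf3-monitor/grafana/iperf3-dashboard.json
```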
@@ -47,7 +47,7 @@ app.kubernetes.io/instance: {{ .Release.Name }}
 Create the name of the service account to use
 */}}
 {{- define "iperf3-monitor.serviceAccountName" -}}
-{{- if .Values.serviceAccount.create -}}
+{{- if .Values.rbac.create -}}
 {{- default (include "iperf3-monitor.fullname" .) .Values.serviceAccount.name -}}
 {{- else -}}
 {{- default "default" .Values.serviceAccount.name -}}
@@ -0,0 +1,140 @@
+{{- /*
+This template is responsible for rendering the 'exporter' controller (Deployment or DaemonSet)
+by calling the bjw-s common library.
+
+The primary values for the exporter are expected under .Values.controllers.exporter.
+Modifications to environment variables and service account are handled here before
+passing the configuration to the common library.
+*/}}
+
+{{- /*
+Prepare a local, modifiable copy of the .Values. This allows us to adjust the
+exporter controller's configuration (like env vars and SA) specifically for this chart's needs
+before the common library processes it.
+Convert to map[string]interface{} via toYaml/fromYaml to ensure compatibility with 'dig'.
+*/}}
+{{- $localValues := .Values | toYaml | fromYaml | deepCopy -}}
+{{- $chart := .Chart -}}
+{{- $release := .Release -}}
+{{- $appName := include "iperf3-monitor.name" . -}}
+{{- $fullName := include "iperf3-monitor.fullname" . -}}
+
+{{- /*
+Define the key for the exporter controller, typically "exporter" as per our values.yaml.
+*/}}
+{{- $exporterControllerKey := "exporter" -}}
+
+{{- /*
+Attempt to get the exporter controller's configuration block.
+Proceed with modifications only if the exporter controller is defined.
+*/}}
+{{- $exporterControllerConfig := get $localValues.controllers $exporterControllerKey -}}
+{{- if $exporterControllerConfig -}}
+
+{{- /*
+Construct the base set of environment variables required by the iperf3-exporter application.
+These are derived from the 'appConfig' section of the exporter's controller configuration.
+*/}}
+{{- $baseExporterEnv := dict -}}
+{{- if $exporterControllerConfig.appConfig -}}
+{{- $_ := set $baseExporterEnv "SOURCE_NODE_NAME" (dict "valueFrom" (dict "fieldRef" (dict "fieldPath" "spec.nodeName"))) -}}
+{{- $_ := set $baseExporterEnv "IPERF_TEST_INTERVAL" ($exporterControllerConfig.appConfig.testInterval | default "300" | toString) -}}
+{{- $_ := set $baseExporterEnv "IPERF_TEST_PROTOCOL" ($exporterControllerConfig.appConfig.testProtocol | default "tcp") -}}
+{{- $_ := set $baseExporterEnv "LOG_LEVEL" ($exporterControllerConfig.appConfig.logLevel | default "INFO") -}}
+{{- $_ := set $baseExporterEnv "IPERF_SERVER_PORT" ($exporterControllerConfig.appConfig.serverPort | default "5201" | toString) -}}
+{{- $_ := set $baseExporterEnv "IPERF_SERVER_NAMESPACE" (dict "valueFrom" (dict "fieldRef" (dict "fieldPath" "metadata.namespace"))) -}}
+{{- $_ := set $baseExporterEnv "IPERF_TEST_TIMEOUT" ($exporterControllerConfig.appConfig.testTimeout | default "10" | toString) -}}
+{{- $serverLabelSelectorDefault := printf "app.kubernetes.io/name=%s,app.kubernetes.io/instance=%s,app.kubernetes.io/component=server" $appName $release.Name -}}
+{{- $serverLabelSelector := tpl ($exporterControllerConfig.appConfig.serverLabelSelector | default $serverLabelSelectorDefault) . -}}
+{{- $_ := set $baseExporterEnv "IPERF_SERVER_LABEL_SELECTOR" $serverLabelSelector -}}
+{{- end -}}
+
+{{- /*
+Merge the base environment variables with any user-defined environment variables.
+User-defined variables (from .Values.controllers.exporter.containers.exporter.env)
+will take precedence in case of conflicting keys.
+*/}}
+{{- $userExporterEnv := $exporterControllerConfig.containers.exporter.env | default dict -}}
+{{- $finalExporterEnv := mergeOverwrite $baseExporterEnv $userExporterEnv -}}
+
+{{- /*
+Ensure the container structure exists and update its 'env' field with the final set.
+The common library expects this under controllers.<key>.containers.<containerName>.env
+*/}}
+{{- if not $exporterControllerConfig.containers -}}
+{{- $_ := set $exporterControllerConfig "containers" dict -}}
+{{- end -}}
+{{- if not $exporterControllerConfig.containers.exporter -}}
+{{- $_ := set $exporterControllerConfig.containers "exporter" dict -}}
+{{- end -}}
+{{- $_ := set $exporterControllerConfig.containers.exporter "env" $finalExporterEnv -}}
+
+{{- /*
+Ensure the container image tag is set, defaulting to Chart.AppVersion if empty,
+as the common library validation requires it during 'helm template'.
+
+NOTE: BJW-S common library typically handles defaulting image.tag to Chart.appVersion
+if image.tag is empty or null in values. The custom logic below prepending "v"
+is specific to this chart and might be redundant if the common library's default
+is preferred. For now, we keep it as it was the reason for previous errors if tag was not set.
+However, if common library handles it, this block could be removed and image.tag in values.yaml set to "" or null.
+Forcing the tag to be set (even if to chart.appVersion) ensures the common library doesn't complain.
+The issue encountered during `helm template` earlier (empty output) was resolved by
+explicitly setting the tag (e.g. via --set or by ensuring values.yaml has it).
+The common library's internal validation likely needs *a* tag to be present in the values passed to it,
+even if that tag is derived from AppVersion. This block ensures that.
+*/}}
+{{- $exporterContainerCfg := get $exporterControllerConfig.containers "exporter" -}}
+{{- if $exporterContainerCfg -}}
+{{- if not $exporterContainerCfg.image.tag -}}
+{{- if $chart.AppVersion -}}
+{{- $_ := set $exporterContainerCfg.image "tag" (printf "%s" $chart.AppVersion) -}} # Removed "v" prefix
+{{- else -}}
+{{- fail (printf "Error: Container image tag is not specified for controller '%s', container '%s', and Chart.AppVersion is also empty." $exporterControllerKey "exporter") -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{- /*
+Configure the Service Account for the exporter controller.
+This ensures the controller pod uses the ServiceAccount that is intended by this chart's
+RBAC configuration (.Values.rbac.create and .Values.serviceAccount.name).
+*/}}
+{{- $serviceAccountNameFromValues := $localValues.serviceAccount.name | default (printf "%s-exporter" $fullName) -}}
+{{- if not $exporterControllerConfig.serviceAccount -}}
+{{- $_ := set $exporterControllerConfig "serviceAccount" dict -}}
+{{- end -}}
+{{- $_ := set $exporterControllerConfig.serviceAccount "name" $serviceAccountNameFromValues -}}
+{{- $_ := set $exporterControllerConfig.serviceAccount "create" $localValues.rbac.create -}}
+{{- $_ := set $exporterControllerConfig.serviceAccount "automountServiceAccountToken" ($exporterControllerConfig.pod.automountServiceAccountToken | default true) -}}
+
+{{- /*
+Replace the original exporter controller config in our $localValues copy
+with the modified version (that now includes the correct env and SA settings).
+*/}}
+{{- $_ := set $localValues.controllers $exporterControllerKey $exporterControllerConfig -}}
+{{- end -}}
+
+{{- /*
+Ensure .Values.global exists and is a map, as the common library expects it.
+*/}}
+{{- if not (get $localValues "global") -}}
+{{- $_ := set $localValues "global" dict -}}
+{{- else if not (kindIs "map" (get $localValues "global")) -}}
+{{- $_ := set $localValues "global" dict -}}
+{{- end -}}
+
+{{- /*
+Ensure defaultPodOptionsStrategy exists, as common lib expects it at the root of Values.
+*/}}
+{{- if not (get $localValues "defaultPodOptionsStrategy") -}}
+{{- $_ := set $localValues "defaultPodOptionsStrategy" "overwrite" -}}
+{{- end -}}
+
+{{- /*
+Call the common library's main render function for controllers.
+This function iterates through all controllers defined under $localValues.controllers
+(in our case, just "exporter") and renders them using their specified type and configuration.
+The context passed must mirror the global Helm context, including 'Values', 'Chart', 'Release', 'Capabilities', and 'Template'.
+*/}}
+{{- include "bjw-s.common.render.controllers" (dict "Values" $localValues "Chart" $chart "Release" $release "Capabilities" .Capabilities "Template" .Template) | nindent 0 -}}
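To inspect what the common library actually renders from this wrapper, something like the following works (the template filename is an assumption; substitute the real path under templates/):

```bash
# Render only the exporter controller and review the generated Deployment/DaemonSet
helm template iperf3-monitor ./charts/iperf3-monitor \
  --show-only templates/exporter-controller.yaml   # filename assumed
```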
@@ -1,48 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ include "iperf3-monitor.fullname" . }}-exporter
-  labels:
-    {{- include "iperf3-monitor.labels" . | nindent 4 }}
-    app.kubernetes.io/component: exporter
-spec:
-  replicas: {{ .Values.exporter.replicaCount }}
-  selector:
-    matchLabels:
-      {{- include "iperf3-monitor.selectorLabels" . | nindent 6 }}
-      app.kubernetes.io/component: exporter
-  template:
-    metadata:
-      labels:
-        {{- include "iperf3-monitor.selectorLabels" . | nindent 8 }}
-        app.kubernetes.io/component: exporter
-    spec:
-      serviceAccountName: {{ include "iperf3-monitor.serviceAccountName" . }}
-      containers:
-        - name: iperf3-exporter
-          image: "{{ .Values.exporter.image.repository }}:{{ .Values.exporter.image.tag | default .Chart.AppVersion }}"
-          imagePullPolicy: {{ .Values.exporter.image.pullPolicy }}
-          ports:
-            - containerPort: {{ .Values.service.targetPort }}
-              name: metrics
-          env:
-            - name: SOURCE_NODE_NAME
-              valueFrom:
-                fieldRef:
-                  fieldPath: spec.nodeName
-            - name: IPERF_TEST_INTERVAL
-              value: "{{ .Values.exporter.testInterval }}"
-            - name: IPERF_TEST_PROTOCOL
-              value: "{{ .Values.exporter.testProtocol }}"
-            - name: IPERF_SERVER_PORT
-              value: "5201" # Hardcoded as per server DaemonSet
-            - name: IPERF_SERVER_NAMESPACE
-              valueFrom:
-                fieldRef:
-                  fieldPath: metadata.namespace
-            - name: IPERF_SERVER_LABEL_SELECTOR
-              value: 'app.kubernetes.io/name={{ include "iperf3-monitor.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server'
-          {{- with .Values.exporter.resources }}
-          resources:
-            {{- toYaml . | nindent 10 }}
-          {{- end }}
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-grafana-dashboard
+  labels:
+    grafana_dashboard: "1"
+    app.kubernetes.io/name: {{ include "iperf3-monitor.name" . }}
+    helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/managed-by: {{ .Release.Service }}
+data:
+  iperf3-dashboard.json: |
+    {{ .Files.Get "grafana/iperf3-dashboard.json" | nindent 4 }}
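The `grafana_dashboard: "1"` label is what Grafana's dashboard sidecar (as shipped with kube-prometheus-stack) watches for; a quick way to confirm the ConfigMap is discoverable (namespace illustrative):

```bash
# List dashboard ConfigMaps the Grafana sidecar would pick up
kubectl get configmaps -n monitoring -l grafana_dashboard=1
```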
@@ -7,9 +7,10 @@ metadata:
     {{- include "iperf3-monitor.labels" . | nindent 4 }}
 ---
 apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
+kind: Role
 metadata:
   name: {{ include "iperf3-monitor.fullname" . }}-role
+  namespace: {{ .Release.Namespace }}
   labels:
     {{- include "iperf3-monitor.labels" . | nindent 4 }}
 rules:

@@ -18,9 +19,10 @@ rules:
     verbs: ["get", "list", "watch"]
 ---
 apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
+kind: RoleBinding
 metadata:
   name: {{ include "iperf3-monitor.fullname" . }}-rb
+  namespace: {{ .Release.Namespace }}
   labels:
     {{- include "iperf3-monitor.labels" . | nindent 4 }}
 subjects:

@@ -28,7 +30,7 @@ subjects:
     name: {{ include "iperf3-monitor.serviceAccountName" . }}
     namespace: {{ .Release.Namespace }}
 roleRef:
-  kind: ClusterRole
+  kind: Role # Changed from ClusterRole
   name: {{ include "iperf3-monitor.fullname" . }}-role
   apiGroup: rbac.authorization.k8s.io
 {{- end -}}
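Scoping from ClusterRole to Role matches the exporter's switch to `list_namespaced_pod` (see the exporter.py hunks further down). The narrowed permissions can be verified per namespace, for example:

```bash
# Confirm the exporter's ServiceAccount can list pods only in its own namespace
kubectl auth can-i list pods -n monitoring \
  --as=system:serviceaccount:monitoring:iperf3-monitor   # expect "yes"
kubectl auth can-i list pods --all-namespaces \
  --as=system:serviceaccount:monitoring:iperf3-monitor   # expect "no"
```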
@@ -11,7 +11,7 @@ spec:
     {{- include "iperf3-monitor.selectorLabels" . | nindent 4 }}
     app.kubernetes.io/component: exporter
   ports:
-    - name: metrics
-      port: {{ .Values.service.port }}
-      targetPort: {{ .Values.service.targetPort }}
-      protocol: TCP
+    - name: metrics # Assuming 'metrics' is the intended name, aligns with values structure
+      port: {{ .Values.service.main.ports.metrics.port }}
+      targetPort: {{ .Values.service.main.ports.metrics.targetPort }}
+      protocol: {{ .Values.service.main.ports.metrics.protocol | default "TCP" }}
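Once deployed, the exposed metrics port can be exercised directly (service name and namespace illustrative):

```bash
# Scrape the exporter once by hand
kubectl port-forward -n monitoring svc/iperf3-monitor 9876:9876 &
curl -s localhost:9876/metrics | grep iperf_network_bandwidth_mbps
```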
@@ -8,30 +8,96 @@ nameOverride: ""
 # -- Override the fully qualified app name.
 fullnameOverride: ""
+
+controllers:
   exporter:
-  # -- Configuration for the exporter container image.
+    # -- Enable the exporter controller.
+    enabled: true
+    # -- Set the controller type for the exporter.
+    # Valid options are "deployment" or "daemonset".
+    # @default -- "deployment"
+    type: deployment
+    # -- Number of desired exporter pods. Only used if type is "deployment".
+    # @default -- 1
+    replicas: 1
+
+    # -- Application-specific configuration for the iperf3 exporter.
+    # These values are used to populate environment variables for the exporter container.
+    appConfig:
+      # -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
+      testInterval: 300
+      # -- Log level for the iperf3 exporter (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL).
+      logLevel: INFO
+      # -- Timeout in seconds for a single iperf3 test run.
+      testTimeout: 10
+      # -- Protocol to use for testing (tcp or udp).
+      testProtocol: tcp
+      # -- iperf3 server port to connect to. Should match the server's listening port.
+      # @default -- "5201" (hardcoded in the original chart for server daemonset)
+      serverPort: "5201"
+      # -- Label selector to find iperf3 server pods.
+      # This will be templated in the actual deployment.
+      # Example default (if not overridden by template logic): 'app.kubernetes.io/component=server'
+      serverLabelSelector: 'app.kubernetes.io/name={{ include "iperf3-monitor.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server'
+
+    # -- Pod-level configurations for the exporter, leveraging bjw-s common library structure.
+    pod:
+      # -- Annotations for the exporter pod.
+      annotations: {}
+      # -- Labels for the exporter pod.
+      labels:
+        app.kubernetes.io/component: exporter # Ensure pods get the component label for service selection
+      # -- Node selector for scheduling exporter pods.
+      nodeSelector: {}
+      # -- Tolerations for scheduling exporter pods.
+      tolerations: []
+      # -- Affinity rules for scheduling exporter pods.
+      affinity: {}
+      # -- Security context for the exporter pod.
+      securityContext: {}
+      # fsGroup: 65534
+      # runAsUser: 65534
+      # runAsGroup: 65534
+      # runAsNonRoot: true
+      # -- Automount service account token for the pod.
+      automountServiceAccountToken: true # Default from common lib
+
+    # -- Container-level configurations for the main exporter container.
+    containers:
+      exporter: # This is the primary container, name it 'exporter'
         image:
           # -- The container image repository for the exporter.
           repository: ghcr.io/malarinv/iperf3-monitor
           # -- The container image tag for the exporter. If not set, the chart's appVersion is used.
-    tag: ""
+          tag: "" # Defaults to .Chart.AppVersion via common library
           # -- The image pull policy for the exporter container.
           pullPolicy: IfNotPresent

-  # -- Number of exporter pod replicas. Typically 1 is sufficient.
-  replicaCount: 1
+        # -- Environment variables for the exporter container.
+        # The actual env map will be constructed in the main chart template
+        # and passed to the common library. This section is for user overrides
+        # if they want to directly set other env vars using common lib's env schema.
+        env: {}
+        # Example:
+        # MY_CUSTOM_VAR: "my_value"
+        # ANOTHER_VAR:
+        #   valueFrom:
+        #     secretKeyRef:
+        #       name: mysecret
+        #       key: mykey

-  # -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
-  testInterval: 300
+        # -- Ports for the exporter container.
+        # Expected by Kubernetes and bjw-s common library as a list of objects.
+        ports:
+          - name: metrics # Name of the port, referenced by Service's targetPort
+            # -- Port number for the metrics endpoint on the container.
+            containerPort: 9876
+            # -- Protocol for the metrics port.
+            protocol: TCP
+            # -- Whether this port definition is enabled. Specific to bjw-s common library.
+            enabled: true

-  # -- Timeout in seconds for a single iperf3 test run.
-  testTimeout: 10
-  # -- Protocol to use for testing (tcp or udp).
-  testProtocol: tcp
-  # -- CPU and memory resource requests and limits for the exporter pod.
-  # @default -- A small default is provided if commented out.
+        # -- CPU and memory resource requests and limits for the exporter container.
         resources:
           {}
           # requests:

@@ -41,6 +107,16 @@ exporter:
           #   cpu: "500m"
           #   memory: "256Mi"

+        # -- Probes configuration for the exporter container.
+        probes:
+          liveness:
+            enabled: false
+          readiness:
+            enabled: false
+          startup:
+            enabled: false
+
+# Server configuration (iperf3 server daemonset)
 server:
   # -- Configuration for the iperf3 server container image (DaemonSet).
   image:

@@ -50,8 +126,6 @@ server:
     tag: latest

   # -- CPU and memory resource requests and limits for the iperf3 server pods (DaemonSet).
-  # These should be very low as the server is mostly idle.
-  # @default -- A small default is provided if commented out.
   resources:
     {}
     # requests:

@@ -62,13 +136,9 @@ server:
     #   memory: "128Mi"

   # -- Node selector for scheduling iperf3 server pods.
-  # Use this to restrict the DaemonSet to a subset of nodes.
-  # @default -- {} (schedule on all nodes)
   nodeSelector: {}

-  # -- Tolerations for scheduling iperf3 server pods on tainted nodes (e.g., control-plane nodes).
-  # This is often necessary to include master nodes in the test mesh.
-  # @default -- Tolerates control-plane and master taints.
+  # -- Tolerations for scheduling iperf3 server pods on tainted nodes.
   tolerations:
     - key: "node-role.kubernetes.io/control-plane"
       operator: "Exists"

@@ -77,44 +147,62 @@ server:
       operator: "Exists"
       effect: "NoSchedule"

+# RBAC and ServiceAccount settings
+# These are for the exporter. The exporter deployment (managed by common library)
+# will need to use the ServiceAccount specified here or one created by the library.
 rbac:
   # -- If true, create ServiceAccount, ClusterRole, and ClusterRoleBinding for the exporter.
-  # Set to false if you manage RBAC externally.
   create: true

 serviceAccount:
-  # -- The name of the ServiceAccount to use for the exporter pod.
-  # Only used if rbac.create is false. If not set, it defaults to the chart's fullname.
-  name: ""
+  # -- The name of the ServiceAccount to use/create for the exporter pod.
+  # If rbac.create is true, this SA is created. The exporter pod must use this SA.
+  name: "iperf3-monitor"

+# Service Monitor configuration for Prometheus
 serviceMonitor:
-  # -- If true, create a ServiceMonitor resource for integration with Prometheus Operator.
-  # Requires a running Prometheus Operator in the cluster.
+  # -- If true, create a ServiceMonitor resource.
   enabled: true
-  # -- Scrape interval for the ServiceMonitor. How often Prometheus scrapes the exporter metrics.
+  # -- Scrape interval for the ServiceMonitor.
   interval: 60s
-  # -- Scrape timeout for the ServiceMonitor. How long Prometheus waits for metrics response.
+  # -- Scrape timeout for the ServiceMonitor.
   scrapeTimeout: 30s

-# -- Configuration for the exporter Service.
+# Service configuration for the exporter
+# This defines how the exporter is exposed.
+# The common library can also manage services, or we can use our own template.
+# This structure is compatible with bjw-s common library's service management if we choose to use it.
 service:
-  # -- Service type. ClusterIP is typically sufficient.
-  type: ClusterIP
-  # -- Port on which the exporter service is exposed.
-  port: 9876
-  # -- Target port on the exporter pod.
-  targetPort: 9876
+  main: # A key for the service, 'main' is a common convention.
+    # -- Enable the exporter service.
+    enabled: true
+    # -- Service type.
+    type: ClusterIP # ClusterIP is typical for internal services scraped by Prometheus.
+    # -- Ports configuration for the service.
+    ports:
+      metrics: # Name of the service port, should align with a container port name.
+        # -- Port number on which the service is exposed.
+        port: 9876
+        # -- Target port on the exporter pod. Can be a number or name.
+        # Refers to the 'metrics' port defined in controllers.exporter.containers.exporter.ports.
+        targetPort: metrics
+        protocol: TCP

-# -- Optional configuration for a network policy to allow traffic to the iperf3 server DaemonSet.
-# This is often necessary if you are using a network policy controller.
+# Network Policy (optional)
 networkPolicy:
   # -- If true, create a NetworkPolicy resource.
   enabled: false
-  # -- Specify source selectors if needed (e.g., pods in a specific namespace).
+  # -- Source selectors for ingress rules.
   from: []
-  # -- Specify namespace selectors if needed.
+  # -- Namespace selectors for ingress rules.
   namespaceSelector: {}
-  # -- Specify pod selectors if needed.
+  # -- Pod selectors for ingress rules.
   podSelector: {}

+# Dependency Configuration (for Prometheus Operator)
+dependencies:
+  # -- Set to false by default. Set to true to install a Prometheus operator dependency (used if serviceMonitor.enabled=true).
+  # -- If false (default), and serviceMonitor.enabled is true, you must have a compatible Prometheus Operator already running in your cluster.
+  install: false
+  # -- If true, use TrueCharts Prometheus Operator instead of kube-prometheus-stack (used if dependencies.install is true).
+  useTrueChartsPrometheusOperator: false
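Since `targetPort: metrics` now references a named container port, a rendered-manifest check can confirm the names line up (release name illustrative; the output file matches the new .gitignore entry):

```bash
# Render the chart and confirm the Service targets the named container port
helm template iperf3-monitor ./charts/iperf3-monitor > rendered-manifests.yaml
yq e 'select(.kind == "Service") | .spec.ports' rendered-manifests.yaml
```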
@@ -1,11 +1,15 @@
 # Stage 1: Build stage with dependencies
 FROM python:3.9-slim as builder

+# Declare TARGETARCH for use in this stage
+ARG TARGETARCH
 WORKDIR /app

-# Install iperf3 and build dependencies
+# Minimal dependencies for builder stage if any Python packages had C extensions.
+# Assuming requirements.txt does not need gcc or other build tools for now.
+# If pip install fails later, add necessary build tools (e.g., gcc, python3-dev) here.
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends gcc iperf3 libiperf-dev && \
+    # apt-get install -y --no-install-recommends gcc python3-dev # Example if needed
     rm -rf /var/lib/apt/lists/*

 # Install Python dependencies

@@ -17,9 +21,11 @@ FROM python:3.9-slim

 WORKDIR /app

-# Copy iperf3 binary and library from the builder stage
-COPY --from=builder /usr/bin/iperf3 /usr/bin/iperf3
-COPY --from=builder /usr/lib/x86_64-linux-gnu/libiperf.so.0 /usr/lib/x86_64-linux-gnu/libiperf.so.0
+# Install iperf3 and its runtime dependency libsctp1 directly in the final stage.
+# This simplifies the Dockerfile by removing the need to copy iperf3 components from the builder.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends iperf3 libsctp1 && \
+    rm -rf /var/lib/apt/lists/*

 # Copy installed Python packages from the builder stage
 COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages
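A quick smoke test of the resulting image (tag illustrative), verifying that iperf3 is actually present in the final stage now that it is installed there instead of copied:

```bash
# Build the exporter image and confirm the iperf3 runtime is installed
docker build -t iperf3-monitor:smoke ./exporter
docker run --rm --entrypoint iperf3 iperf3-monitor:smoke --version
```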
@@ -1,28 +1,60 @@
+"""
+Prometheus exporter for iperf3 network performance monitoring.
+
+This script runs iperf3 tests between the node it's running on (source) and
+other iperf3 server pods discovered in a Kubernetes cluster. It then exposes
+these metrics for Prometheus consumption.
+
+Configuration is primarily through environment variables and command-line arguments
+for log level.
+"""
 import os
 import time
 import logging
+import argparse
+import sys
 from kubernetes import client, config
 from prometheus_client import start_http_server, Gauge
 import iperf3

-# --- Configuration ---
-# Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# --- Global Configuration & Setup ---
+# Argument parsing for log level configuration
+# The command-line --log-level argument takes precedence over the LOG_LEVEL env var.
+# Defaults to INFO if neither is set.
+parser = argparse.ArgumentParser(description="iperf3 Prometheus exporter.")
+parser.add_argument(
+    '--log-level',
+    default=os.environ.get('LOG_LEVEL', 'INFO').upper(),
+    choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
+    help='Set the logging level. Overrides LOG_LEVEL environment variable. (Default: INFO)'
+)
+args = parser.parse_args()
+log_level_str = args.log_level
+
+# Convert log level string (e.g., 'INFO') to its numeric representation (e.g., logging.INFO)
+numeric_level = getattr(logging, log_level_str.upper(), None)
+if not isinstance(numeric_level, int):
+    # This case should ideally not be reached if choices in argparse are respected.
+    logging.error(f"Invalid log level: {log_level_str}. Defaulting to INFO.")
+    numeric_level = logging.INFO
+logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')

 # --- Prometheus Metrics Definition ---
+# These gauges will be used to expose iperf3 test results.
 IPERF_BANDWIDTH_MBPS = Gauge(
     'iperf_network_bandwidth_mbps',
-    'Network bandwidth measured by iperf3 in Megabits per second',
+    'Network bandwidth measured by iperf3 in Megabits per second (Mbps)',
     ['source_node', 'destination_node', 'protocol']
 )
 IPERF_JITTER_MS = Gauge(
     'iperf_network_jitter_ms',
-    'Network jitter measured by iperf3 in milliseconds',
+    'Network jitter measured by iperf3 in milliseconds (ms) for UDP tests',
     ['source_node', 'destination_node', 'protocol']
 )
 IPERF_PACKETS_TOTAL = Gauge(
     'iperf_network_packets_total',
-    'Total packets transmitted or received during the iperf3 test',
+    'Total packets transmitted/received during the iperf3 UDP test',
     ['source_node', 'destination_node', 'protocol']
 )
 IPERF_LOST_PACKETS = Gauge(
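A sketch of exercising the new precedence rule locally (the script name exporter.py is assumed):

```bash
# CLI flag beats the environment variable; the env var is the fallback
LOG_LEVEL=WARNING python exporter.py --log-level DEBUG   # runs at DEBUG
LOG_LEVEL=WARNING python exporter.py                     # runs at WARNING
python exporter.py                                       # runs at INFO (default)
# (outside a cluster the Kubernetes discovery calls will fail; this only shows logging setup)
```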
@@ -38,12 +70,21 @@ IPERF_TEST_SUCCESS = Gauge(

 def discover_iperf_servers():
     """
-    Discover iperf3 server pods in the cluster using the Kubernetes API.
+    Discovers iperf3 server pods within a Kubernetes cluster.
+
+    It uses the in-cluster Kubernetes configuration to connect to the API.
+    The target namespace and label selector for iperf3 server pods are configured
+    via environment variables:
+    - IPERF_SERVER_NAMESPACE (default: 'default')
+    - IPERF_SERVER_LABEL_SELECTOR (default: 'app=iperf3-server')
+
+    Returns:
+        list: A list of dictionaries, where each dictionary contains the 'ip'
+              and 'node_name' of a discovered iperf3 server pod. Returns an
+              empty list if discovery fails or no servers are found.
     """
     try:
-        # Load in-cluster configuration
-        # Assumes the exporter runs in a pod with a service account having permissions
-        config.load_incluster_config()
+        config.load_incluster_config()  # Assumes running inside a Kubernetes pod
         v1 = client.CoreV1Api()

         namespace = os.getenv('IPERF_SERVER_NAMESPACE', 'default')
@ -51,110 +92,208 @@ def discover_iperf_servers():
|
||||||
|
|
||||||
logging.info(f"Discovering iperf3 servers with label '{label_selector}' in namespace '{namespace}'")
|
logging.info(f"Discovering iperf3 servers with label '{label_selector}' in namespace '{namespace}'")
|
||||||
|
|
||||||
# List pods across all namespaces with the specified label selector
|
# Use list_namespaced_pod to query only the specified namespace
|
||||||
# Note: list_pod_for_all_namespaces requires cluster-wide permissions
|
ret = v1.list_namespaced_pod(namespace=namespace, label_selector=label_selector, watch=False)
|
||||||
ret = v1.list_pod_for_all_namespaces(label_selector=label_selector, watch=False)
|
|
||||||
|
|
||||||
servers = []
|
servers = []
|
||||||
for i in ret.items:
|
for item in ret.items:
|
||||||
# Ensure pod has an IP and is running
|
# No need to filter by namespace here as the API call is already namespaced
|
||||||
if i.status.pod_ip and i.status.phase == 'Running':
|
if item.status.pod_ip and item.status.phase == 'Running':
|
||||||
servers.append({
|
servers.append({
|
||||||
'ip': i.status.pod_ip,
|
'ip': item.status.pod_ip,
|
||||||
'node_name': i.spec.node_name
|
'node_name': item.spec.node_name # Node where the iperf server pod is running
|
||||||
})
|
})
|
||||||
logging.info(f"Discovered {len(servers)} iperf3 server pods.")
|
logging.info(f"Discovered {len(servers)} iperf3 server pods in namespace '{namespace}'.")
|
||||||
return servers
|
return servers
|
||||||
|
except config.ConfigException as e:
|
||||||
|
logging.error(f"Kubernetes config error: {e}. Is the exporter running in a cluster with RBAC permissions?")
|
||||||
|
return []
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Error discovering iperf servers: {e}")
|
logging.error(f"Error discovering iperf servers: {e}")
|
||||||
return [] # Return empty list on error to avoid crashing the loop
|
return [] # Return empty list on error to avoid crashing the main loop
|
||||||
|
|
||||||
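Illustrative aside: assuming discovery succeeds, callers receive plain dictionaries (IPs and node names below are invented):

servers = discover_iperf_servers()
# e.g. [{'ip': '10.42.1.23', 'node_name': 'worker-1'},
#       {'ip': '10.42.2.7', 'node_name': 'worker-2'}]

Because the lookup is now namespaced, a namespace-scoped Role granting list on pods should suffice, instead of the cluster-wide permission the old all-namespaces call needed.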
-def run_iperf_test(server_ip, server_port, protocol, source_node, dest_node):
+def run_iperf_test(server_ip, server_port, protocol, source_node_name, dest_node_name):
    """
-    Runs a single iperf3 test and updates Prometheus metrics.
+    Runs a single iperf3 test against a specified server and publishes metrics.
+
+    Args:
+        server_ip (str): The IP address of the iperf3 server.
+        server_port (int): The port number of the iperf3 server.
+        protocol (str): The protocol to use ('tcp' or 'udp').
+        source_node_name (str): The name of the source node (where this exporter is running).
+        dest_node_name (str): The name of the destination node (where the server is running).
+
+    The test duration is controlled by the IPERF_TEST_DURATION environment variable
+    (default: 5 seconds).
    """
-    logging.info(f"Running iperf3 test from {source_node} to {dest_node} ({server_ip}:{server_port}) using {protocol.upper()}")
+    logging.info(f"Running iperf3 {protocol.upper()} test from {source_node_name} to {dest_node_name} ({server_ip}:{server_port})")

-    client = iperf3.Client()
-    client.server_hostname = server_ip
-    client.port = server_port
-    client.protocol = protocol
-    # Duration of the test (seconds)
-    client.duration = int(os.getenv('IPERF_TEST_DURATION', 5))
-    # Output results as JSON for easy parsing
-    client.json_output = True
+    iperf_client = iperf3.Client()
+    iperf_client.server_hostname = server_ip
+    iperf_client.port = server_port
+    iperf_client.protocol = protocol
+    iperf_client.duration = int(os.getenv('IPERF_TEST_DURATION', 5)) # Test duration in seconds
+    iperf_client.json_output = True # Enables easy parsing of results

-    result = client.run()
-
-    # Parse results and update metrics
-    parse_and_publish_metrics(result, source_node, dest_node, protocol)
+    try:
+        result = iperf_client.run()
+        parse_and_publish_metrics(result, source_node_name, dest_node_name, protocol)
+    except Exception as e:
+        # Catch unexpected errors during client.run() or parsing
+        logging.error(f"Exception during iperf3 test or metric parsing for {dest_node_name}: {e}")
+        labels = {'source_node': source_node_name, 'destination_node': dest_node_name, 'protocol': protocol}
+        IPERF_TEST_SUCCESS.labels(**labels).set(0)
+        try:
+            IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
+            IPERF_JITTER_MS.labels(**labels).set(0)
+            IPERF_PACKETS_TOTAL.labels(**labels).set(0)
+            IPERF_LOST_PACKETS.labels(**labels).set(0)
+        except KeyError:
+            logging.debug(f"KeyError setting failure metrics for {labels} after client.run() exception.")

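Illustrative aside: the same iperf3-python calls can be exercised standalone outside the cluster; a minimal sketch, assuming an iperf3 server is reachable at 10.42.1.23:5201 (address invented):

import iperf3

c = iperf3.Client()
c.server_hostname = '10.42.1.23'
c.port = 5201
c.protocol = 'tcp'
c.duration = 5
c.json_output = True
result = c.run()
if result.error:
    print('test failed:', result.error)
else:
    print('received Mbps:', result.received_Mbps)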
def parse_and_publish_metrics(result, source_node, dest_node, protocol):
    """
-    Parses the iperf3 result and updates Prometheus gauges.
-    Handles both successful and failed tests.
+    Parses the iperf3 test result and updates Prometheus gauges.
+
+    Args:
+        result (iperf3.TestResult): The result object from the iperf3 client.
+        source_node (str): Name of the source node.
+        dest_node (str): Name of the destination node.
+        protocol (str): Protocol used for the test ('tcp' or 'udp').
    """
    labels = {'source_node': source_node, 'destination_node': dest_node, 'protocol': protocol}

-    if result and result.error:
-        logging.error(f"Test from {source_node} to {dest_node} failed: {result.error}")
+    # Handle failed tests (e.g., server unreachable) or missing result object
+    if not result or result.error:
+        error_message = result.error if result and result.error else "No result object from iperf3 client"
+        logging.warning(f"Test from {source_node} to {dest_node} ({protocol.upper()}) failed: {error_message}")
        IPERF_TEST_SUCCESS.labels(**labels).set(0)
-        # Set metrics to 0 on failure
+        # Set all relevant metrics to 0 on failure to clear stale values from previous successes
        try:
            IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
-            IPERF_JITTER_MS.labels(**labels).set(0)
-            IPERF_PACKETS_TOTAL.labels(**labels).set(0)
-            IPERF_LOST_PACKETS.labels(**labels).set(0)
+            IPERF_JITTER_MS.labels(**labels).set(0) # Applicable for UDP, zeroed for TCP later
+            IPERF_PACKETS_TOTAL.labels(**labels).set(0) # Applicable for UDP, zeroed for TCP later
+            IPERF_LOST_PACKETS.labels(**labels).set(0) # Applicable for UDP, zeroed for TCP later
        except KeyError:
-            # Labels might not be registered yet if this is the first failure
-            pass
+            # This can happen if labels were never registered due to continuous failures
+            logging.debug(f"KeyError when setting failure metrics for {labels}. Gauges might not be initialized.")
        return

-    if not result:
-        logging.error(f"Test from {source_node} to {dest_node} failed to return a result object.")
-        IPERF_TEST_SUCCESS.labels(**labels).set(0)
-        try:
-            IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
-            IPERF_JITTER_MS.labels(**labels).set(0)
-            IPERF_PACKETS_TOTAL.labels(**labels).set(0)
-            IPERF_LOST_PACKETS.labels(**labels).set(0)
-        except KeyError:
-            pass
-        return
-
+    # If we reach here, the test itself was successful in execution
    IPERF_TEST_SUCCESS.labels(**labels).set(1)

-    # The summary data is typically in result.json['end']['sum_sent'] or result.json['end']['sum_received']
-    # The iperf3-python client often exposes this directly as attributes like sent_Mbps or received_Mbps
-    # For TCP, we usually care about the received bandwidth on the client side (which is the exporter)
-    # For UDP, the client report contains jitter, lost packets, etc.
+    # Determine bandwidth:
+    # Order of preference: received_Mbps, sent_Mbps, Mbps, then JSON fallbacks.
+    # received_Mbps is often most relevant for TCP client perspective.
+    # sent_Mbps can be relevant for UDP or as a TCP fallback.
    bandwidth_mbps = 0
    if hasattr(result, 'received_Mbps') and result.received_Mbps is not None:
        bandwidth_mbps = result.received_Mbps
    elif hasattr(result, 'sent_Mbps') and result.sent_Mbps is not None:
-        # Fallback, though received_Mbps is usually more relevant for TCP client
        bandwidth_mbps = result.sent_Mbps
-    # Add a check for the raw JSON output structure as a fallback
-    elif result.json and 'end' in result.json and 'sum_received' in result.json['end'] and result.json['end']['sum_received']['bits_per_second'] is not None:
-        bandwidth_mbps = result.json['end']['sum_received']['bits_per_second'] / 1000000
-    elif result.json and 'end' in result.json and 'sum_sent' in result.json['end'] and result.json['end']['sum_sent']['bits_per_second'] is not None:
-        bandwidth_mbps = result.json['end']['sum_sent']['bits_per_second'] / 1000000
+    elif hasattr(result, 'Mbps') and result.Mbps is not None: # General attribute from iperf3 library
+        bandwidth_mbps = result.Mbps
+    # Fallback to raw JSON if direct attributes are None or missing
+    elif result.json:
+        # Prefer received sum, then sent sum from the JSON output's 'end' summary
+        if 'end' in result.json and 'sum_received' in result.json['end'] and \
+           result.json['end']['sum_received'].get('bits_per_second') is not None:
+            bandwidth_mbps = result.json['end']['sum_received']['bits_per_second'] / 1000000.0
+        elif 'end' in result.json and 'sum_sent' in result.json['end'] and \
+           result.json['end']['sum_sent'].get('bits_per_second') is not None:
+            bandwidth_mbps = result.json['end']['sum_sent']['bits_per_second'] / 1000000.0

    IPERF_BANDWIDTH_MBPS.labels(**labels).set(bandwidth_mbps)

    # UDP specific metrics
    if protocol == 'udp':
-        # iperf3-python exposes UDP results directly
+        # These attributes are specific to UDP tests in iperf3
-        IPERF_JITTER_MS.labels(**labels).set(result.jitter_ms if hasattr(result, 'jitter_ms') and result.jitter_ms is not None else 0)
-        IPERF_PACKETS_TOTAL.labels(**labels).set(result.packets if hasattr(result, 'packets') and result.packets is not None else 0)
-        IPERF_LOST_PACKETS.labels(**labels).set(result.lost_packets if hasattr(result, 'lost_packets') and result.lost_packets is not None else 0)
+        # getattr with a None default guards both a missing attribute and a None value
+        IPERF_JITTER_MS.labels(**labels).set(getattr(result, 'jitter_ms', None) or 0)
+        IPERF_PACKETS_TOTAL.labels(**labels).set(getattr(result, 'packets', None) or 0)
+        IPERF_LOST_PACKETS.labels(**labels).set(getattr(result, 'lost_packets', None) or 0)
    else:
-        # Ensure UDP metrics are zeroed or absent for TCP tests
+        # For TCP tests, ensure UDP-specific metrics are set to 0
        try:
            IPERF_JITTER_MS.labels(**labels).set(0)
            IPERF_PACKETS_TOTAL.labels(**labels).set(0)
            IPERF_LOST_PACKETS.labels(**labels).set(0)
        except KeyError:
+            # Can occur if labels not yet registered (e.g. first test is TCP)
+            logging.debug(f"KeyError for {labels} when zeroing UDP metrics for TCP test.")
            pass

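Illustrative aside: the JSON fallback above walks the summary section that iperf3 itself emits in JSON mode; an abridged sketch of that shape (numbers invented) and the arithmetic applied to it:

result_json = {
    'end': {
        'sum_sent': {'bits_per_second': 941200000.0},
        'sum_received': {'bits_per_second': 938700000.0},
    }
}
mbps = result_json['end']['sum_received']['bits_per_second'] / 1000000.0
print(mbps)  # 938.7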
+def main_loop():
+    """
+    Main operational loop of the iperf3 exporter.
+
+    This loop periodically:
+    1. Fetches configuration from environment variables:
+       - IPERF_TEST_INTERVAL (default: 300s): Time between test cycles.
+       - IPERF_SERVER_PORT (default: 5201): Port for iperf3 servers.
+       - IPERF_TEST_PROTOCOL (default: 'tcp'): 'tcp' or 'udp'.
+       - SOURCE_NODE_NAME (critical): Name of the node this exporter runs on.
+    2. Discovers iperf3 server pods in the Kubernetes cluster.
+    3. Runs iperf3 tests against each discovered server (unless it's on the same node).
+    4. Sleeps for the configured test interval.
+
+    If SOURCE_NODE_NAME is not set, the script will log an error and exit.
+    """
+    # Fetch operational configuration from environment variables
+    test_interval = int(os.getenv('IPERF_TEST_INTERVAL', 300))
+    server_port = int(os.getenv('IPERF_SERVER_PORT', 5201))
+    protocol = os.getenv('IPERF_TEST_PROTOCOL', 'tcp').lower() # Ensure lowercase
+    source_node_name = os.getenv('SOURCE_NODE_NAME')
+
+    # SOURCE_NODE_NAME is crucial for labeling metrics correctly.
+    if not source_node_name:
+        logging.error("CRITICAL: SOURCE_NODE_NAME environment variable not set. This is required. Exiting.")
+        sys.exit(1)
+
+    logging.info(
+        f"Exporter configured. Source Node: {source_node_name}, "
+        f"Test Interval: {test_interval}s, Server Port: {server_port}, Protocol: {protocol.upper()}"
+    )
+
+    while True:
+        logging.info("Starting new iperf test cycle...")
+        servers = discover_iperf_servers()
+
+        if not servers:
+            logging.warning("No iperf servers discovered in this cycle. Check K8s setup and RBAC permissions.")
+        else:
+            for server in servers:
+                dest_node_name = server.get('node_name', 'unknown_destination_node') # Default if key missing
+                server_ip = server.get('ip')
+
+                if not server_ip:
+                    logging.warning(f"Discovered server entry missing an IP: {server}. Skipping.")
+                    continue
+
+                # Avoid testing a node against itself
+                if dest_node_name == source_node_name:
+                    logging.info(f"Skipping test to self: {source_node_name} to {server_ip} (on same node: {dest_node_name}).")
+                    continue
+
+                run_iperf_test(server_ip, server_port, protocol, source_node_name, dest_node_name)

+        logging.info(f"Test cycle completed. Sleeping for {test_interval} seconds.")
+        time.sleep(test_interval)

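Illustrative aside: the loop is driven entirely by environment variables; for a local smoke run the same knobs can be set from Python first (values invented; main_loop() then runs until interrupted):

import os

os.environ.setdefault('SOURCE_NODE_NAME', 'worker-1')  # required, or main_loop() exits
os.environ.setdefault('IPERF_TEST_INTERVAL', '60')
os.environ.setdefault('IPERF_SERVER_PORT', '5201')
os.environ.setdefault('IPERF_TEST_PROTOCOL', 'udp')
main_loop()  # loops forever; Ctrl-C to stop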
+if __name__ == '__main__':
+    # Initial logging (like log level) is configured globally at the start of the script.
+
+    # Fetch Prometheus exporter listen port from environment variable
+    listen_port = int(os.getenv('LISTEN_PORT', 9876))
+
+    try:
+        # Start the Prometheus HTTP server to expose metrics.
+        start_http_server(listen_port)
+        logging.info(f"Prometheus exporter listening on port {listen_port}")
+    except Exception as e:
+        logging.error(f"Failed to start Prometheus HTTP server on port {listen_port}: {e}")
+        sys.exit(1) # Exit if the metrics server cannot start
+
+    # Enter the main operational loop.
+    # main_loop() contains its own critical checks (e.g., SOURCE_NODE_NAME) and will exit if necessary.
+    main_loop()
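Illustrative aside: once the HTTP server is up, any HTTP client can confirm metrics are being served; a minimal sketch assuming the default LISTEN_PORT:

import urllib.request

body = urllib.request.urlopen('http://localhost:9876/metrics').read().decode()
print('\n'.join(body.splitlines()[:5]))  # HELP/TYPE headers and first samples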