Compare commits

..

No commits in common. "main" and "v0.1.4" have entirely different histories.
main ... v0.1.4

18 changed files with 237 additions and 2688 deletions

View File

@ -19,16 +19,7 @@ jobs:
- name: Set up Helm
uses: azure/setup-helm@v3
with:
version: v3.10.0 # Using a specific version, can be updated
- name: Add Helm repositories
run: |
helm repo add bjw-s https://bjw-s-labs.github.io/helm-charts/ --force-update
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts --force-update
helm repo update
- name: Build Helm chart dependencies
run: helm dependency build ./charts/iperf3-monitor
version: v3.10.0
- name: Helm Lint
run: helm lint ./charts/iperf3-monitor
@ -36,49 +27,23 @@ jobs:
build:
name: Build Docker Image
runs-on: ubuntu-latest
permissions:
contents: read # Needed to checkout the repository
packages: write # Needed to push Docker images to GHCR
steps:
- name: Check out code
uses: actions/checkout@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
# Tag with the PR number if it's a pull request event
type=match,pattern=pull_request,value=pr-{{number}}
# Tag with the git SHA
type=sha,prefix=
# Tag with 'latest' if on the main branch (though this workflow only runs on PRs to main)
type=ref,event=branch,pattern=main,value=latest
- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build Docker image
uses: docker/build-push-action@v4
with:
context: ./exporter
# Push the image if the event is a pull request.
# The workflow currently only triggers on pull_request events.
push: ${{ github.event_name == 'pull_request' }}
push: false # Do not push on PRs
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64,linux/arm64
test:
name: Run Tests

View File

@ -22,15 +22,6 @@ jobs:
with:
version: v3.10.0
- name: Add Helm repositories
run: |
helm repo add bjw-s https://bjw-s-labs.github.io/helm-charts/ --force-update
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts --force-update
helm repo update
- name: Build Helm chart dependencies
run: helm dependency build ./charts/iperf3-monitor
- name: Helm Lint
run: helm lint ./charts/iperf3-monitor
@ -45,12 +36,6 @@ jobs:
- name: Check out code
uses: actions/checkout@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Log in to GitHub Container Registry
uses: docker/login-action@v2
with:
@ -63,11 +48,6 @@ jobs:
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=semver,pattern={{version}}
# This ensures that for a git tag like "v0.1.0",
# an image tag "0.1.0" is generated.
# It will also generate "latest" for the most recent semver tag.
- name: Build and push Docker image
uses: docker/build-push-action@v4
@ -76,7 +56,6 @@ jobs:
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64,linux/arm64
package-and-publish-chart:
name: Package and Publish Helm Chart
@ -100,15 +79,6 @@ jobs:
sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq &&\
sudo chmod +x /usr/bin/yq
- name: Add Helm repositories
run: |
helm repo add bjw-s https://bjw-s-labs.github.io/helm-charts/ --force-update
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts --force-update
helm repo update
- name: Build Helm chart dependencies
run: helm dependency build ./charts/iperf3-monitor
- name: Set Chart Version from Tag
run: |
VERSION=$(echo "${{ github.ref_name }}" | sed 's/^v//')

4
.gitignore vendored
View File

@ -37,7 +37,3 @@ Thumbs.db
# Helm
!charts/iperf3-monitor/.helmignore
charts/iperf3-monitor/charts/
# Rendered Kubernetes manifests (for local testing)
rendered-manifests.yaml
rendered-manifests-updated.yaml

118
README.md
View File

@ -74,123 +74,37 @@ nameOverride: ""
# -- Override the fully qualified app name.
fullnameOverride: ""
# Exporter Configuration (`controllers.exporter`)
# The iperf3 exporter is managed under the `controllers.exporter` section,
# leveraging the `bjw-s/common-library` for robust workload management.
controllers:
exporter:
# -- Enable the exporter controller.
enabled: true
# -- Set the controller type for the exporter.
# Valid options are "deployment" or "daemonset".
# Use "daemonset" for N-to-N node monitoring where an exporter runs on each node (or selected nodes).
# Use "deployment" for a centralized exporter (typically with replicaCount: 1).
# @default -- "deployment"
type: deployment
# -- Number of desired exporter pods. Only used if type is "deployment".
# @default -- 1
replicas: 1
# -- Configuration for the exporter container image.
image:
# -- The container image repository for the exporter.
repository: ghcr.io/malarinv/iperf3-monitor
# -- The container image tag for the exporter. If not set, the chart's appVersion is used.
tag: ""
# -- The image pull policy for the exporter container.
pullPolicy: IfNotPresent
# -- Number of exporter pod replicas. Typically 1 is sufficient.
replicaCount: 1
# -- Application-specific configuration for the iperf3 exporter.
# These values are used to populate environment variables for the exporter container.
appConfig:
# -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
testInterval: 300
# -- Log level for the iperf3 exporter (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL).
logLevel: INFO
# -- Timeout in seconds for a single iperf3 test run.
testTimeout: 10
# -- Protocol to use for testing (tcp or udp).
testProtocol: tcp
# -- iperf3 server port to connect to. Should match the server's listening port.
serverPort: "5201"
# -- Label selector to find iperf3 server pods.
# This is templated. Default: 'app.kubernetes.io/name=<chart-name>,app.kubernetes.io/instance=<release-name>,app.kubernetes.io/component=server'
serverLabelSelector: 'app.kubernetes.io/name={{ include "iperf3-monitor.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server'
# -- Pod-level configurations for the exporter.
pod:
# -- Annotations for the exporter pod.
annotations: {}
# -- Labels for the exporter pod (the common library adds its own defaults too).
labels: {}
# -- Node selector for scheduling exporter pods. Useful for DaemonSet or specific scheduling with Deployments.
# Example:
# nodeSelector:
# kubernetes.io/os: linux
nodeSelector: {}
# -- Tolerations for scheduling exporter pods.
# Example:
# tolerations:
# - key: "node-role.kubernetes.io/control-plane"
# operator: "Exists"
# effect: "NoSchedule"
tolerations: []
# -- Affinity rules for scheduling exporter pods.
# Example:
# affinity:
# nodeAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# nodeSelectorTerms:
# - matchExpressions:
# - key: "kubernetes.io/arch"
# operator: In
# values:
# - amd64
affinity: {}
# -- Security context for the exporter pod.
# securityContext:
# fsGroup: 65534
# runAsUser: 65534
# runAsGroup: 65534
# runAsNonRoot: true
securityContext: {}
# -- Automount service account token for the pod.
automountServiceAccountToken: true
# -- Container-level configurations for the main exporter container.
containers:
exporter: # Name of the primary container
image:
repository: ghcr.io/malarinv/iperf3-monitor
tag: "" # Defaults to .Chart.AppVersion
pullPolicy: IfNotPresent
# -- Custom environment variables for the exporter container.
# These are merged with the ones generated from appConfig.
# env:
# MY_CUSTOM_VAR: "my_value"
env: {}
# -- Ports for the exporter container.
ports:
metrics: # Name of the port
port: 9876 # Container port for metrics
protocol: TCP
enabled: true
# -- CPU and memory resource requests and limits.
# resources:
# -- CPU and memory resource requests and limits for the exporter pod.
# @default -- {} (no requests or limits are set; uncomment the example below to define them)
resources: {}
# requests:
# cpu: "100m"
# memory: "128Mi"
# limits:
# cpu: "500m"
# memory: "256Mi"
resources: {}
# -- Probes configuration for the exporter container.
# probes:
# liveness:
# enabled: true # Example: enable liveness probe
# spec: # Customize probe spec if needed
# initialDelaySeconds: 30
# periodSeconds: 15
# timeoutSeconds: 5
# failureThreshold: 3
probes:
liveness:
enabled: false
readiness:
enabled: false
startup:
enabled: false
server:
# -- Configuration for the iperf3 server container image (DaemonSet).

View File

@ -1,12 +1,9 @@
dependencies:
- name: kube-prometheus-stack
repository: https://prometheus-community.github.io/helm-charts
version: 75.7.0
version: 75.3.6
- name: prometheus-operator
repository: oci://tccr.io/truecharts
version: 11.5.1
- name: common
repository: https://bjw-s-labs.github.io/helm-charts/
version: 4.1.2
digest: sha256:68485b4e158a6a405073e9c59966d251b62971846cdc9871e41fde46f19aabfe
generated: "2025-07-01T20:32:00.061995907Z"
digest: sha256:3000e63445f8ba8df601cb483f4f77d14c5c4662bff2d16ffcf5cf1f7def314b
generated: "2025-06-20T17:25:44.538372209+05:30"

View File

@ -27,11 +27,8 @@ dependencies:
- name: kube-prometheus-stack # Example dependency if you package the whole stack
version: ">=30.0.0" # Specify a compatible version range
repository: https://prometheus-community.github.io/helm-charts
condition: "dependencies.install, serviceMonitor.enabled, !dependencies.useTrueChartsPrometheusOperator"
condition: "serviceMonitor.enabled, !dependencies.useTrueChartsPrometheusOperator"
- name: prometheus-operator
version: ">=8.11.1"
repository: "oci://tccr.io/truecharts"
condition: "dependencies.install, serviceMonitor.enabled, dependencies.useTrueChartsPrometheusOperator"
- name: common
version: "4.1.2"
repository: "https://bjw-s-labs.github.io/helm-charts/"
condition: "serviceMonitor.enabled, dependencies.useTrueChartsPrometheusOperator"

View File

@ -1,194 +0,0 @@
{
"__inputs": [],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "8.0.0"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"links": [],
"panels": [
{
  "title": "Node-to-Node Bandwidth Heatmap",
  "type": "heatmap",
  "datasource": {
    "type": "prometheus",
    "uid": "prometheus"
  },
  "gridPos": {
    "h": 9,
    "w": 24,
    "x": 0,
    "y": 0
  },
  "id": 2,
  "targets": [
    {
      "expr": "avg(iperf_network_bandwidth_mbps) by (source_node, destination_node)",
      "format": "heatmap",
      "legendFormat": "{{source_node}} -> {{destination_node}}",
      "refId": "A"
    }
  ],
  "cards": { "cardPadding": null, "cardRound": null },
  "color": {
    "mode": "spectrum",
    "scheme": "red-yellow-green",
    "exponent": 0.5,
    "reverse": false
  },
  "dataFormat": "tsbuckets",
  "yAxis": { "show": true, "format": "short" },
  "xAxis": { "show": true }
},
{
"title": "Bandwidth Over Time (Source: $source_node, Dest: $destination_node)",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 9
},
"targets": [
{
"expr": "iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\", destination_node=~\"^$destination_node$\", protocol=~\"^$protocol$\"}",
"legendFormat": "Bandwidth",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "mbps"
}
}
},
{
"title": "Jitter Over Time (Source: $source_node, Dest: $destination_node)",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 9
},
"targets": [
{
"expr": "iperf_network_jitter_ms{source_node=~\"^$source_node$\", destination_node=~\"^$destination_node$\", protocol=\"udp\"}",
"legendFormat": "Jitter",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "ms"
}
}
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": ["iperf3", "network", "kubernetes"],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"definition": "label_values(iperf_network_bandwidth_mbps, source_node)",
"hide": 0,
"includeAll": false,
"multi": false,
"name": "source_node",
"options": [],
"query": "label_values(iperf_network_bandwidth_mbps, source_node)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"definition": "label_values(iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\"}, destination_node)",
"hide": 0,
"includeAll": false,
"multi": false,
"name": "destination_node",
"options": [],
"query": "label_values(iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\"}, destination_node)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
},
{
"current": { "selected": true, "text": "tcp", "value": "tcp" },
"hide": 0,
"includeAll": false,
"multi": false,
"name": "protocol",
"options": [
{ "selected": true, "text": "tcp", "value": "tcp" },
{ "selected": false, "text": "udp", "value": "udp" }
],
"query": "tcp,udp",
"skipUrlSync": false,
"type": "custom"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Kubernetes iperf3 Network Performance",
"uid": "k8s-iperf3-dashboard",
"version": 1,
"weekStart": ""
}

View File

@ -47,7 +47,7 @@ app.kubernetes.io/instance: {{ .Release.Name }}
Create the name of the service account to use
*/}}
{{- define "iperf3-monitor.serviceAccountName" -}}
{{- if .Values.rbac.create -}}
{{- if .Values.serviceAccount.create -}}
{{- default (include "iperf3-monitor.fullname" .) .Values.serviceAccount.name -}}
{{- else -}}
{{- default "default" .Values.serviceAccount.name -}}

View File

@ -1,140 +0,0 @@
{{- /*
Renders the 'exporter' controller (Deployment or DaemonSet) by calling the
bjw-s common library.

The exporter's configuration is read from .Values.controllers.exporter. Before
handing the values to the library this template:
  1. builds the exporter's environment variables from 'appConfig' and merges
     any user-supplied container env on top,
  2. defaults the container image tag to Chart.AppVersion when it is empty,
  3. points the controller at the ServiceAccount described by
     .Values.serviceAccount.name / .Values.rbac.create.
*/}}

{{- /*
Work on a private copy of .Values so the adjustments below never leak into
other templates. The toYaml/fromYaml round trip yields plain
map[string]interface{} values, which the Sprig dict helpers (get, set, dig)
require.
*/}}
{{- $localValues := .Values | toYaml | fromYaml | deepCopy -}}
{{- $chart := .Chart -}}
{{- $release := .Release -}}
{{- $appName := include "iperf3-monitor.name" . -}}
{{- $fullName := include "iperf3-monitor.fullname" . -}}

{{- /* Key of the exporter controller under .Values.controllers. */}}
{{- $exporterControllerKey := "exporter" -}}

{{- /*
Fetch the exporter controller block; everything below is skipped when it is
not defined.
*/}}
{{- $exporterControllerConfig := get $localValues.controllers $exporterControllerKey -}}
{{- if $exporterControllerConfig -}}

  {{- /*
  Base environment variables for the iperf3 exporter, derived from 'appConfig'.
  SOURCE_NODE_NAME and IPERF_SERVER_NAMESPACE come from the downward API.
  */}}
  {{- $baseExporterEnv := dict -}}
  {{- if $exporterControllerConfig.appConfig -}}
    {{- $_ := set $baseExporterEnv "SOURCE_NODE_NAME" (dict "valueFrom" (dict "fieldRef" (dict "fieldPath" "spec.nodeName"))) -}}
    {{- $_ := set $baseExporterEnv "IPERF_TEST_INTERVAL" ($exporterControllerConfig.appConfig.testInterval | default "300" | toString) -}}
    {{- $_ := set $baseExporterEnv "IPERF_TEST_PROTOCOL" ($exporterControllerConfig.appConfig.testProtocol | default "tcp") -}}
    {{- $_ := set $baseExporterEnv "LOG_LEVEL" ($exporterControllerConfig.appConfig.logLevel | default "INFO") -}}
    {{- $_ := set $baseExporterEnv "IPERF_SERVER_PORT" ($exporterControllerConfig.appConfig.serverPort | default "5201" | toString) -}}
    {{- $_ := set $baseExporterEnv "IPERF_SERVER_NAMESPACE" (dict "valueFrom" (dict "fieldRef" (dict "fieldPath" "metadata.namespace"))) -}}
    {{- $_ := set $baseExporterEnv "IPERF_TEST_TIMEOUT" ($exporterControllerConfig.appConfig.testTimeout | default "10" | toString) -}}
    {{- /* The selector value may itself contain template expressions, so it is passed through tpl. */}}
    {{- $serverLabelSelectorDefault := printf "app.kubernetes.io/name=%s,app.kubernetes.io/instance=%s,app.kubernetes.io/component=server" $appName $release.Name -}}
    {{- $serverLabelSelector := tpl ($exporterControllerConfig.appConfig.serverLabelSelector | default $serverLabelSelectorDefault) . -}}
    {{- $_ := set $baseExporterEnv "IPERF_SERVER_LABEL_SELECTOR" $serverLabelSelector -}}
  {{- end -}}

  {{- /*
  Make sure controllers.exporter.containers.exporter exists BEFORE anything is
  read from it: dereferencing a field of a missing map aborts rendering with a
  nil-pointer error.
  */}}
  {{- if not $exporterControllerConfig.containers -}}
    {{- $_ := set $exporterControllerConfig "containers" dict -}}
  {{- end -}}
  {{- if not $exporterControllerConfig.containers.exporter -}}
    {{- $_ := set $exporterControllerConfig.containers "exporter" dict -}}
  {{- end -}}
  {{- $exporterContainerCfg := $exporterControllerConfig.containers.exporter -}}

  {{- /*
  Merge user-defined env (controllers.exporter.containers.exporter.env) over the
  base set; user keys win on conflict. The result is stored where the common
  library expects it: controllers.<key>.containers.<containerName>.env
  */}}
  {{- $userExporterEnv := $exporterContainerCfg.env | default dict -}}
  {{- $finalExporterEnv := mergeOverwrite $baseExporterEnv $userExporterEnv -}}
  {{- $_ := set $exporterContainerCfg "env" $finalExporterEnv -}}

  {{- /*
  Default an empty image tag to Chart.AppVersion (used verbatim, without a "v"
  prefix) so the values handed to the common library always carry a tag.
  Rendering fails with an explicit message when neither is available.
  When no image block is defined at all, nothing is defaulted here.
  NOTE(review): the common library may already default the tag to
  Chart.AppVersion; if so this block is redundant - confirm before removing.
  */}}
  {{- if $exporterContainerCfg.image -}}
    {{- if not $exporterContainerCfg.image.tag -}}
      {{- if $chart.AppVersion -}}
        {{- $_ := set $exporterContainerCfg.image "tag" (printf "%s" $chart.AppVersion) -}}
      {{- else -}}
        {{- fail (printf "Error: Container image tag is not specified for controller '%s', container '%s', and Chart.AppVersion is also empty." $exporterControllerKey "exporter") -}}
      {{- end -}}
    {{- end -}}
  {{- end -}}

  {{- /*
  Service account for the exporter controller, taken from
  .Values.serviceAccount.name (falling back to "<fullname>-exporter") and
  .Values.rbac.create.
  */}}
  {{- $serviceAccountNameFromValues := $localValues.serviceAccount.name | default (printf "%s-exporter" $fullName) -}}
  {{- if not $exporterControllerConfig.serviceAccount -}}
    {{- $_ := set $exporterControllerConfig "serviceAccount" dict -}}
  {{- end -}}
  {{- $_ := set $exporterControllerConfig.serviceAccount "name" $serviceAccountNameFromValues -}}
  {{- $_ := set $exporterControllerConfig.serviceAccount "create" $localValues.rbac.create -}}

  {{- /*
  automountServiceAccountToken defaults to true but an explicit 'false' must be
  honoured. The Sprig 'default' function treats false as empty and would turn it
  back into true, so the value is only taken when it is an actual boolean. The
  'pod' block is also checked for existence to avoid a nil-pointer error.
  */}}
  {{- $automountToken := true -}}
  {{- $podCfg := get $exporterControllerConfig "pod" -}}
  {{- if kindIs "map" $podCfg -}}
    {{- if kindIs "bool" (get $podCfg "automountServiceAccountToken") -}}
      {{- $automountToken = get $podCfg "automountServiceAccountToken" -}}
    {{- end -}}
  {{- end -}}
  {{- $_ := set $exporterControllerConfig.serviceAccount "automountServiceAccountToken" $automountToken -}}

  {{- /* Store the adjusted controller block back into the local values copy. */}}
  {{- $_ := set $localValues.controllers $exporterControllerKey $exporterControllerConfig -}}
{{- end -}}

{{- /* The common library expects .Values.global to exist and to be a map. */}}
{{- if not (get $localValues "global") -}}
  {{- $_ := set $localValues "global" dict -}}
{{- else if not (kindIs "map" (get $localValues "global")) -}}
  {{- $_ := set $localValues "global" dict -}}
{{- end -}}

{{- /* The common library expects defaultPodOptionsStrategy at the root of Values. */}}
{{- if not (get $localValues "defaultPodOptionsStrategy") -}}
  {{- $_ := set $localValues "defaultPodOptionsStrategy" "overwrite" -}}
{{- end -}}

{{- /*
Render every controller defined under $localValues.controllers through the
common library. The context mirrors the global Helm context: Values, Chart,
Release, Capabilities and Template.
*/}}
{{- include "bjw-s.common.render.controllers" (dict "Values" $localValues "Chart" $chart "Release" $release "Capabilities" .Capabilities "Template" .Template) | nindent 0 -}}

View File

@ -0,0 +1,48 @@
# Deployment for the iperf3 exporter. The exporter is configured entirely
# through environment variables populated from .Values.exporter.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "iperf3-monitor.fullname" . }}-exporter
  labels:
    {{- include "iperf3-monitor.labels" . | nindent 4 }}
    app.kubernetes.io/component: exporter
spec:
  replicas: {{ .Values.exporter.replicaCount }}
  selector:
    matchLabels:
      {{- include "iperf3-monitor.selectorLabels" . | nindent 6 }}
      app.kubernetes.io/component: exporter
  template:
    metadata:
      labels:
        {{- include "iperf3-monitor.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: exporter
    spec:
      serviceAccountName: {{ include "iperf3-monitor.serviceAccountName" . }}
      containers:
      - name: iperf3-exporter
        # An empty image tag falls back to the chart's appVersion.
        image: "{{ .Values.exporter.image.repository }}:{{ .Values.exporter.image.tag | default .Chart.AppVersion }}"
        imagePullPolicy: {{ .Values.exporter.image.pullPolicy }}
        ports:
        - containerPort: {{ .Values.service.targetPort }}
          name: metrics
        env:
        # Node the exporter pod is scheduled on (downward API).
        - name: SOURCE_NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        - name: IPERF_TEST_INTERVAL
          value: "{{ .Values.exporter.testInterval }}"
        - name: IPERF_TEST_PROTOCOL
          value: "{{ .Values.exporter.testProtocol }}"
        # Pass exporter.testTimeout through so that setting it in values has an
        # effect; falls back to 10 when the value is unset.
        # NOTE(review): confirm the exporter image reads IPERF_TEST_TIMEOUT.
        - name: IPERF_TEST_TIMEOUT
          value: "{{ .Values.exporter.testTimeout | default 10 }}"
        - name: IPERF_SERVER_PORT
          value: "5201" # Hardcoded as per server DaemonSet
        # Namespace the exporter pod runs in (downward API).
        - name: IPERF_SERVER_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: IPERF_SERVER_LABEL_SELECTOR
          value: 'app.kubernetes.io/name={{ include "iperf3-monitor.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server'
        {{- with .Values.exporter.resources }}
        resources:
          {{- toYaml . | nindent 10 }}
        {{- end }}

View File

@ -1,13 +0,0 @@
# ConfigMap that packages the chart's bundled Grafana dashboard JSON.
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ .Release.Name }}-grafana-dashboard
labels:
# NOTE(review): presumably the label a Grafana dashboard sidecar selects on - confirm against the Grafana installation's configuration.
grafana_dashboard: "1"
app.kubernetes.io/name: {{ include "iperf3-monitor.name" . }}
# "+" in the chart version is replaced with "_" in the label value.
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
data:
# The dashboard is read from the chart file grafana/iperf3-dashboard.json via .Files.Get;
# nindent 4 starts a new line and indents every line of it by four spaces.
iperf3-dashboard.json: |
{{ .Files.Get "grafana/iperf3-dashboard.json" | nindent 4 }}

View File

@ -7,10 +7,9 @@ metadata:
{{- include "iperf3-monitor.labels" . | nindent 4 }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
kind: ClusterRole
metadata:
name: {{ include "iperf3-monitor.fullname" . }}-role
namespace: {{ .Release.Namespace }}
labels:
{{- include "iperf3-monitor.labels" . | nindent 4 }}
rules:
@ -19,10 +18,9 @@ rules:
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
kind: ClusterRoleBinding
metadata:
name: {{ include "iperf3-monitor.fullname" . }}-rb
namespace: {{ .Release.Namespace }}
labels:
{{- include "iperf3-monitor.labels" . | nindent 4 }}
subjects:
@ -30,7 +28,7 @@ subjects:
name: {{ include "iperf3-monitor.serviceAccountName" . }}
namespace: {{ .Release.Namespace }}
roleRef:
kind: Role # Changed from ClusterRole
kind: ClusterRole
name: {{ include "iperf3-monitor.fullname" . }}-role
apiGroup: rbac.authorization.k8s.io
{{- end -}}

View File

@ -11,7 +11,7 @@ spec:
{{- include "iperf3-monitor.selectorLabels" . | nindent 4 }}
app.kubernetes.io/component: exporter
ports:
- name: metrics # Assuming 'metrics' is the intended name, aligns with values structure
port: {{ .Values.service.main.ports.metrics.port }}
targetPort: {{ .Values.service.main.ports.metrics.targetPort }}
protocol: {{ .Values.service.main.ports.metrics.protocol | default "TCP" }}
- name: metrics
port: {{ .Values.service.port }}
targetPort: {{ .Values.service.targetPort }}
protocol: TCP

View File

@ -8,96 +8,30 @@ nameOverride: ""
# -- Override the fully qualified app name.
fullnameOverride: ""
controllers:
exporter:
# -- Enable the exporter controller.
enabled: true
# -- Set the controller type for the exporter.
# Valid options are "deployment" or "daemonset".
# @default -- "deployment"
type: deployment
# -- Number of desired exporter pods. Only used if type is "deployment".
# @default -- 1
replicas: 1
# -- Application-specific configuration for the iperf3 exporter.
# These values are used to populate environment variables for the exporter container.
appConfig:
# -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
testInterval: 300
# -- Log level for the iperf3 exporter (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL).
logLevel: INFO
# -- Timeout in seconds for a single iperf3 test run.
testTimeout: 10
# -- Protocol to use for testing (tcp or udp).
testProtocol: tcp
# -- iperf3 server port to connect to. Should match the server's listening port.
# @default -- "5201" (hardcoded in the original chart for server daemonset)
serverPort: "5201"
# -- Label selector to find iperf3 server pods.
# This will be templated in the actual deployment.
# Example default (if not overridden by template logic): 'app.kubernetes.io/component=server'
serverLabelSelector: 'app.kubernetes.io/name={{ include "iperf3-monitor.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server'
# -- Pod-level configurations for the exporter, leveraging bjw-s common library structure.
pod:
# -- Annotations for the exporter pod.
annotations: {}
# -- Labels for the exporter pod.
labels:
app.kubernetes.io/component: exporter # Ensure pods get the component label for service selection
# -- Node selector for scheduling exporter pods.
nodeSelector: {}
# -- Tolerations for scheduling exporter pods.
tolerations: []
# -- Affinity rules for scheduling exporter pods.
affinity: {}
# -- Security context for the exporter pod.
securityContext: {}
# fsGroup: 65534
# runAsUser: 65534
# runAsGroup: 65534
# runAsNonRoot: true
# -- Automount service account token for the pod.
automountServiceAccountToken: true # Default from common lib
# -- Container-level configurations for the main exporter container.
containers:
exporter: # This is the primary container, name it 'exporter'
# -- Configuration for the exporter container image.
image:
# -- The container image repository for the exporter.
repository: ghcr.io/malarinv/iperf3-monitor
# -- The container image tag for the exporter. If not set, the chart's appVersion is used.
tag: "" # Defaults to .Chart.AppVersion via common library
tag: ""
# -- The image pull policy for the exporter container.
pullPolicy: IfNotPresent
# -- Environment variables for the exporter container.
# The actual env map will be constructed in the main chart template
# and passed to the common library. This section is for user overrides
# if they want to directly set other env vars using common lib's env schema.
env: {}
# Example:
# MY_CUSTOM_VAR: "my_value"
# ANOTHER_VAR:
# valueFrom:
# secretKeyRef:
# name: mysecret
# key: mykey
# -- Number of exporter pod replicas. Typically 1 is sufficient.
replicaCount: 1
# -- Ports for the exporter container.
# Expected by Kubernetes and bjw-s common library as a list of objects.
ports:
- name: metrics # Name of the port, referenced by Service's targetPort
# -- Port number for the metrics endpoint on the container.
containerPort: 9876
# -- Protocol for the metrics port.
protocol: TCP
# -- Whether this port definition is enabled. Specific to bjw-s common library.
enabled: true
# -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
testInterval: 300
# -- CPU and memory resource requests and limits for the exporter container.
# -- Timeout in seconds for a single iperf3 test run.
testTimeout: 10
# -- Protocol to use for testing (tcp or udp).
testProtocol: tcp
# -- CPU and memory resource requests and limits for the exporter pod.
# @default -- {} (no requests or limits are set; uncomment the example below to define them)
resources:
{}
# requests:
@ -107,16 +41,6 @@ controllers:
# cpu: "500m"
# memory: "256Mi"
# -- Probes configuration for the exporter container.
probes:
liveness:
enabled: false
readiness:
enabled: false
startup:
enabled: false
# Server configuration (iperf3 server daemonset)
server:
# -- Configuration for the iperf3 server container image (DaemonSet).
image:
@ -126,6 +50,8 @@ server:
tag: latest
# -- CPU and memory resource requests and limits for the iperf3 server pods (DaemonSet).
# These should be very low as the server is mostly idle.
# @default -- {} (no requests or limits are set; uncomment the example below to define them)
resources:
{}
# requests:
@ -136,9 +62,13 @@ server:
# memory: "128Mi"
# -- Node selector for scheduling iperf3 server pods.
# Use this to restrict the DaemonSet to a subset of nodes.
# @default -- {} (schedule on all nodes)
nodeSelector: {}
# -- Tolerations for scheduling iperf3 server pods on tainted nodes.
# -- Tolerations for scheduling iperf3 server pods on tainted nodes (e.g., control-plane nodes).
# This is often necessary to include master nodes in the test mesh.
# @default -- Tolerates control-plane and master taints.
tolerations:
- key: "node-role.kubernetes.io/control-plane"
operator: "Exists"
@ -147,62 +77,53 @@ server:
operator: "Exists"
effect: "NoSchedule"
# RBAC and ServiceAccount settings
# These are for the exporter. The exporter deployment (managed by common library)
# will need to use the ServiceAccount specified here or one created by the library.
rbac:
# -- If true, create ServiceAccount, ClusterRole, and ClusterRoleBinding for the exporter.
# Set to false if you manage RBAC externally.
create: true
serviceAccount:
# -- The name of the ServiceAccount to use/create for the exporter pod.
# If rbac.create is true, this SA is created. The exporter pod must use this SA.
name: "iperf3-monitor"
# -- The name of the ServiceAccount to use for the exporter pod.
# Only used if rbac.create is false. If not set, it defaults to the chart's fullname.
name: ""
# Service Monitor configuration for Prometheus
serviceMonitor:
# -- If true, create a ServiceMonitor resource.
# -- If true, create a ServiceMonitor resource for integration with Prometheus Operator.
# Requires a running Prometheus Operator in the cluster.
enabled: true
# -- Scrape interval for the ServiceMonitor.
# -- Scrape interval for the ServiceMonitor. How often Prometheus scrapes the exporter metrics.
interval: 60s
# -- Scrape timeout for the ServiceMonitor.
# -- Scrape timeout for the ServiceMonitor. How long Prometheus waits for metrics response.
scrapeTimeout: 30s
# Service configuration for the exporter
# This defines how the exporter is exposed.
# The common library can also manage services, or we can use our own template.
# This structure is compatible with bjw-s common library's service management if we choose to use it.
# -- Configuration for the exporter Service.
service:
main: # A key for the service, 'main' is a common convention.
# -- Enable the exporter service.
enabled: true
# -- Service type.
type: ClusterIP # ClusterIP is typical for internal services scraped by Prometheus.
# -- Ports configuration for the service.
ports:
metrics: # Name of the service port, should align with a container port name.
# -- Port number on which the service is exposed.
# -- Service type. ClusterIP is typically sufficient.
type: ClusterIP
# -- Port on which the exporter service is exposed.
port: 9876
# -- Target port on the exporter pod. Can be a number or name.
# Refers to the 'metrics' port defined in controllers.exporter.containers.exporter.ports.
targetPort: metrics
protocol: TCP
# -- Target port on the exporter pod.
targetPort: 9876
# Network Policy (optional)
# -- Optional configuration for a network policy to allow traffic to the iperf3 server DaemonSet.
# This is often necessary if you are using a network policy controller.
networkPolicy:
# -- If true, create a NetworkPolicy resource.
enabled: false
# -- Source selectors for ingress rules.
# -- Specify source selectors if needed (e.g., pods in a specific namespace).
from: []
# -- Namespace selectors for ingress rules.
# -- Specify namespace selectors if needed.
namespaceSelector: {}
# -- Pod selectors for ingress rules.
# -- Specify pod selectors if needed.
podSelector: {}
# Dependency Configuration (for Prometheus Operator)
# -----------------------------------------------------------------------------
# Dependency Configuration
# -----------------------------------------------------------------------------
dependencies:
# -- Set to false by default. Set to true to install a Prometheus operator dependency (used if serviceMonitor.enabled=true).
# -- If false (default), and serviceMonitor.enabled is true, you must have a compatible Prometheus Operator already running in your cluster.
install: false
# -- If true, use TrueCharts Prometheus Operator instead of kube-prometheus-stack (used if dependencies.install is true).
# -- Set to true to use the TrueCharts Prometheus Operator instead of kube-prometheus-stack.
# This chart's ServiceMonitor resources require a Prometheus Operator to be functional.
# If serviceMonitor.enabled is true, one of these two dependencies will be pulled based on this flag.
useTrueChartsPrometheusOperator: false

File diff suppressed because it is too large Load Diff

View File

@ -1,15 +1,11 @@
# Stage 1: Build stage with dependencies
FROM python:3.9-slim as builder
# Declare TARGETARCH for use in this stage
ARG TARGETARCH
WORKDIR /app
# Minimal dependencies for builder stage if any Python packages had C extensions.
# Assuming requirements.txt does not need gcc or other build tools for now.
# If pip install fails later, add necessary build tools (e.g., gcc, python3-dev) here.
# Install iperf3 and build dependencies
RUN apt-get update && \
# apt-get install -y --no-install-recommends gcc python3-dev # Example if needed
apt-get install -y --no-install-recommends gcc iperf3 libiperf-dev && \
rm -rf /var/lib/apt/lists/*
# Install Python dependencies
@ -21,11 +17,9 @@ FROM python:3.9-slim
WORKDIR /app
# Install iperf3 and its runtime dependency libsctp1 directly in the final stage.
# This simplifies the Dockerfile by removing the need to copy iperf3 components from the builder.
RUN apt-get update && \
apt-get install -y --no-install-recommends iperf3 libsctp1 && \
rm -rf /var/lib/apt/lists/*
# Copy iperf3 binary and library from the builder stage
COPY --from=builder /usr/bin/iperf3 /usr/bin/iperf3
COPY --from=builder /usr/lib/x86_64-linux-gnu/libiperf.so.0 /usr/lib/x86_64-linux-gnu/libiperf.so.0
# Copy installed Python packages from the builder stage
COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages

View File

@ -1,60 +1,28 @@
"""
Prometheus exporter for iperf3 network performance monitoring.
This script runs iperf3 tests between the node it's running on (source) and
other iperf3 server pods discovered in a Kubernetes cluster. It then exposes
these metrics for Prometheus consumption.
Configuration is primarily through environment variables and command-line arguments
for log level.
"""
import os
import time
import logging
import argparse
import sys
from kubernetes import client, config
from prometheus_client import start_http_server, Gauge
import iperf3
# --- Global Configuration & Setup ---
# Argument parsing for log level configuration
# The command-line --log-level argument takes precedence over the LOG_LEVEL env var.
# Defaults to INFO if neither is set.
parser = argparse.ArgumentParser(description="iperf3 Prometheus exporter.")
parser.add_argument(
'--log-level',
default=os.environ.get('LOG_LEVEL', 'INFO').upper(),
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
help='Set the logging level. Overrides LOG_LEVEL environment variable. (Default: INFO)'
)
args = parser.parse_args()
log_level_str = args.log_level
# Convert log level string (e.g., 'INFO') to its numeric representation (e.g., logging.INFO)
numeric_level = getattr(logging, log_level_str.upper(), None)
if not isinstance(numeric_level, int):
# This case should ideally not be reached if choices in argparse are respected.
logging.error(f"Invalid log level: {log_level_str}. Defaulting to INFO.")
numeric_level = logging.INFO
logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
# --- Configuration ---
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# --- Prometheus Metrics Definition ---
# These gauges will be used to expose iperf3 test results.
IPERF_BANDWIDTH_MBPS = Gauge(
'iperf_network_bandwidth_mbps',
'Network bandwidth measured by iperf3 in Megabits per second (Mbps)',
'Network bandwidth measured by iperf3 in Megabits per second',
['source_node', 'destination_node', 'protocol']
)
IPERF_JITTER_MS = Gauge(
'iperf_network_jitter_ms',
'Network jitter measured by iperf3 in milliseconds (ms) for UDP tests',
'Network jitter measured by iperf3 in milliseconds',
['source_node', 'destination_node', 'protocol']
)
IPERF_PACKETS_TOTAL = Gauge(
'iperf_network_packets_total',
'Total packets transmitted/received during the iperf3 UDP test',
'Total packets transmitted or received during the iperf3 test',
['source_node', 'destination_node', 'protocol']
)
IPERF_LOST_PACKETS = Gauge(
@ -70,21 +38,12 @@ IPERF_TEST_SUCCESS = Gauge(
def discover_iperf_servers():
"""
Discovers iperf3 server pods within a Kubernetes cluster.
It uses the in-cluster Kubernetes configuration to connect to the API.
The target namespace and label selector for iperf3 server pods are configured
via environment variables:
- IPERF_SERVER_NAMESPACE (default: 'default')
- IPERF_SERVER_LABEL_SELECTOR (default: 'app=iperf3-server')
Returns:
list: A list of dictionaries, where each dictionary contains the 'ip'
and 'node_name' of a discovered iperf3 server pod. Returns an
empty list if discovery fails or no servers are found.
Discover iperf3 server pods in the cluster using the Kubernetes API.
"""
try:
config.load_incluster_config() # Assumes running inside a Kubernetes pod
# Load in-cluster configuration
# Assumes the exporter runs in a pod with a service account having permissions
config.load_incluster_config()
v1 = client.CoreV1Api()
namespace = os.getenv('IPERF_SERVER_NAMESPACE', 'default')
@ -92,56 +51,67 @@ def discover_iperf_servers():
logging.info(f"Discovering iperf3 servers with label '{label_selector}' in namespace '{namespace}'")
# Use list_namespaced_pod to query only the specified namespace
ret = v1.list_namespaced_pod(namespace=namespace, label_selector=label_selector, watch=False)
# List pods across all namespaces with the specified label selector
# Note: list_pod_for_all_namespaces requires cluster-wide permissions
ret = v1.list_pod_for_all_namespaces(label_selector=label_selector, watch=False)
servers = []
for item in ret.items:
# No need to filter by namespace here as the API call is already namespaced
if item.status.pod_ip and item.status.phase == 'Running':
for i in ret.items:
# Ensure pod has an IP and is running
if i.status.pod_ip and i.status.phase == 'Running':
servers.append({
'ip': item.status.pod_ip,
'node_name': item.spec.node_name # Node where the iperf server pod is running
'ip': i.status.pod_ip,
'node_name': i.spec.node_name
})
logging.info(f"Discovered {len(servers)} iperf3 server pods in namespace '{namespace}'.")
logging.info(f"Discovered {len(servers)} iperf3 server pods.")
return servers
except config.ConfigException as e:
logging.error(f"Kubernetes config error: {e}. Is the exporter running in a cluster with RBAC permissions?")
return []
except Exception as e:
logging.error(f"Error discovering iperf servers: {e}")
return [] # Return empty list on error to avoid crashing the main loop
return [] # Return empty list on error to avoid crashing the loop
def run_iperf_test(server_ip, server_port, protocol, source_node_name, dest_node_name):
def run_iperf_test(server_ip, server_port, protocol, source_node, dest_node):
"""
Runs a single iperf3 test against a specified server and publishes metrics.
Args:
server_ip (str): The IP address of the iperf3 server.
server_port (int): The port number of the iperf3 server.
protocol (str): The protocol to use ('tcp' or 'udp').
source_node_name (str): The name of the source node (where this exporter is running).
dest_node_name (str): The name of the destination node (where the server is running).
The test duration is controlled by the IPERF_TEST_DURATION environment variable
(default: 5 seconds).
Runs a single iperf3 test and updates Prometheus metrics.
"""
logging.info(f"Running iperf3 {protocol.upper()} test from {source_node_name} to {dest_node_name} ({server_ip}:{server_port})")
logging.info(f"Running iperf3 test from {source_node} to {dest_node} ({server_ip}:{server_port}) using {protocol.upper()}")
iperf_client = iperf3.Client()
iperf_client.server_hostname = server_ip
iperf_client.port = server_port
iperf_client.protocol = protocol
iperf_client.duration = int(os.getenv('IPERF_TEST_DURATION', 5)) # Test duration in seconds
iperf_client.json_output = True # Enables easy parsing of results
client = iperf3.Client()
client.server_hostname = server_ip
client.port = server_port
client.protocol = protocol
# Duration of the test (seconds)
client.duration = int(os.getenv('IPERF_TEST_DURATION', 5))
# Output results as JSON for easy parsing
client.json_output = True
result = client.run()
# Parse results and update metrics
parse_and_publish_metrics(result, source_node, dest_node, protocol)
def parse_and_publish_metrics(result, source_node, dest_node, protocol):
"""
Parses the iperf3 result and updates Prometheus gauges.
Handles both successful and failed tests.
"""
labels = {'source_node': source_node, 'destination_node': dest_node, 'protocol': protocol}
if result and result.error:
logging.error(f"Test from {source_node} to {dest_node} failed: {result.error}")
IPERF_TEST_SUCCESS.labels(**labels).set(0)
# Set metrics to 0 on failure
try:
result = iperf_client.run()
parse_and_publish_metrics(result, source_node_name, dest_node_name, protocol)
except Exception as e:
# Catch unexpected errors during client.run() or parsing
logging.error(f"Exception during iperf3 test or metric parsing for {dest_node_name}: {e}")
labels = {'source_node': source_node_name, 'destination_node': dest_node_name, 'protocol': protocol}
IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
IPERF_JITTER_MS.labels(**labels).set(0)
IPERF_PACKETS_TOTAL.labels(**labels).set(0)
IPERF_LOST_PACKETS.labels(**labels).set(0)
except KeyError:
# Labels might not be registered yet if this is the first failure
pass
return
if not result:
logging.error(f"Test from {source_node} to {dest_node} failed to return a result object.")
IPERF_TEST_SUCCESS.labels(**labels).set(0)
try:
IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
@ -149,151 +119,42 @@ def run_iperf_test(server_ip, server_port, protocol, source_node_name, dest_node
IPERF_PACKETS_TOTAL.labels(**labels).set(0)
IPERF_LOST_PACKETS.labels(**labels).set(0)
except KeyError:
logging.debug(f"KeyError setting failure metrics for {labels} after client.run() exception.")
def parse_and_publish_metrics(result, source_node, dest_node, protocol):
"""
Parses the iperf3 test result and updates Prometheus gauges.
Args:
result (iperf3.TestResult): The result object from the iperf3 client.
source_node (str): Name of the source node.
dest_node (str): Name of the destination node.
protocol (str): Protocol used for the test ('tcp' or 'udp').
"""
labels = {'source_node': source_node, 'destination_node': dest_node, 'protocol': protocol}
# Handle failed tests (e.g., server unreachable) or missing result object
if not result or result.error:
error_message = result.error if result and result.error else "No result object from iperf3 client"
logging.warning(f"Test from {source_node} to {dest_node} ({protocol.upper()}) failed: {error_message}")
IPERF_TEST_SUCCESS.labels(**labels).set(0)
# Set all relevant metrics to 0 on failure to clear stale values from previous successes
try:
IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
IPERF_JITTER_MS.labels(**labels).set(0) # Applicable for UDP, zeroed for TCP later
IPERF_PACKETS_TOTAL.labels(**labels).set(0) # Applicable for UDP, zeroed for TCP later
IPERF_LOST_PACKETS.labels(**labels).set(0) # Applicable for UDP, zeroed for TCP later
except KeyError:
# This can happen if labels were never registered due to continuous failures
logging.debug(f"KeyError when setting failure metrics for {labels}. Gauges might not be initialized.")
pass
return
# If we reach here, the test itself was successful in execution
IPERF_TEST_SUCCESS.labels(**labels).set(1)
# Determine bandwidth:
# Order of preference: received_Mbps, sent_Mbps, Mbps, then JSON fallbacks.
# received_Mbps is often most relevant for TCP client perspective.
# sent_Mbps can be relevant for UDP or as a TCP fallback.
# The summary data is typically in result.json['end']['sum_sent'] or result.json['end']['sum_received']
# The iperf3-python client often exposes this directly as attributes like sent_Mbps or received_Mbps
# For TCP, we usually care about the received bandwidth on the client side (which is the exporter)
# For UDP, the client report contains jitter, lost packets, etc.
bandwidth_mbps = 0
if hasattr(result, 'received_Mbps') and result.received_Mbps is not None:
bandwidth_mbps = result.received_Mbps
elif hasattr(result, 'sent_Mbps') and result.sent_Mbps is not None:
# Fallback, though received_Mbps is usually more relevant for TCP client
bandwidth_mbps = result.sent_Mbps
elif hasattr(result, 'Mbps') and result.Mbps is not None: # General attribute from iperf3 library
bandwidth_mbps = result.Mbps
# Fallback to raw JSON if direct attributes are None or missing
elif result.json:
# Prefer received sum, then sent sum from the JSON output's 'end' summary
if 'end' in result.json and 'sum_received' in result.json['end'] and \
result.json['end']['sum_received'].get('bits_per_second') is not None:
bandwidth_mbps = result.json['end']['sum_received']['bits_per_second'] / 1000000.0
elif 'end' in result.json and 'sum_sent' in result.json['end'] and \
result.json['end']['sum_sent'].get('bits_per_second') is not None:
bandwidth_mbps = result.json['end']['sum_sent']['bits_per_second'] / 1000000.0
# Add a check for the raw JSON output structure as a fallback
elif result.json and 'end' in result.json and 'sum_received' in result.json['end'] and result.json['end']['sum_received']['bits_per_second'] is not None:
bandwidth_mbps = result.json['end']['sum_received']['bits_per_second'] / 1000000
elif result.json and 'end' in result.json and 'sum_sent' in result.json['end'] and result.json['end']['sum_sent']['bits_per_second'] is not None:
bandwidth_mbps = result.json['end']['sum_sent']['bits_per_second'] / 1000000
IPERF_BANDWIDTH_MBPS.labels(**labels).set(bandwidth_mbps)
# UDP specific metrics
if protocol == 'udp':
# These attributes are specific to UDP tests in iperf3
IPERF_JITTER_MS.labels(**labels).set(getattr(result, 'jitter_ms', 0) if result.jitter_ms is not None else 0)
IPERF_PACKETS_TOTAL.labels(**labels).set(getattr(result, 'packets', 0) if result.packets is not None else 0)
IPERF_LOST_PACKETS.labels(**labels).set(getattr(result, 'lost_packets', 0) if result.lost_packets is not None else 0)
# iperf3-python exposes UDP results directly
IPERF_JITTER_MS.labels(**labels).set(result.jitter_ms if hasattr(result, 'jitter_ms') and result.jitter_ms is not None else 0)
IPERF_PACKETS_TOTAL.labels(**labels).set(result.packets if hasattr(result, 'packets') and result.packets is not None else 0)
IPERF_LOST_PACKETS.labels(**labels).set(result.lost_packets if hasattr(result, 'lost_packets') and result.lost_packets is not None else 0)
else:
# For TCP tests, ensure UDP-specific metrics are set to 0
# Ensure UDP metrics are zeroed or absent for TCP tests
try:
IPERF_JITTER_MS.labels(**labels).set(0)
IPERF_PACKETS_TOTAL.labels(**labels).set(0)
IPERF_LOST_PACKETS.labels(**labels).set(0)
except KeyError:
# Can occur if labels not yet registered (e.g. first test is TCP)
logging.debug(f"KeyError for {labels} when zeroing UDP metrics for TCP test.")
pass
def main_loop():
    """
    Run iperf3 test cycles forever, one cycle per configured interval.

    Settings are read once, before the first cycle, from the environment:
      - IPERF_TEST_INTERVAL: seconds to sleep between cycles (default 300).
      - IPERF_SERVER_PORT: port the iperf3 servers listen on (default 5201).
      - IPERF_TEST_PROTOCOL: protocol name, lower-cased (default 'tcp').
      - SOURCE_NODE_NAME: name of the node this exporter runs on (required).

    Each cycle discovers the server pods, tests every server that has an IP
    and is not on the exporter's own node, then sleeps.

    Raises:
        SystemExit: with code 1 when SOURCE_NODE_NAME is unset or empty.
    """
    cycle_pause = int(os.environ.get('IPERF_TEST_INTERVAL', 300))
    iperf_port = int(os.environ.get('IPERF_SERVER_PORT', 5201))
    proto = os.environ.get('IPERF_TEST_PROTOCOL', 'tcp').lower()
    local_node = os.environ.get('SOURCE_NODE_NAME')

    # Every metric is labelled with the source node, so there is nothing
    # useful to do without it.
    if not local_node:
        logging.error("CRITICAL: SOURCE_NODE_NAME environment variable not set. This is required. Exiting.")
        sys.exit(1)

    logging.info(
        f"Exporter configured. Source Node: {local_node}, "
        f"Test Interval: {cycle_pause}s, Server Port: {iperf_port}, Protocol: {proto.upper()}"
    )

    while True:
        logging.info("Starting new iperf test cycle...")
        discovered = discover_iperf_servers()
        if not discovered:
            logging.warning("No iperf servers discovered in this cycle. Check K8s setup and RBAC permissions.")

        # An empty/None discovery result simply yields no iterations.
        for entry in discovered or ():
            remote_node = entry.get('node_name', 'unknown_destination_node')
            remote_ip = entry.get('ip')

            if not remote_ip:
                logging.warning(f"Discovered server entry missing an IP: {entry}. Skipping.")
                continue

            # A node is never tested against itself.
            if remote_node == local_node:
                logging.info(f"Skipping test to self: {local_node} to {remote_ip} (on same node: {remote_node}).")
                continue

            run_iperf_test(remote_ip, iperf_port, proto, local_node, remote_node)

        logging.info(f"Test cycle completed. Sleeping for {cycle_pause} seconds.")
        time.sleep(cycle_pause)
if __name__ == '__main__':
    # Logging was already configured at import time; only the metrics
    # endpoint and the test loop remain to be started.
    metrics_port = int(os.environ.get('LISTEN_PORT', 9876))

    try:
        # Serve the Prometheus gauges over HTTP.
        start_http_server(metrics_port)
        logging.info(f"Prometheus exporter listening on port {metrics_port}")
    except Exception as exc:
        # Without a metrics endpoint the exporter is useless, so stop here.
        logging.error(f"Failed to start Prometheus HTTP server on port {metrics_port}: {exc}")
        sys.exit(1)

    # Never returns normally; main_loop() exits the process itself when
    # SOURCE_NODE_NAME is missing.
    main_loop()

View File

@ -1,347 +0,0 @@
#!/usr/bin/env bash
# Copyright The Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The install script is based off of the MIT-licensed script from glide,
# the package manager for Go: https://github.com/Masterminds/glide.sh/blob/master/get
# Installer settings. A value already present (and non-empty) in the
# environment is kept; otherwise the default shown here is assigned.
: "${BINARY_NAME:=helm}"
: "${USE_SUDO:=true}"
: "${DEBUG:=false}"
: "${VERIFY_CHECKSUM:=true}"
: "${VERIFY_SIGNATURES:=false}"
: "${HELM_INSTALL_DIR:=/usr/local/bin}"
: "${GPG_PUBRING:=pubring.kbx}"

# Record, as the strings "true"/"false", which external tools can be found.
if type "curl" &> /dev/null; then HAS_CURL="true"; else HAS_CURL="false"; fi
if type "wget" &> /dev/null; then HAS_WGET="true"; else HAS_WGET="false"; fi
if type "openssl" &> /dev/null; then HAS_OPENSSL="true"; else HAS_OPENSSL="false"; fi
if type "gpg" &> /dev/null; then HAS_GPG="true"; else HAS_GPG="false"; fi
if type "git" &> /dev/null; then HAS_GIT="true"; else HAS_GIT="false"; fi
if type "tar" &> /dev/null; then HAS_TAR="true"; else HAS_TAR="false"; fi
# initArch sets the global ARCH to the release-artifact name of this
# machine's CPU architecture, as reported by `uname -m`. Machine names
# without a mapping below are stored in ARCH unchanged.
initArch() {
  local machine
  machine=$(uname -m)
  case "$machine" in
    armv5*) ARCH="armv5" ;;
    armv6*) ARCH="armv6" ;;
    armv7*) ARCH="arm" ;;
    aarch64) ARCH="arm64" ;;
    x86_64) ARCH="amd64" ;;
    x86 | i686 | i386) ARCH="386" ;;
    *) ARCH="$machine" ;;
  esac
}
# initOS sets the global OS to the lower-cased kernel name from `uname`.
# MinGW (Minimalist GNU for Windows) and Cygwin shells are both reported
# as 'windows'.
initOS() {
  OS=$(uname | tr '[:upper:]' '[:lower:]')
  if [[ "$OS" == mingw* || "$OS" == cygwin* ]]; then
    OS='windows'
  fi
}
# runAsRoot runs the given command line, prefixing it with sudo only when
# the current user is not root AND USE_SUDO is "true". Arguments are passed
# through unchanged, so quoting done by the caller is preserved.
runAsRoot() {
  # Two separate tests instead of the obsolescent, ambiguity-prone `-a`
  # operator; EUID is quoted so an unexpected value cannot break the test.
  if [ "${EUID}" -ne 0 ] && [ "${USE_SUDO}" = "true" ]; then
    sudo "$@"
  else
    "$@"
  fi
}
# verifySupported checks that the OS/ARCH combination is supported for
# binary builds, as well as whether the necessary tools are present.
# It exits with status 1 on the first unmet requirement; a missing git only
# produces a warning.
verifySupported() {
  local supported_platforms=(
    darwin-amd64 darwin-arm64
    linux-386 linux-amd64 linux-arm linux-arm64
    linux-ppc64le linux-s390x linux-riscv64
    windows-amd64 windows-arm64
  )
  local platform
  local is_supported="false"

  # Compare whole names. A substring/regex search would also accept
  # fragments such as "linux-s390" or an empty "-" as supported.
  for platform in "${supported_platforms[@]}"; do
    if [ "${platform}" = "${OS}-${ARCH}" ]; then
      is_supported="true"
      break
    fi
  done

  if [ "${is_supported}" != "true" ]; then
    echo "No prebuilt binary for ${OS}-${ARCH}."
    echo "To build from source, go to https://github.com/helm/helm"
    exit 1
  fi

  if [ "${HAS_CURL}" != "true" ] && [ "${HAS_WGET}" != "true" ]; then
    echo "Either curl or wget is required"
    exit 1
  fi

  if [ "${VERIFY_CHECKSUM}" = "true" ] && [ "${HAS_OPENSSL}" != "true" ]; then
    echo "In order to verify checksum, openssl must first be installed."
    echo "Please install openssl or set VERIFY_CHECKSUM=false in your environment."
    exit 1
  fi

  if [ "${VERIFY_SIGNATURES}" = "true" ]; then
    if [ "${HAS_GPG}" != "true" ]; then
      echo "In order to verify signatures, gpg must first be installed."
      echo "Please install gpg or set VERIFY_SIGNATURES=false in your environment."
      exit 1
    fi
    if [ "${OS}" != "linux" ]; then
      echo "Signature verification is currently only supported on Linux."
      echo "Please set VERIFY_SIGNATURES=false or verify the signatures manually."
      exit 1
    fi
  fi

  if [ "${HAS_GIT}" != "true" ]; then
    echo "[WARNING] Could not find git. It is required for plugin installation."
  fi

  if [ "${HAS_TAR}" != "true" ]; then
    echo "[ERROR] Could not find tar. It is required to extract the helm binary archive."
    exit 1
  fi
}
# checkDesiredVersion sets the global TAG to the version to install.
# When DESIRED_VERSION is set it is used as-is; otherwise the latest release
# tag is fetched from get.helm.sh with curl or wget. Exits with status 1,
# after printing what the server answered, when no tag could be obtained.
checkDesiredVersion() {
  if [ -n "${DESIRED_VERSION:-}" ]; then
    TAG=$DESIRED_VERSION
    return 0
  fi

  # Get tag from release URL
  local latest_release_url="https://get.helm.sh/helm-latest-version"
  local latest_release_response=""
  if [ "${HAS_CURL}" == "true" ]; then
    latest_release_response=$( curl -L --silent --show-error --fail "$latest_release_url" 2>&1 || true )
  elif [ "${HAS_WGET}" == "true" ]; then
    latest_release_response=$( wget "$latest_release_url" -q -O - 2>&1 || true )
  fi

  # `|| true`: grep exits non-zero when nothing matches, and under `set -e`
  # that would abort right here, before the diagnostic below is printed.
  TAG=$( echo "$latest_release_response" | grep '^v[0-9]' || true )
  if [ -z "$TAG" ]; then
    printf "Could not retrieve the latest release tag information from %s: %s\n" "${latest_release_url}" "${latest_release_response}"
    exit 1
  fi
}
# checkHelmInstalledVersion reports whether the binary already present in
# HELM_INSTALL_DIR is the wanted version.
# Returns 0 when its reported version equals TAG (nothing to do) and 1 when
# the binary is missing or reports a different version.
checkHelmInstalledVersion() {
  local helm_bin="${HELM_INSTALL_DIR}/${BINARY_NAME}"

  [[ -f "${helm_bin}" ]] || return 1

  # Declared and assigned in one statement on purpose: `local` hides the
  # command's exit status, so a broken binary just yields a non-matching
  # version instead of a failure status.
  local version=$("${helm_bin}" version --template="{{ .Version }}")

  if [[ "$version" != "$TAG" ]]; then
    echo "Helm ${TAG} is available. Changing from version ${version}."
    return 1
  fi

  echo "Helm ${version} is already ${DESIRED_VERSION:-latest}"
  return 0
}
# downloadFile fetches the release archive for TAG/OS/ARCH and its .sha256
# file from get.helm.sh into a fresh temporary directory.
# Sets the globals HELM_DIST, DOWNLOAD_URL, CHECKSUM_URL, HELM_TMP_ROOT,
# HELM_TMP_FILE and HELM_SUM_FILE for the later verify/install steps.
downloadFile() {
  HELM_DIST="helm-$TAG-$OS-$ARCH.tar.gz"
  DOWNLOAD_URL="https://get.helm.sh/$HELM_DIST"
  CHECKSUM_URL="$DOWNLOAD_URL.sha256"

  HELM_TMP_ROOT="$(mktemp -dt helm-installer-XXXXXX)"
  HELM_TMP_FILE="$HELM_TMP_ROOT/$HELM_DIST"
  HELM_SUM_FILE="$HELM_TMP_FILE.sha256"

  echo "Downloading $DOWNLOAD_URL"

  # curl is preferred; wget is only tried when curl is unavailable.
  # The checksum file is fetched before the archive in both cases.
  if [ "${HAS_CURL}" == "true" ]; then
    curl -SsL "$CHECKSUM_URL" -o "$HELM_SUM_FILE"
    curl -SsL "$DOWNLOAD_URL" -o "$HELM_TMP_FILE"
    return
  fi

  if [ "${HAS_WGET}" == "true" ]; then
    wget -q -O "$HELM_SUM_FILE" "$CHECKSUM_URL"
    wget -q -O "$HELM_TMP_FILE" "$DOWNLOAD_URL"
  fi
}
# verifyFile runs the integrity checks selected through the environment:
# the SHA256 checksum when VERIFY_CHECKSUM is "true", then the GPG
# signatures when VERIFY_SIGNATURES is "true". Any other value skips
# the corresponding check.
verifyFile() {
  case "${VERIFY_CHECKSUM}" in
    true) verifyChecksum ;;
  esac

  case "${VERIFY_SIGNATURES}" in
    true) verifySignatures ;;
  esac
}
# installFile unpacks the downloaded archive under HELM_TMP_ROOT and copies
# the helm executable found in its "$OS-$ARCH" directory to
# HELM_INSTALL_DIR/BINARY_NAME, through runAsRoot.
# Sets the globals HELM_TMP and HELM_TMP_BIN.
installFile() {
  local install_target="$HELM_INSTALL_DIR/$BINARY_NAME"

  HELM_TMP="$HELM_TMP_ROOT/$BINARY_NAME"
  HELM_TMP_BIN="$HELM_TMP/$OS-$ARCH/helm"

  mkdir -p "$HELM_TMP"
  tar xf "$HELM_TMP_FILE" -C "$HELM_TMP"

  echo "Preparing to install $BINARY_NAME into ${HELM_INSTALL_DIR}"
  runAsRoot cp "$HELM_TMP_BIN" "$install_target"
  echo "$BINARY_NAME installed into $install_target"
}
# verifyChecksum compares the SHA256 digest of the downloaded archive
# (HELM_TMP_FILE) with the expected digest stored in HELM_SUM_FILE.
# Exits with status 1 when the digests differ or when no digest could be
# computed at all.
verifyChecksum() {
  printf "Verifying checksum... "
  # Paths are quoted: unquoted, a temp directory containing whitespace made
  # both commands fail, leaving two empty strings that compared equal.
  # The digest is the last field of openssl's "ALGO(file)= digest" output,
  # which stays true even if the file name itself contains spaces.
  local sum=$(openssl sha1 -sha256 "${HELM_TMP_FILE}" | awk '{print $NF}')
  local expected_sum=$(cat "${HELM_SUM_FILE}")
  # An empty digest means hashing failed; never treat that as a match.
  if [ -z "$sum" ] || [ "$sum" != "$expected_sum" ]; then
    echo "SHA sum of ${HELM_TMP_FILE} does not match. Aborting."
    exit 1
  fi
  echo "Done."
}
# verifySignatures obtains the latest KEYS file from GitHub main branch
# as well as the signature .asc files from the specific GitHub release,
# then verifies that the release artifacts were signed by a maintainer's key.
# Reads TAG, OS, ARCH, HELM_TMP_ROOT, GPG_PUBRING, DEBUG, HAS_CURL and
# HAS_WGET. Exits with status 1 when either signature cannot be verified.
verifySignatures() {
printf "Verifying signatures... "
# Download the maintainers' public keys into the temporary directory.
local keys_filename="KEYS"
local github_keys_url="https://raw.githubusercontent.com/helm/helm/main/${keys_filename}"
if [ "${HAS_CURL}" == "true" ]; then
curl -SsL "${github_keys_url}" -o "${HELM_TMP_ROOT}/${keys_filename}"
elif [ "${HAS_WGET}" == "true" ]; then
wget -q -O "${HELM_TMP_ROOT}/${keys_filename}" "${github_keys_url}"
fi
# Use a throw-away GnuPG home directory under HELM_TMP_ROOT instead of the
# invoking user's own one.
local gpg_keyring="${HELM_TMP_ROOT}/keyring.gpg"
local gpg_homedir="${HELM_TMP_ROOT}/gnupg"
mkdir -p -m 0700 "${gpg_homedir}"
# gpg's stderr is discarded unless DEBUG is "true".
local gpg_stderr_device="/dev/null"
if [ "${DEBUG}" == "true" ]; then
gpg_stderr_device="/dev/stderr"
fi
# Import the downloaded keys, then export them into the single keyring file
# that the two --verify calls below are pointed at.
gpg --batch --quiet --homedir="${gpg_homedir}" --import "${HELM_TMP_ROOT}/${keys_filename}" 2> "${gpg_stderr_device}"
gpg --batch --no-default-keyring --keyring "${gpg_homedir}/${GPG_PUBRING}" --export > "${gpg_keyring}"
# Fetch the detached signatures of the checksum file and of the archive
# from the GitHub release that matches TAG.
local github_release_url="https://github.com/helm/helm/releases/download/${TAG}"
if [ "${HAS_CURL}" == "true" ]; then
curl -SsL "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" -o "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc"
curl -SsL "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" -o "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc"
elif [ "${HAS_WGET}" == "true" ]; then
wget -q -O "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc"
wget -q -O "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc"
fi
local error_text="If you think this might be a potential security issue,"
error_text="${error_text}\nplease see here: https://github.com/helm/community/blob/master/SECURITY.md"
# Count the GOODSIG/VALIDSIG lines in gpg's machine-readable status output;
# fewer than two is treated as a failed verification.
# NOTE(review): `local var=$(...)` reports the status of `local`, not of the
# pipeline, so a zero-match `grep -c` does not trip `set -e` here - confirm
# before splitting the declaration from the assignment.
local num_goodlines_sha=$(gpg --verify --keyring="${gpg_keyring}" --status-fd=1 "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" 2> "${gpg_stderr_device}" | grep -c -E '^\[GNUPG:\] (GOODSIG|VALIDSIG)')
if [[ ${num_goodlines_sha} -lt 2 ]]; then
echo "Unable to verify the signature of helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256!"
echo -e "${error_text}"
exit 1
fi
# Same check for the signature of the archive itself.
local num_goodlines_tar=$(gpg --verify --keyring="${gpg_keyring}" --status-fd=1 "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" 2> "${gpg_stderr_device}" | grep -c -E '^\[GNUPG:\] (GOODSIG|VALIDSIG)')
if [[ ${num_goodlines_tar} -lt 2 ]]; then
echo "Unable to verify the signature of helm-${TAG}-${OS}-${ARCH}.tar.gz!"
echo -e "${error_text}"
exit 1
fi
echo "Done."
}
# fail_trap is the exit handler of the installer. It stores the pending exit
# status in the global `result`, prints a failure notice when that status is
# non-zero (plus the usage text when arguments had been given), removes the
# temporary files and exits with the stored status.
fail_trap() {
  result=$?

  # Clean exit: nothing to report.
  if [ "$result" = "0" ]; then
    cleanup
    exit $result
  fi

  if [[ -z "$INPUT_ARGUMENTS" ]]; then
    echo "Failed to install $BINARY_NAME"
  else
    echo "Failed to install $BINARY_NAME with the arguments provided: $INPUT_ARGUMENTS"
    help
  fi
  echo -e "\tFor support, go to https://github.com/helm/helm."

  cleanup
  exit $result
}
# testVersion checks that BINARY_NAME can be found through PATH and stores
# what `command -v` printed for it in the global HELM. Exits with status 1
# when the lookup reports "not found".
# Side effect: errexit (`set -e`) is switched on when the function returns.
testVersion() {
  # errexit is suspended so the failed lookup can be reported here.
  set +e
  HELM="$(command -v $BINARY_NAME)"
  case "$?" in
    1)
      echo "$BINARY_NAME not found. Is $HELM_INSTALL_DIR on your "'$PATH?'
      exit 1
      ;;
  esac
  set -e
}
# help prints the command-line arguments accepted by the installer,
# one tab-indented line per option.
help () {
  echo "Accepted cli arguments are:"
  local usage_line
  for usage_line in \
    "\t[--help|-h ] ->> prints this help" \
    "\t[--version|-v <desired_version>] . When not defined it fetches the latest release tag from the Helm CDN" \
    "\te.g. --version v3.0.0 or -v canary" \
    "\t[--no-sudo] ->> install without sudo"
  do
    # -e turns the leading \t of each entry into a real tab.
    echo -e "${usage_line}"
  done
}
# cleanup removes the temporary download directory, if one was created,
# to avoid https://github.com/helm/helm/issues/2977.
# Succeeds without doing anything when HELM_TMP_ROOT is unset, empty or not
# a directory.
cleanup() {
  [[ -d "${HELM_TMP_ROOT:-}" ]] || return 0
  rm -rf "$HELM_TMP_ROOT"
}
# Execution
# fail_trap runs on every exit of the script, successful or not; it prints a
# failure notice for a non-zero status and removes the temporary files.
trap "fail_trap" EXIT
# Stop execution on any error
set -e
# Set debug if desired
if [ "${DEBUG}" == "true" ]; then
set -x
fi
# Parsing input arguments (if any)
# The raw arguments are kept so fail_trap can echo them back on failure.
export INPUT_ARGUMENTS="${@}"
# Unset variables are treated as errors while the arguments are parsed.
set -u
while [[ $# -gt 0 ]]; do
case $1 in
'--version'|-v)
# Move on to the value that follows the flag.
shift
if [[ $# -ne 0 ]]; then
export DESIRED_VERSION="${1}"
# Accept "3.0.0" as well as "v3.0.0" by adding the missing prefix.
if [[ "$1" != "v"* ]]; then
echo "Expected version arg ('${DESIRED_VERSION}') to begin with 'v', fixing..."
export DESIRED_VERSION="v${1}"
fi
else
# NOTE(review): a missing version value ends the script with status 0,
# i.e. it is reported as success - confirm this is intended.
echo -e "Please provide the desired version. e.g. --version v3.0.0 or -v canary"
exit 0
fi
;;
'--no-sudo')
USE_SUDO="false"
;;
'--help'|-h)
help
exit 0
;;
# Unknown argument: exit with status 1; the EXIT trap then prints the
# failure message and the usage text.
*) exit 1
;;
esac
shift
done
set +u
# Installation steps, in order: detect the platform, check prerequisites,
# resolve the version, then download/verify/install only when the wanted
# version is not already installed.
initArch
initOS
verifySupported
checkDesiredVersion
if ! checkHelmInstalledVersion; then
downloadFile
verifyFile
installFile
fi
testVersion
cleanup