Mirror of https://github.com/malarinv/iperf3-monitor.git, synced 2026-03-08 05:22:35 +00:00.
Compare commits: 24 commits, bootstrap...add-grafan

5931965861, dcfbc945b1, dbb6a161d5, e54a02ad8c, 0a3249f30b, 1eeb4b20df,
0c490e95d2, 81b771d1ee, 3e21f978ee, 458b786ff4, 96be13a23c, 8d51afc24e,
a2d57908f6, 4298031a2d, e6d1a8fb91, a9f2a49549, 7f0784d382, 050fbcbf3c,
e22d2ff71d, 1487901337, fec4cf64b9, c08f4a5667, f6c26c02b1, 774afbab70
.github/workflows/ci.yaml (vendored, new file, 83 lines):

```yaml
name: CI

on:
  pull_request:
    branches: ["main"] # Or your main development branch

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  validate-chart:
    name: Validate Helm Chart
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Set up Helm
        uses: azure/setup-helm@v3
        with:
          version: v3.10.0

      - name: Helm Lint
        run: helm lint ./charts/iperf3-monitor

  build:
    name: Build Docker Image
    runs-on: ubuntu-latest
    permissions:
      contents: read # Needed to checkout the repository
      packages: write # Needed to push Docker images to GHCR
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            # Tag with the PR number if it's a pull request event
            type=match,pattern=pull_request,value=pr-{{number}}
            # Tag with the git SHA
            type=sha,prefix=
            # Tag with 'latest' if on the main branch (though this workflow only runs on PRs to main)
            type=ref,event=branch,pattern=main,value=latest

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build Docker image
        uses: docker/build-push-action@v4
        with:
          context: ./exporter
          # Push the image if the event is a pull request.
          # The workflow currently only triggers on pull_request events.
          push: ${{ github.event_name == 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64

  test:
    name: Run Tests
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      # Replace this step with your actual test command(s)
      - name: Placeholder Test Step
        run: echo "No tests configured yet. Add your test commands here."
```
.github/workflows/release.yml (vendored, 11 lines changed):

```diff
@@ -36,6 +36,12 @@ jobs:
       - name: Check out code
         uses: actions/checkout@v3
 
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
       - name: Log in to GitHub Container Registry
         uses: docker/login-action@v2
         with:
@@ -56,6 +62,7 @@ jobs:
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
+          platforms: linux/amd64,linux/arm64
 
   package-and-publish-chart:
     name: Package and Publish Helm Chart
@@ -82,8 +89,8 @@ jobs:
       - name: Set Chart Version from Tag
         run: |
           VERSION=$(echo "${{ github.ref_name }}" | sed 's/^v//')
-          yq e -i '.version = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
-          yq e -i '.appVersion = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
+          VERSION=$VERSION yq e -i '.version = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
+          VERSION=$VERSION yq e -i '.appVersion = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
           cat ./charts/iperf3-monitor/Chart.yaml # Optional: print updated Chart.yaml
 
       - name: Publish Helm chart
```
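The `VERSION=$VERSION` prefix on the new lines appears to be the actual fix here: yq's `strenv(VERSION)` reads the process environment, and a plain `VERSION=$(...)` shell assignment inside `run:` is never exported to the yq child process. As a minimal sketch of the same version bump done in Python instead of yq (assumes PyYAML is available; paths as in the chart above):

```python
import yaml

def bump_chart_version(chart_path: str, git_tag: str) -> None:
    """Set .version and .appVersion from a git tag like 'v1.2.3' (mirrors sed 's/^v//')."""
    version = git_tag.removeprefix("v")
    with open(chart_path) as f:
        chart = yaml.safe_load(f)
    chart["version"] = version
    chart["appVersion"] = version
    with open(chart_path, "w") as f:
        yaml.safe_dump(chart, f, sort_keys=False)

# Example: bump_chart_version("./charts/iperf3-monitor/Chart.yaml", "v0.2.0")
```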
.gitignore (vendored, 2 lines changed):

```diff
@@ -36,4 +36,4 @@ Thumbs.db
 
 # Helm
 !charts/iperf3-monitor/.helmignore
-charts/*.tgz # Ignore packaged chart files
+charts/iperf3-monitor/charts/
```
(Diff for one file suppressed because it is too large.)
README.md (10 lines changed):

````diff
@@ -37,7 +37,7 @@ This separation of concerns ensures scalability, resilience, and aligns with Kub
 1. Add the Helm chart repository (replace with your actual repo URL once published):
 
    ```/dev/null/helm-install.sh#L1-1
-   helm repo add iperf3-monitor https://your-github-org.github.io/iperf3-monitor/
+   helm repo add iperf3-monitor https://malarinv.github.io/iperf3-monitor/
    ```
 
 2. Update your Helm repositories:
@@ -78,7 +78,7 @@ exporter:
   # -- Configuration for the exporter container image.
   image:
     # -- The container image repository for the exporter.
-    repository: ghcr.io/my-org/iperf3-prometheus-exporter # Replace with your repo URL
+    repository: ghcr.io/malarinv/iperf3-monitor
     # -- The container image tag for the exporter. If not set, the chart's appVersion is used.
     tag: ""
     # -- The image pull policy for the exporter container.
@@ -430,8 +430,4 @@ The project includes a GitHub Actions workflow (`.github/workflows/release.yml`)
 
 ## License
 
-This project is licensed under the terms defined in the `LICENSE` file.
-
-```iperf3-monitor/LICENSE
-This project is currently unlicensed. Please see the project's documentation or repository for licensing information when it becomes available.
-```
+This project is licensed under the GNU Affero General Public License v3. See the `LICENSE` file for details.
````
charts/iperf3-monitor/Chart.lock (new file, 9 lines):

```yaml
dependencies:
  - name: kube-prometheus-stack
    repository: https://prometheus-community.github.io/helm-charts
    version: 75.3.6
  - name: prometheus-operator
    repository: oci://tccr.io/truecharts
    version: 11.5.1
digest: sha256:3000e63445f8ba8df601cb483f4f77d14c5c4662bff2d16ffcf5cf1f7def314b
generated: "2025-06-20T17:25:44.538372209+05:30"
```
charts/iperf3-monitor/Chart.yaml (file header missing in this capture; inferred from content):

```diff
@@ -12,19 +12,23 @@ keywords:
   - kubernetes
   - prometheus
   - grafana
-home: https://github.com/malarinv/iperf3-monitor # Replace with your repo URL
+home: https://github.com/malarinv/iperf3-monitor
 sources:
-  - https://github.com/malarinv/iperf3-monitor # Replace with your repo URL
+  - https://github.com/malarinv/iperf3-monitor
 maintainers:
-  - name: Malar Invention # Replace with your name
-    email: malarkannan.invention@gmail.com # Replace with your email
+  - name: Malar Invention
+    email: malarkannan.invention@gmail.com
 icon: https://raw.githubusercontent.com/malarinv/iperf3-monitor/main/icon.png # Optional icon URL
 annotations:
   artifacthub.io/changes: |
     - Add initial Helm chart structure.
   artifacthub.io/category: networking
 dependencies:
-  - name: prometheus-community/kube-prometheus-stack # Example dependency if you package the whole stack
+  - name: kube-prometheus-stack # Example dependency if you package the whole stack
     version: ">=30.0.0" # Specify a compatible version range
     repository: https://prometheus-community.github.io/helm-charts
-    condition: serviceMonitor.enabled # Only include if ServiceMonitor is enabled (assuming Prometheus Operator)
+    condition: "dependencies.install, serviceMonitor.enabled, !dependencies.useTrueChartsPrometheusOperator"
+  - name: prometheus-operator
+    version: ">=8.11.1"
+    repository: "oci://tccr.io/truecharts"
+    condition: "dependencies.install, serviceMonitor.enabled, dependencies.useTrueChartsPrometheusOperator"
```
charts/iperf3-monitor/grafana/iperf3-dashboard.json (new file, 194 lines):

```json
{
  "__inputs": [],
  "__requires": [
    { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "8.0.0" },
    { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": { "type": "grafana", "uid": "-- Grafana --" },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "gridPos": { "h": 9, "w": 24, "x": 0, "y": 0 },
      "id": 2,
      "targets": [
        {
          "expr": "avg(iperf_network_bandwidth_mbps) by (source_node, destination_node)",
          "format": "heatmap",
          "legendFormat": "{{source_node}} -> {{destination_node}}",
          "refId": "A"
        }
      ],
      "cards": { "cardPadding": null, "cardRound": null },
      "color": { "mode": "spectrum", "scheme": "red-yellow-green", "exponent": 0.5, "reverse": false },
      "dataFormat": "tsbuckets",
      "yAxis": { "show": true, "format": "short" },
      "xAxis": { "show": true }
    },
    {
      "title": "Bandwidth Over Time (Source: $source_node, Dest: $destination_node)",
      "type": "timeseries",
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 },
      "targets": [
        {
          "expr": "iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\", destination_node=~\"^$destination_node$\", protocol=~\"^$protocol$\"}",
          "legendFormat": "Bandwidth",
          "refId": "A"
        }
      ],
      "fieldConfig": { "defaults": { "unit": "mbps" } }
    },
    {
      "title": "Jitter Over Time (Source: $source_node, Dest: $destination_node)",
      "type": "timeseries",
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 },
      "targets": [
        {
          "expr": "iperf_network_jitter_ms{source_node=~\"^$source_node$\", destination_node=~\"^$destination_node$\", protocol=\"udp\"}",
          "legendFormat": "Jitter",
          "refId": "A"
        }
      ],
      "fieldConfig": { "defaults": { "unit": "ms" } }
    }
  ],
  "refresh": "30s",
  "schemaVersion": 36,
  "style": "dark",
  "tags": ["iperf3", "network", "kubernetes"],
  "templating": {
    "list": [
      {
        "current": {},
        "datasource": { "type": "prometheus", "uid": "prometheus" },
        "definition": "label_values(iperf_network_bandwidth_mbps, source_node)",
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "source_node",
        "options": [],
        "query": "label_values(iperf_network_bandwidth_mbps, source_node)",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 1,
        "type": "query"
      },
      {
        "current": {},
        "datasource": { "type": "prometheus", "uid": "prometheus" },
        "definition": "label_values(iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\"}, destination_node)",
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "destination_node",
        "options": [],
        "query": "label_values(iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\"}, destination_node)",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 1,
        "type": "query"
      },
      {
        "current": { "selected": true, "text": "tcp", "value": "tcp" },
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "protocol",
        "options": [
          { "selected": true, "text": "tcp", "value": "tcp" },
          { "selected": false, "text": "udp", "value": "udp" }
        ],
        "query": "tcp,udp",
        "skipUrlSync": false,
        "type": "custom"
      }
    ]
  },
  "time": { "from": "now-1h", "to": "now" },
  "timepicker": {},
  "timezone": "browser",
  "title": "Kubernetes iperf3 Network Performance",
  "uid": "k8s-iperf3-dashboard",
  "version": 1,
  "weekStart": ""
}
```
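A quick way to catch JSON slips before the chart ships this dashboard is to load it and spot-check the pieces the exporter's metrics feed into; a minimal sketch using only the standard library (the path matches the chart layout above; the assertions are illustrative):

```python
import json

with open("charts/iperf3-monitor/grafana/iperf3-dashboard.json") as f:
    dashboard = json.load(f)  # fails loudly on malformed JSON

panel_titles = [p.get("title") for p in dashboard["panels"]]
variables = {v["name"] for v in dashboard["templating"]["list"]}
# The panel queries reference these template variables, so they must exist.
assert {"source_node", "destination_node", "protocol"} <= variables
print(dashboard["title"], "-", len(dashboard["panels"]), "panels:", panel_titles)
```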
charts/iperf3-monitor/templates/_helpers.tpl (file header missing in this capture; inferred from content):

```diff
@@ -29,9 +29,9 @@ Create chart's labels
 {{- define "iperf3-monitor.labels" -}}
 helm.sh/chart: {{ include "iperf3-monitor.name" . }}-{{ .Chart.Version | replace "+" "_" }}
 {{ include "iperf3-monitor.selectorLabels" . }}
-{{- if .Chart.AppVersion -}}
+{{ if .Chart.AppVersion }}
 app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
-{{- end -}}
+{{ end }}
 app.kubernetes.io/managed-by: {{ .Release.Service }}
 {{- end -}}
 
@@ -41,13 +41,13 @@ Selector labels
 {{- define "iperf3-monitor.selectorLabels" -}}
 app.kubernetes.io/name: {{ include "iperf3-monitor.name" . }}
 app.kubernetes.io/instance: {{ .Release.Name }}
-{{- end -}}
+{{ end }}
 
 {{/*
 Create the name of the service account to use
 */}}
 {{- define "iperf3-monitor.serviceAccountName" -}}
-{{- if .Values.serviceAccount.create -}}
+{{- if .Values.rbac.create -}}
 {{- default (include "iperf3-monitor.fullname" .) .Values.serviceAccount.name -}}
 {{- else -}}
 {{- default "default" .Values.serviceAccount.name -}}
```
Exporter Deployment template (charts/iperf3-monitor/templates/; exact filename not shown in this capture):

```diff
@@ -34,6 +34,8 @@ spec:
             value: "{{ .Values.exporter.testInterval }}"
           - name: IPERF_TEST_PROTOCOL
             value: "{{ .Values.exporter.testProtocol }}"
+          - name: LOG_LEVEL
+            value: "{{ .Values.exporter.logLevel }}"
           - name: IPERF_SERVER_PORT
             value: "5201" # Hardcoded as per server DaemonSet
           - name: IPERF_SERVER_NAMESPACE
@@ -41,7 +43,7 @@ spec:
               fieldRef:
                 fieldPath: metadata.namespace
           - name: IPERF_SERVER_LABEL_SELECTOR
-            value: "app.kubernetes.io/name={{ include \"iperf3-monitor.name\" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server"
+            value: 'app.kubernetes.io/name={{ include "iperf3-monitor.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server'
           {{- with .Values.exporter.resources }}
           resources:
             {{- toYaml . | nindent 10 }}
```
Grafana dashboard ConfigMap template (charts/iperf3-monitor/templates/; exact filename not shown in this capture), new file, 13 lines:

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Release.Name }}-grafana-dashboard
  labels:
    grafana_dashboard: "1"
    app.kubernetes.io/name: {{ include "iperf3-monitor.name" . }}
    helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/managed-by: {{ .Release.Service }}
data:
  iperf3-dashboard.json: |
    {{ .Files.Get "grafana/iperf3-dashboard.json" | nindent 4 }}
```
charts/iperf3-monitor/values.yaml (file header missing in this capture; inferred from content):

```diff
@@ -12,7 +12,7 @@ exporter:
   # -- Configuration for the exporter container image.
   image:
     # -- The container image repository for the exporter.
-    repository: ghcr.io/malarinv/iperf3-prometheus-exporter # Replace with your repo URL
+    repository: ghcr.io/malarinv/iperf3-monitor
     # -- The container image tag for the exporter. If not set, the chart's appVersion is used.
     tag: ""
     # -- The image pull policy for the exporter container.
@@ -24,6 +24,9 @@ exporter:
   # -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
   testInterval: 300
 
+  # -- Log level for the iperf3 exporter (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL).
+  logLevel: INFO
+
   # -- Timeout in seconds for a single iperf3 test run.
   testTimeout: 10
 
@@ -85,7 +88,7 @@ rbac:
 serviceAccount:
   # -- The name of the ServiceAccount to use for the exporter pod.
   # Only used if rbac.create is false. If not set, it defaults to the chart's fullname.
-  name: ""
+  name: "iperf3-monitor"
 
 serviceMonitor:
   # -- If true, create a ServiceMonitor resource for integration with Prometheus Operator.
@@ -118,3 +121,19 @@ networkPolicy:
     namespaceSelector: {}
     # -- Specify pod selectors if needed.
     podSelector: {}
+
+# -----------------------------------------------------------------------------
+# Dependency Configuration
+# -----------------------------------------------------------------------------
+dependencies:
+  # -- Set to true to install Prometheus operator dependency if serviceMonitor.enabled is also true.
+  # -- Set to false to disable the installation of Prometheus operator dependency,
+  # -- regardless of serviceMonitor.enabled. This is useful if you have Prometheus
+  # -- Operator installed and managed separately in your cluster.
+  install: true
+
+  # -- Set to true to use the TrueCharts Prometheus Operator instead of kube-prometheus-stack.
+  # This chart's ServiceMonitor resources require a Prometheus Operator to be functional.
+  # If serviceMonitor.enabled is true and dependencies.install is true,
+  # one of these two dependencies will be pulled based on this flag.
+  useTrueChartsPrometheusOperator: false
```
devbox.json (new file, 14 lines):

```json
{
  "$schema": "https://raw.githubusercontent.com/jetify-com/devbox/0.13.7/.schema/devbox.schema.json",
  "packages": [],
  "shell": {
    "init_hook": [
      "echo 'Welcome to devbox!' > /dev/null"
    ],
    "scripts": {
      "test": [
        "echo \"Error: no test specified\" && exit 1"
      ]
    }
  }
}
```
devbox.lock (new file, 4 lines):

```json
{
  "lockfile_version": "1",
  "packages": {}
}
```
Exporter Dockerfile (file header missing in this capture; inferred from the CI build context ./exporter):

```diff
@@ -1,11 +1,15 @@
 # Stage 1: Build stage with dependencies
 FROM python:3.9-slim as builder
 
+# Declare TARGETARCH for use in this stage
+ARG TARGETARCH
 WORKDIR /app
 
-# Install iperf3 and build dependencies
+# Minimal dependencies for builder stage if any Python packages had C extensions.
+# Assuming requirements.txt does not need gcc or other build tools for now.
+# If pip install fails later, add necessary build tools (e.g., gcc, python3-dev) here.
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends gcc iperf3 libiperf-dev && \
+    # apt-get install -y --no-install-recommends gcc python3-dev # Example if needed
     rm -rf /var/lib/apt/lists/*
 
 # Install Python dependencies
@@ -17,9 +21,11 @@ FROM python:3.9-slim
 
 WORKDIR /app
 
-# Copy iperf3 binary and library from the builder stage
-COPY --from=builder /usr/bin/iperf3 /usr/bin/iperf3
-COPY --from=builder /usr/lib/x86_64-linux-gnu/libiperf.so.0 /usr/lib/x86_64-linux-gnu/libiperf.so.0
+# Install iperf3 and its runtime dependency libsctp1 directly in the final stage.
+# This simplifies the Dockerfile by removing the need to copy iperf3 components from the builder.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends iperf3 libsctp1 && \
+    rm -rf /var/lib/apt/lists/*
 
 # Copy installed Python packages from the builder stage
 COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages
```
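Since the final stage now installs iperf3 via apt rather than copying it from the builder, a container smoke test catches a missing binary early; a minimal sketch (the image tag is a placeholder assumption):

```python
import subprocess

# Hypothetical image tag; substitute whatever the CI build produced.
IMAGE = "ghcr.io/malarinv/iperf3-monitor:latest"

def check_iperf3_present(image: str) -> None:
    """Run `iperf3 --version` inside the image and fail if it is missing."""
    result = subprocess.run(
        ["docker", "run", "--rm", "--entrypoint", "iperf3", image, "--version"],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(f"iperf3 missing or broken in {image}: {result.stderr.strip()}")
    print(result.stdout.splitlines()[0])

# check_iperf3_present(IMAGE)
```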
Exporter script (in ./exporter; exact filename not shown in this capture):

```diff
@@ -1,28 +1,60 @@
 """
 Prometheus exporter for iperf3 network performance monitoring.
 
 This script runs iperf3 tests between the node it's running on (source) and
 other iperf3 server pods discovered in a Kubernetes cluster. It then exposes
 these metrics for Prometheus consumption.
 
 Configuration is primarily through environment variables and command-line arguments
 for log level.
 """
 import os
 import time
 import logging
+import argparse
+import sys
 from kubernetes import client, config
 from prometheus_client import start_http_server, Gauge
 import iperf3
 
-# --- Configuration ---
-# Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# --- Global Configuration & Setup ---
+
+# Argument parsing for log level configuration
+# The command-line --log-level argument takes precedence over the LOG_LEVEL env var.
+# Defaults to INFO if neither is set.
+parser = argparse.ArgumentParser(description="iperf3 Prometheus exporter.")
+parser.add_argument(
+    '--log-level',
+    default=os.environ.get('LOG_LEVEL', 'INFO').upper(),
+    choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
+    help='Set the logging level. Overrides LOG_LEVEL environment variable. (Default: INFO)'
+)
+args = parser.parse_args()
+log_level_str = args.log_level
+
+# Convert log level string (e.g., 'INFO') to its numeric representation (e.g., logging.INFO)
+numeric_level = getattr(logging, log_level_str.upper(), None)
+if not isinstance(numeric_level, int):
+    # This case should ideally not be reached if choices in argparse are respected.
+    logging.error(f"Invalid log level: {log_level_str}. Defaulting to INFO.")
+    numeric_level = logging.INFO
+logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
 
 # --- Prometheus Metrics Definition ---
 # These gauges will be used to expose iperf3 test results.
 IPERF_BANDWIDTH_MBPS = Gauge(
     'iperf_network_bandwidth_mbps',
-    'Network bandwidth measured by iperf3 in Megabits per second',
+    'Network bandwidth measured by iperf3 in Megabits per second (Mbps)',
     ['source_node', 'destination_node', 'protocol']
 )
 IPERF_JITTER_MS = Gauge(
     'iperf_network_jitter_ms',
-    'Network jitter measured by iperf3 in milliseconds',
+    'Network jitter measured by iperf3 in milliseconds (ms) for UDP tests',
     ['source_node', 'destination_node', 'protocol']
 )
 IPERF_PACKETS_TOTAL = Gauge(
     'iperf_network_packets_total',
-    'Total packets transmitted or received during the iperf3 test',
+    'Total packets transmitted/received during the iperf3 UDP test',
     ['source_node', 'destination_node', 'protocol']
 )
 IPERF_LOST_PACKETS = Gauge(
```
```diff
@@ -38,12 +70,21 @@ IPERF_TEST_SUCCESS = Gauge(
 
 def discover_iperf_servers():
     """
-    Discover iperf3 server pods in the cluster using the Kubernetes API.
+    Discovers iperf3 server pods within a Kubernetes cluster.
+
+    It uses the in-cluster Kubernetes configuration to connect to the API.
+    The target namespace and label selector for iperf3 server pods are configured
+    via environment variables:
+    - IPERF_SERVER_NAMESPACE (default: 'default')
+    - IPERF_SERVER_LABEL_SELECTOR (default: 'app=iperf3-server')
+
+    Returns:
+        list: A list of dictionaries, where each dictionary contains the 'ip'
+              and 'node_name' of a discovered iperf3 server pod. Returns an
+              empty list if discovery fails or no servers are found.
     """
     try:
-        # Load in-cluster configuration
-        # Assumes the exporter runs in a pod with a service account having permissions
-        config.load_incluster_config()
+        config.load_incluster_config()  # Assumes running inside a Kubernetes pod
         v1 = client.CoreV1Api()
 
         namespace = os.getenv('IPERF_SERVER_NAMESPACE', 'default')
@@ -51,110 +92,206 @@ def discover_iperf_servers():
 
         logging.info(f"Discovering iperf3 servers with label '{label_selector}' in namespace '{namespace}'")
 
-        # List pods across all namespaces with the specified label selector
+        # Note: list_pod_for_all_namespaces requires cluster-wide permissions
         ret = v1.list_pod_for_all_namespaces(label_selector=label_selector, watch=False)
 
         servers = []
-        for i in ret.items:
-            # Ensure pod has an IP and is running
-            if i.status.pod_ip and i.status.phase == 'Running':
+        for item in ret.items:
+            if item.status.pod_ip and item.status.phase == 'Running':
                 servers.append({
-                    'ip': i.status.pod_ip,
-                    'node_name': i.spec.node_name
+                    'ip': item.status.pod_ip,
+                    'node_name': item.spec.node_name  # Node where the iperf server pod is running
                 })
         logging.info(f"Discovered {len(servers)} iperf3 server pods.")
         return servers
+    except config.ConfigException as e:
+        logging.error(f"Kubernetes config error: {e}. Is the exporter running in a cluster with RBAC permissions?")
+        return []
     except Exception as e:
         logging.error(f"Error discovering iperf servers: {e}")
-        return []  # Return empty list on error to avoid crashing the loop
+        return []  # Return empty list on error to avoid crashing the main loop
```
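As the new comment notes, `list_pod_for_all_namespaces` needs cluster-wide list permissions even though `IPERF_SERVER_NAMESPACE` is read from the environment. A hedged sketch of a namespace-scoped alternative using the same kubernetes client (a suggestion, not what the repo does):

```python
from kubernetes import client, config

def discover_iperf_servers_namespaced(namespace: str, label_selector: str) -> list:
    """Namespace-scoped discovery: works with a Role instead of a ClusterRole."""
    config.load_incluster_config()
    v1 = client.CoreV1Api()
    ret = v1.list_namespaced_pod(namespace, label_selector=label_selector, watch=False)
    return [
        {'ip': pod.status.pod_ip, 'node_name': pod.spec.node_name}
        for pod in ret.items
        if pod.status.pod_ip and pod.status.phase == 'Running'
    ]
```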
```diff
-def run_iperf_test(server_ip, server_port, protocol, source_node, dest_node):
+def run_iperf_test(server_ip, server_port, protocol, source_node_name, dest_node_name):
     """
-    Runs a single iperf3 test and updates Prometheus metrics.
+    Runs a single iperf3 test against a specified server and publishes metrics.
+
+    Args:
+        server_ip (str): The IP address of the iperf3 server.
+        server_port (int): The port number of the iperf3 server.
+        protocol (str): The protocol to use ('tcp' or 'udp').
+        source_node_name (str): The name of the source node (where this exporter is running).
+        dest_node_name (str): The name of the destination node (where the server is running).
+
+    The test duration is controlled by the IPERF_TEST_DURATION environment variable
+    (default: 5 seconds).
     """
-    logging.info(f"Running iperf3 test from {source_node} to {dest_node} ({server_ip}:{server_port}) using {protocol.upper()}")
+    logging.info(f"Running iperf3 {protocol.upper()} test from {source_node_name} to {dest_node_name} ({server_ip}:{server_port})")
 
-    client = iperf3.Client()
-    client.server_hostname = server_ip
-    client.port = server_port
-    client.protocol = protocol
-    # Duration of the test (seconds)
-    client.duration = int(os.getenv('IPERF_TEST_DURATION', 5))
-    # Output results as JSON for easy parsing
-    client.json_output = True
+    iperf_client = iperf3.Client()
+    iperf_client.server_hostname = server_ip
+    iperf_client.port = server_port
+    iperf_client.protocol = protocol
+    iperf_client.duration = int(os.getenv('IPERF_TEST_DURATION', 5))  # Test duration in seconds
+    iperf_client.json_output = True  # Enables easy parsing of results
 
-    result = client.run()
-
-    # Parse results and update metrics
-    parse_and_publish_metrics(result, source_node, dest_node, protocol)
-
-def parse_and_publish_metrics(result, source_node, dest_node, protocol):
-    """
-    Parses the iperf3 result and updates Prometheus gauges.
-    Handles both successful and failed tests.
-    """
-    labels = {'source_node': source_node, 'destination_node': dest_node, 'protocol': protocol}
-
-    if result and result.error:
-        logging.error(f"Test from {source_node} to {dest_node} failed: {result.error}")
+    try:
+        result = iperf_client.run()
+        parse_and_publish_metrics(result, source_node_name, dest_node_name, protocol)
+    except Exception as e:
+        # Catch unexpected errors during client.run() or parsing
+        logging.error(f"Exception during iperf3 test or metric parsing for {dest_node_name}: {e}")
+        labels = {'source_node': source_node_name, 'destination_node': dest_node_name, 'protocol': protocol}
         IPERF_TEST_SUCCESS.labels(**labels).set(0)
         # Set metrics to 0 on failure
         try:
             IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
             IPERF_JITTER_MS.labels(**labels).set(0)
             IPERF_PACKETS_TOTAL.labels(**labels).set(0)
             IPERF_LOST_PACKETS.labels(**labels).set(0)
         except KeyError:
-            # Labels might not be registered yet if this is the first failure
-            pass
+            logging.debug(f"KeyError setting failure metrics for {labels} after client.run() exception.")
+
+
+def parse_and_publish_metrics(result, source_node, dest_node, protocol):
+    """
+    Parses the iperf3 test result and updates Prometheus gauges.
+
+    Args:
+        result (iperf3.TestResult): The result object from the iperf3 client.
+        source_node (str): Name of the source node.
+        dest_node (str): Name of the destination node.
+        protocol (str): Protocol used for the test ('tcp' or 'udp').
+    """
+    labels = {'source_node': source_node, 'destination_node': dest_node, 'protocol': protocol}
+
+    # Handle failed tests (e.g., server unreachable) or missing result object
+    if not result or result.error:
+        error_message = result.error if result and result.error else "No result object from iperf3 client"
+        logging.warning(f"Test from {source_node} to {dest_node} ({protocol.upper()}) failed: {error_message}")
+        IPERF_TEST_SUCCESS.labels(**labels).set(0)
+        # Set all relevant metrics to 0 on failure to clear stale values from previous successes
+        try:
+            IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
+            IPERF_JITTER_MS.labels(**labels).set(0)  # Applicable for UDP, zeroed for TCP later
+            IPERF_PACKETS_TOTAL.labels(**labels).set(0)  # Applicable for UDP, zeroed for TCP later
+            IPERF_LOST_PACKETS.labels(**labels).set(0)  # Applicable for UDP, zeroed for TCP later
+        except KeyError:
+            # This can happen if labels were never registered due to continuous failures
+            logging.debug(f"KeyError when setting failure metrics for {labels}. Gauges might not be initialized.")
+        return
 
-    if not result:
-        logging.error(f"Test from {source_node} to {dest_node} failed to return a result object.")
-        IPERF_TEST_SUCCESS.labels(**labels).set(0)
-        try:
-            IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
-            IPERF_JITTER_MS.labels(**labels).set(0)
-            IPERF_PACKETS_TOTAL.labels(**labels).set(0)
-            IPERF_LOST_PACKETS.labels(**labels).set(0)
-        except KeyError:
-            pass
-        return
```
```diff
     # If we reach here, the test itself was successful in execution
     IPERF_TEST_SUCCESS.labels(**labels).set(1)
 
-    # The summary data is typically in result.json['end']['sum_sent'] or result.json['end']['sum_received']
-    # The iperf3-python client often exposes this directly as attributes like sent_Mbps or received_Mbps
-    # For TCP, we usually care about the received bandwidth on the client side (which is the exporter)
-    # For UDP, the client report contains jitter, lost packets, etc.
+    # Determine bandwidth:
+    # Order of preference: received_Mbps, sent_Mbps, Mbps, then JSON fallbacks.
+    # received_Mbps is often most relevant for TCP client perspective.
+    # sent_Mbps can be relevant for UDP or as a TCP fallback.
     bandwidth_mbps = 0
     if hasattr(result, 'received_Mbps') and result.received_Mbps is not None:
         bandwidth_mbps = result.received_Mbps
     elif hasattr(result, 'sent_Mbps') and result.sent_Mbps is not None:
         # Fallback, though received_Mbps is usually more relevant for TCP client
         bandwidth_mbps = result.sent_Mbps
-    # Add a check for the raw JSON output structure as a fallback
-    elif result.json and 'end' in result.json and 'sum_received' in result.json['end'] and result.json['end']['sum_received']['bits_per_second'] is not None:
-        bandwidth_mbps = result.json['end']['sum_received']['bits_per_second'] / 1000000
-    elif result.json and 'end' in result.json and 'sum_sent' in result.json['end'] and result.json['end']['sum_sent']['bits_per_second'] is not None:
-        bandwidth_mbps = result.json['end']['sum_sent']['bits_per_second'] / 1000000
-
+    elif hasattr(result, 'Mbps') and result.Mbps is not None:  # General attribute from iperf3 library
+        bandwidth_mbps = result.Mbps
+    # Fallback to raw JSON if direct attributes are None or missing
+    elif result.json:
+        # Prefer received sum, then sent sum from the JSON output's 'end' summary
+        if 'end' in result.json and 'sum_received' in result.json['end'] and \
+           result.json['end']['sum_received'].get('bits_per_second') is not None:
+            bandwidth_mbps = result.json['end']['sum_received']['bits_per_second'] / 1000000.0
+        elif 'end' in result.json and 'sum_sent' in result.json['end'] and \
+             result.json['end']['sum_sent'].get('bits_per_second') is not None:
+            bandwidth_mbps = result.json['end']['sum_sent']['bits_per_second'] / 1000000.0
 
     IPERF_BANDWIDTH_MBPS.labels(**labels).set(bandwidth_mbps)
```
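The fallback chain above leans on the iperf3-python attributes first and the raw JSON second. A standalone sketch of just the JSON path, runnable against any `iperf3 --json` capture (the sample dict below is illustrative):

```python
def bandwidth_mbps_from_json(end_summary: dict) -> float:
    """Extract Mbps from the 'end' section of iperf3 --json output, preferring the receive side."""
    for key in ("sum_received", "sum_sent"):
        bps = end_summary.get(key, {}).get("bits_per_second")
        if bps is not None:
            return bps / 1_000_000.0
    return 0.0

# Illustrative sample shaped like iperf3's 'end' summary:
sample_end = {"sum_received": {"bits_per_second": 941_000_000.0}}
print(bandwidth_mbps_from_json(sample_end))  # 941.0
```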
```diff
     # UDP specific metrics
     if protocol == 'udp':
-        # iperf3-python exposes UDP results directly
-        IPERF_JITTER_MS.labels(**labels).set(result.jitter_ms if hasattr(result, 'jitter_ms') and result.jitter_ms is not None else 0)
-        IPERF_PACKETS_TOTAL.labels(**labels).set(result.packets if hasattr(result, 'packets') and result.packets is not None else 0)
-        IPERF_LOST_PACKETS.labels(**labels).set(result.lost_packets if hasattr(result, 'lost_packets') and result.lost_packets is not None else 0)
+        # These attributes are specific to UDP tests in iperf3
+        IPERF_JITTER_MS.labels(**labels).set(getattr(result, 'jitter_ms', 0) if result.jitter_ms is not None else 0)
+        IPERF_PACKETS_TOTAL.labels(**labels).set(getattr(result, 'packets', 0) if result.packets is not None else 0)
+        IPERF_LOST_PACKETS.labels(**labels).set(getattr(result, 'lost_packets', 0) if result.lost_packets is not None else 0)
     else:
-        # Ensure UDP metrics are zeroed or absent for TCP tests
+        # For TCP tests, ensure UDP-specific metrics are set to 0
         try:
             IPERF_JITTER_MS.labels(**labels).set(0)
             IPERF_PACKETS_TOTAL.labels(**labels).set(0)
             IPERF_LOST_PACKETS.labels(**labels).set(0)
         except KeyError:
+            # Can occur if labels not yet registered (e.g. first test is TCP)
+            logging.debug(f"KeyError for {labels} when zeroing UDP metrics for TCP test.")
             pass
```
```diff
 def main_loop():
     """
     Main operational loop of the iperf3 exporter.
 
     This loop periodically:
     1. Fetches configuration from environment variables:
        - IPERF_TEST_INTERVAL (default: 300s): Time between test cycles.
        - IPERF_SERVER_PORT (default: 5201): Port for iperf3 servers.
        - IPERF_TEST_PROTOCOL (default: 'tcp'): 'tcp' or 'udp'.
        - SOURCE_NODE_NAME (critical): Name of the node this exporter runs on.
     2. Discovers iperf3 server pods in the Kubernetes cluster.
     3. Runs iperf3 tests against each discovered server (unless it's on the same node).
     4. Sleeps for the configured test interval.
 
     If SOURCE_NODE_NAME is not set, the script will log an error and exit.
     """
     # Fetch operational configuration from environment variables
     test_interval = int(os.getenv('IPERF_TEST_INTERVAL', 300))
     server_port = int(os.getenv('IPERF_SERVER_PORT', 5201))
     protocol = os.getenv('IPERF_TEST_PROTOCOL', 'tcp').lower()  # Ensure lowercase
     source_node_name = os.getenv('SOURCE_NODE_NAME')
 
     # SOURCE_NODE_NAME is crucial for labeling metrics correctly.
     if not source_node_name:
         logging.error("CRITICAL: SOURCE_NODE_NAME environment variable not set. This is required. Exiting.")
         sys.exit(1)
 
     logging.info(
         f"Exporter configured. Source Node: {source_node_name}, "
         f"Test Interval: {test_interval}s, Server Port: {server_port}, Protocol: {protocol.upper()}"
     )
 
     while True:
         logging.info("Starting new iperf test cycle...")
         servers = discover_iperf_servers()
 
         if not servers:
             logging.warning("No iperf servers discovered in this cycle. Check K8s setup and RBAC permissions.")
         else:
             for server in servers:
                 dest_node_name = server.get('node_name', 'unknown_destination_node')  # Default if key missing
                 server_ip = server.get('ip')
 
                 if not server_ip:
                     logging.warning(f"Discovered server entry missing an IP: {server}. Skipping.")
                     continue
 
                 # Avoid testing a node against itself
                 if dest_node_name == source_node_name:
                     logging.info(f"Skipping test to self: {source_node_name} to {server_ip} (on same node: {dest_node_name}).")
                     continue
 
                 run_iperf_test(server_ip, server_port, protocol, source_node_name, dest_node_name)
 
         logging.info(f"Test cycle completed. Sleeping for {test_interval} seconds.")
         time.sleep(test_interval)
 
 if __name__ == '__main__':
     # Initial logging (like log level) is configured globally at the start of the script.
 
     # Fetch Prometheus exporter listen port from environment variable
     listen_port = int(os.getenv('LISTEN_PORT', 9876))
 
     try:
         # Start the Prometheus HTTP server to expose metrics.
         start_http_server(listen_port)
         logging.info(f"Prometheus exporter listening on port {listen_port}")
     except Exception as e:
         logging.error(f"Failed to start Prometheus HTTP server on port {listen_port}: {e}")
         sys.exit(1)  # Exit if the metrics server cannot start
 
     # Enter the main operational loop.
     # main_loop() contains its own critical checks (e.g., SOURCE_NODE_NAME) and will exit if necessary.
     main_loop()
```
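Because `parse_and_publish_metrics` only needs an object with the right attributes, its metric paths can be exercised off-cluster with a stub; a small sketch, assuming the exporter module is importable (the stub's shape is an assumption modeled on iperf3-python's TestResult):

```python
from types import SimpleNamespace

# Hypothetical stand-ins for iperf3.TestResult with just the fields the parser reads.
ok = SimpleNamespace(error=None, received_Mbps=941.0, json=None,
                     jitter_ms=None, packets=None, lost_packets=None)
failed = SimpleNamespace(error="unable to connect to server", json=None)

parse_and_publish_metrics(ok, "node-a", "node-b", "tcp")      # success=1, bandwidth=941.0
parse_and_publish_metrics(failed, "node-a", "node-c", "tcp")  # success=0, gauges zeroed
```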