Merge pull request #1 from malarinv/bootstrap
feat: Introduce iperf3 Kubernetes network monitor

commit 14ec3f6581
.github/workflows/release.yml
@ -0,0 +1,95 @@
name: Release iperf3-monitor

on:
  push:
    tags:
      - 'v*.*.*'

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  lint-and-test:
    name: Lint and Test
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Set up Helm
        uses: azure/setup-helm@v3
        with:
          version: v3.10.0

      - name: Helm Lint
        run: helm lint ./charts/iperf3-monitor

  build-and-publish-image:
    name: Build and Publish Docker Image
    runs-on: ubuntu-latest
    needs: lint-and-test
    permissions:
      contents: read
      packages: write
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: ./exporter
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

  package-and-publish-chart:
    name: Package and Publish Helm Chart
    runs-on: ubuntu-latest
    needs: build-and-publish-image
    permissions:
      contents: write # Needed by stefanprodan/helm-gh-pages to push to the gh-pages branch
    steps:
      - name: Check out code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0 # Fetch all history so helm-gh-pages can build the chart index

      - name: Set up Helm
        uses: azure/setup-helm@v3
        with:
          version: v3.10.0

      - name: Install yq
        run: |
          sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq && \
          sudo chmod +x /usr/bin/yq

      - name: Set Chart Version from Tag
        run: |
          VERSION=$(echo "${{ github.ref_name }}" | sed 's/^v//')
          # yq's strenv() reads environment variables, so the shell variable must be exported
          export VERSION
          yq e -i '.version = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
          yq e -i '.appVersion = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
          cat ./charts/iperf3-monitor/Chart.yaml # Optional: print the updated Chart.yaml

      - name: Publish Helm chart
        uses: stefanprodan/helm-gh-pages@v1.6.0
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          charts_dir: ./charts
          charts_url: https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }}
.gitignore
@ -0,0 +1,40 @@
# Byte-code files
*.pyc
*.pyo
*.pyd
__pycache__/

# Distribution / build outputs
dist/
build/
*.egg-info/
.tox/

# Virtual environments
.venv/
venv/
env/

# Editor/IDE specific files
.idea/
.vscode/
*.swp
*.swo

# Logs and temporary files
*.log
*.tmp

# OS generated files
.DS_Store
Thumbs.db

# Docker
!Dockerfile
.dockerignore

# Helm
!charts/iperf3-monitor/.helmignore
# Ignore packaged chart files (gitignore does not support trailing comments
# on pattern lines, so this note lives on its own line)
charts/*.tgz
README.md
@ -0,0 +1,437 @@
# Kubernetes-Native Network Performance Monitoring Service

This project provides a comprehensive solution for continuous network validation within a Kubernetes cluster. Leveraging industry-standard tools like `iperf3`, `Prometheus`, and `Grafana`, it offers proactive monitoring of network performance between nodes, helping to identify and troubleshoot latency, bandwidth, and packet loss issues before they impact applications.

## Features

* **Continuous N-to-N Testing:** Automatically measures network performance between all nodes in the cluster.
* **Kubernetes-Native:** Deploys as standard Kubernetes workloads (DaemonSet, Deployment).
* **Dynamic Discovery:** The exporter automatically discovers iperf3 server pods using the Kubernetes API.
* **Prometheus Integration:** Translates iperf3 results into standard Prometheus metrics for time-series storage.
* **Grafana Visualization:** Provides a rich, interactive dashboard with heatmaps and time-series graphs.
* **Helm Packaging:** Packaged as a Helm chart for easy deployment and configuration management.
* **Automated CI/CD:** Includes a GitHub Actions workflow for building and publishing the exporter image and Helm chart.

## Architecture

The service is based on a decoupled architecture:

1. **iperf3-server DaemonSet:** Deploys an `iperf3` server pod on every node to act as a test endpoint. It runs on the host network to measure raw node performance.
2. **iperf3-exporter Deployment:** A centralized service that uses the Kubernetes API to discover server pods, orchestrates `iperf3` client tests against them, parses the JSON output, and exposes performance metrics via an HTTP endpoint.
3. **Prometheus & Grafana Stack:** A standard monitoring backend (such as `kube-prometheus-stack`) that scrapes the exporter's metrics and visualizes them in a custom dashboard.

This separation of concerns improves scalability and resilience, and aligns with Kubernetes operational principles.

## Getting Started

### Prerequisites

* A running Kubernetes cluster.
* `kubectl` configured to connect to your cluster.
* Helm v3+ installed.
* A Prometheus instance configured to scrape services (ideally using the Prometheus Operator and ServiceMonitors).
* A Grafana instance accessible and configured with Prometheus as a data source.

### Installation with Helm

1. Add the Helm chart repository (replace with your actual repo URL once published):

   ```/dev/null/helm-install.sh#L1-1
   helm repo add iperf3-monitor https://your-github-org.github.io/iperf3-monitor/
   ```

2. Update your Helm repositories:

   ```/dev/null/helm-install.sh#L3-3
   helm repo update
   ```

3. Install the chart:

   ```/dev/null/helm-install.sh#L5-8
   # Install into the "monitoring" namespace (or your preferred namespace).
   # The --values flag is optional; use it to pass a custom values file.
   helm install iperf3-monitor iperf3-monitor/iperf3-monitor \
     --namespace monitoring \
     --create-namespace \
     --values values.yaml
   ```

> **Note:** Ensure your Prometheus instance is configured to scrape services in the namespace where you install the chart and that it recognizes `ServiceMonitor` resources with the label `release: prometheus-operator` (if using the standard `kube-prometheus-stack` setup).
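
To verify the installation before importing dashboards, you can check the pods and scrape the exporter by hand. A quick sanity check, assuming the release is named `iperf3-monitor` and was installed into the `monitoring` namespace (adjust names to your setup):

```sh
# The server DaemonSet should have one pod per (tolerated) node, plus one exporter pod
kubectl get pods -n monitoring -l app.kubernetes.io/instance=iperf3-monitor

# Port-forward the exporter Service and inspect the raw metrics
kubectl port-forward -n monitoring svc/iperf3-monitor-exporter-svc 9876:9876 &
curl -s http://localhost:9876/metrics | grep ^iperf_
```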

### Configuration

The Helm chart is highly configurable via the `values.yaml` file. You can override default settings by creating your own `values.yaml` and passing it during installation (`--values my-values.yaml`).

Refer to the comments in the default `values.yaml` for a detailed explanation of each parameter:

```iperf3-monitor/charts/iperf3-monitor/values.yaml
# Default values for iperf3-monitor.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# -- Override the name of the chart.
nameOverride: ""

# -- Override the fully qualified app name.
fullnameOverride: ""

exporter:
  # -- Configuration for the exporter container image.
  image:
    # -- The container image repository for the exporter.
    repository: ghcr.io/my-org/iperf3-prometheus-exporter # Replace with your repo URL
    # -- The container image tag for the exporter. If not set, the chart's appVersion is used.
    tag: ""
    # -- The image pull policy for the exporter container.
    pullPolicy: IfNotPresent

  # -- Number of exporter pod replicas. Typically 1 is sufficient.
  replicaCount: 1

  # -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
  testInterval: 300

  # -- Timeout in seconds for a single iperf3 test run.
  testTimeout: 10

  # -- Protocol to use for testing (tcp or udp).
  testProtocol: tcp

  # -- CPU and memory resource requests and limits for the exporter pod.
  # @default -- A small default is provided if commented out.
  resources: {}
  # requests:
  #   cpu: "100m"
  #   memory: "128Mi"
  # limits:
  #   cpu: "500m"
  #   memory: "256Mi"

server:
  # -- Configuration for the iperf3 server container image (DaemonSet).
  image:
    # -- The container image repository for the iperf3 server.
    repository: networkstatic/iperf3
    # -- The container image tag for the iperf3 server.
    tag: latest
    # -- The image pull policy for the iperf3 server container (referenced by the
    # DaemonSet template; Always matches the Kubernetes default for :latest tags).
    pullPolicy: Always

  # -- CPU and memory resource requests and limits for the iperf3 server pods (DaemonSet).
  # These should be very low as the server is mostly idle.
  # @default -- A small default is provided if commented out.
  resources: {}
  # requests:
  #   cpu: "50m"
  #   memory: "64Mi"
  # limits:
  #   cpu: "100m"
  #   memory: "128Mi"

  # -- Node selector for scheduling iperf3 server pods.
  # Use this to restrict the DaemonSet to a subset of nodes.
  # @default -- {} (schedule on all nodes)
  nodeSelector: {}

  # -- Tolerations for scheduling iperf3 server pods on tainted nodes (e.g., control-plane nodes).
  # This is often necessary to include master nodes in the test mesh.
  # @default -- Tolerates control-plane and master taints.
  tolerations:
    - key: "node-role.kubernetes.io/control-plane"
      operator: "Exists"
      effect: "NoSchedule"
    - key: "node-role.kubernetes.io/master"
      operator: "Exists"
      effect: "NoSchedule"

rbac:
  # -- If true, create ServiceAccount, ClusterRole, and ClusterRoleBinding for the exporter.
  # Set to false if you manage RBAC externally.
  create: true

serviceAccount:
  # -- The name of the ServiceAccount to use for the exporter pod.
  # Only used if rbac.create is false. If not set, it defaults to the chart's fullname.
  name: ""

serviceMonitor:
  # -- If true, create a ServiceMonitor resource for integration with Prometheus Operator.
  # Requires a running Prometheus Operator in the cluster.
  enabled: true

  # -- Scrape interval for the ServiceMonitor. How often Prometheus scrapes the exporter metrics.
  interval: 60s

  # -- Scrape timeout for the ServiceMonitor. How long Prometheus waits for the metrics response.
  scrapeTimeout: 30s

# -- Configuration for the exporter Service.
service:
  # -- Service type. ClusterIP is typically sufficient.
  type: ClusterIP
  # -- Port on which the exporter service is exposed.
  port: 9876
  # -- Target port on the exporter pod.
  targetPort: 9876

# -- Optional configuration for a network policy to allow traffic to the iperf3 server DaemonSet.
# This is often necessary if you are using a network policy controller.
networkPolicy:
  # -- If true, create a NetworkPolicy resource.
  enabled: false
  # -- Specify source selectors if needed (e.g., pods in a specific namespace).
  from: []
  # -- Specify namespace selectors if needed.
  namespaceSelector: {}
  # -- Specify pod selectors if needed.
  podSelector: {}
```
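
As an example, a small override file might pin the exporter image, switch the mesh to UDP tests, and tighten the scrape interval (all values below are illustrative):

```yaml
# my-values.yaml (illustrative)
exporter:
  image:
    repository: ghcr.io/your-org/iperf3-prometheus-exporter
    tag: "0.1.0"
  testProtocol: udp   # enables the jitter/packet-loss metrics
  testInterval: 120

serviceMonitor:
  interval: 30s
```

Apply it with `helm upgrade --install iperf3-monitor iperf3-monitor/iperf3-monitor -n monitoring --values my-values.yaml`.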

## Grafana Dashboard

A custom Grafana dashboard is provided to visualize the collected `iperf3` metrics.

1. Log in to your Grafana instance.
2. Navigate to `Dashboards` -> `Import`.
3. Paste the full JSON model provided below into the text area and click `Load`.
4. Select your Prometheus data source and click `Import`.

```/dev/null/grafana-dashboard.json
{
  "__inputs": [],
  "__requires": [
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "8.0.0"
    },
    {
      "type": "datasource",
      "id": "prometheus",
      "name": "Prometheus",
      "version": "1.0.0"
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": {
          "type": "grafana",
          "uid": "-- Grafana --"
        },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "title": "Bandwidth Heatmap (source -> destination)",
      "type": "heatmap",
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "gridPos": {
        "h": 9,
        "w": 24,
        "x": 0,
        "y": 0
      },
      "id": 2,
      "targets": [
        {
          "expr": "avg(iperf_network_bandwidth_mbps) by (source_node, destination_node)",
          "format": "heatmap",
          "legendFormat": "{{source_node}} -> {{destination_node}}",
          "refId": "A"
        }
      ],
      "cards": { "cardPadding": null, "cardRound": null },
      "color": {
        "mode": "spectrum",
        "scheme": "red-yellow-green",
        "exponent": 0.5,
        "reverse": false
      },
      "dataFormat": "tsbuckets",
      "yAxis": { "show": true, "format": "short" },
      "xAxis": { "show": true }
    },
    {
      "title": "Bandwidth Over Time (Source: $source_node, Dest: $destination_node)",
      "type": "timeseries",
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 9
      },
      "targets": [
        {
          "expr": "iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\", destination_node=~\"^$destination_node$\", protocol=~\"^$protocol$\"}",
          "legendFormat": "Bandwidth",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "mbps"
        }
      }
    },
    {
      "title": "Jitter Over Time (Source: $source_node, Dest: $destination_node)",
      "type": "timeseries",
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 9
      },
      "targets": [
        {
          "expr": "iperf_network_jitter_ms{source_node=~\"^$source_node$\", destination_node=~\"^$destination_node$\", protocol=\"udp\"}",
          "legendFormat": "Jitter",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "ms"
        }
      }
    }
  ],
  "refresh": "30s",
  "schemaVersion": 36,
  "style": "dark",
  "tags": ["iperf3", "network", "kubernetes"],
  "templating": {
    "list": [
      {
        "current": {},
        "datasource": {
          "type": "prometheus",
          "uid": "prometheus"
        },
        "definition": "label_values(iperf_network_bandwidth_mbps, source_node)",
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "source_node",
        "options": [],
        "query": "label_values(iperf_network_bandwidth_mbps, source_node)",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 1,
        "type": "query"
      },
      {
        "current": {},
        "datasource": {
          "type": "prometheus",
          "uid": "prometheus"
        },
        "definition": "label_values(iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\"}, destination_node)",
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "destination_node",
        "options": [],
        "query": "label_values(iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\"}, destination_node)",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 1,
        "type": "query"
      },
      {
        "current": { "selected": true, "text": "tcp", "value": "tcp" },
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "protocol",
        "options": [
          { "selected": true, "text": "tcp", "value": "tcp" },
          { "selected": false, "text": "udp", "value": "udp" }
        ],
        "query": "tcp,udp",
        "skipUrlSync": false,
        "type": "custom"
      }
    ]
  },
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "browser",
  "title": "Kubernetes iperf3 Network Performance",
  "uid": "k8s-iperf3-dashboard",
  "version": 1,
  "weekStart": ""
}
```
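
The same metrics also lend themselves to alerting. A sketch of a PrometheusRule that fires when a node pair keeps failing its tests (assumes the Prometheus Operator CRDs are installed; adjust the `release` label to whatever your Prometheus instance selects on):

```yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: iperf3-monitor-alerts
  labels:
    release: prometheus-operator  # match your Prometheus ruleSelector
spec:
  groups:
    - name: iperf3-monitor
      rules:
        - alert: IperfTestFailing
          expr: iperf_test_success == 0
          for: 15m
          labels:
            severity: warning
          annotations:
            summary: "iperf3 test {{ $labels.source_node }} -> {{ $labels.destination_node }} has been failing for 15m"
```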

## Repository Structure

The project follows a standard structure:

```/dev/null/repo-structure.txt
.
├── .github/
│   └── workflows/
│       └── release.yml        # GitHub Actions workflow for CI/CD
├── charts/
│   └── iperf3-monitor/        # The Helm chart for the service
│       ├── Chart.yaml
│       ├── values.yaml
│       └── templates/
│           ├── _helpers.tpl
│           ├── server-daemonset.yaml
│           ├── exporter-deployment.yaml
│           ├── rbac.yaml
│           ├── service.yaml
│           └── servicemonitor.yaml
├── exporter/
│   ├── Dockerfile             # Dockerfile for the exporter
│   ├── requirements.txt       # Python dependencies
│   └── exporter.py            # Exporter source code
├── .gitignore                 # Specifies intentionally untracked files
├── LICENSE                    # Project license
└── README.md                  # This file
```

## Development and CI/CD

The project includes a GitHub Actions workflow (`.github/workflows/release.yml`), triggered on Git tags (`v*.*.*`), that automates the following (a release example follows this list):

1. Linting the Helm chart.
2. Building and publishing the Docker image for the exporter to GitHub Container Registry (`ghcr.io`).
3. Updating the Helm chart version based on the Git tag.
4. Packaging and publishing the Helm chart to GitHub Pages.
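
Cutting a release is therefore just a matter of pushing a semver tag; the tag name minus the leading `v` becomes the chart `version` and `appVersion`. For example:

```sh
# Tag the current commit and push it; the v* tag triggers the release workflow
git tag v0.1.0
git push origin v0.1.0
```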

## License

This project is licensed under the terms defined in the `LICENSE` file.

```iperf3-monitor/LICENSE
This project is currently unlicensed. Please see the project's documentation or repository for licensing information when it becomes available.
```
charts/iperf3-monitor/Chart.yaml
@ -0,0 +1,30 @@
apiVersion: v2
name: iperf3-monitor
version: 0.1.0
appVersion: "0.1.0"
description: A Helm chart for deploying a Kubernetes-native iperf3 network performance monitoring service with Prometheus and Grafana.
type: application
keywords:
  - iperf3
  - network
  - performance
  - monitoring
  - kubernetes
  - prometheus
  - grafana
home: https://github.com/malarinv/iperf3-monitor # Replace with your repo URL
sources:
  - https://github.com/malarinv/iperf3-monitor # Replace with your repo URL
maintainers:
  - name: Malar Invention # Replace with your name
    email: malarkannan.invention@gmail.com # Replace with your email
icon: https://raw.githubusercontent.com/malarinv/iperf3-monitor/main/icon.png # Optional icon URL
annotations:
  artifacthub.io/changes: |
    - Add initial Helm chart structure.
  artifacthub.io/category: networking
dependencies:
  # Example dependency if you package the whole stack. Note: the dependency
  # name must be the chart name; the repository is given separately.
  - name: kube-prometheus-stack
    version: ">=30.0.0" # Specify a compatible version range
    repository: https://prometheus-community.github.io/helm-charts
    condition: serviceMonitor.enabled # Only include if ServiceMonitor is enabled (assuming Prometheus Operator)
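
If you keep the optional dependency, vendor it before linting or packaging locally. A typical sequence from the repository root (standard Helm commands, nothing project-specific):

```sh
helm dependency update charts/iperf3-monitor  # downloads kube-prometheus-stack into charts/
helm lint charts/iperf3-monitor
```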

charts/iperf3-monitor/templates/_helpers.tpl
@ -0,0 +1,55 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "iperf3-monitor.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If the release name contains the chart name it will be used as the full name.
*/}}
{{- define "iperf3-monitor.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{- end -}}

{{/*
Create the chart's labels. Note the if/end tags must not chomp trailing
whitespace, or the label lines would be glued together in the output.
*/}}
{{- define "iperf3-monitor.labels" -}}
helm.sh/chart: {{ include "iperf3-monitor.name" . }}-{{ .Chart.Version | replace "+" "_" }}
{{ include "iperf3-monitor.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end -}}

{{/*
Selector labels
*/}}
{{- define "iperf3-monitor.selectorLabels" -}}
app.kubernetes.io/name: {{ include "iperf3-monitor.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}}

{{/*
Create the name of the service account to use.
(values.yaml only defines rbac.create, so that is the flag to key off.)
*/}}
{{- define "iperf3-monitor.serviceAccountName" -}}
{{- if .Values.rbac.create -}}
{{- default (include "iperf3-monitor.fullname" .) .Values.serviceAccount.name -}}
{{- else -}}
{{- default "default" .Values.serviceAccount.name -}}
{{- end -}}
{{- end -}}
charts/iperf3-monitor/templates/exporter-deployment.yaml
@ -0,0 +1,49 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "iperf3-monitor.fullname" . }}-exporter
  labels:
    {{- include "iperf3-monitor.labels" . | nindent 4 }}
    app.kubernetes.io/component: exporter
spec:
  replicas: {{ .Values.exporter.replicaCount }}
  selector:
    matchLabels:
      {{- include "iperf3-monitor.selectorLabels" . | nindent 6 }}
      app.kubernetes.io/component: exporter
  template:
    metadata:
      labels:
        {{- include "iperf3-monitor.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: exporter
    spec:
      serviceAccountName: {{ include "iperf3-monitor.serviceAccountName" . }}
      containers:
      - name: iperf3-exporter
        image: "{{ .Values.exporter.image.repository }}:{{ .Values.exporter.image.tag | default .Chart.AppVersion }}"
        imagePullPolicy: {{ .Values.exporter.image.pullPolicy }}
        ports:
        - containerPort: {{ .Values.service.targetPort }}
          name: metrics
        env:
        - name: SOURCE_NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        - name: IPERF_TEST_INTERVAL
          value: "{{ .Values.exporter.testInterval }}"
        - name: IPERF_TEST_PROTOCOL
          value: "{{ .Values.exporter.testProtocol }}"
        - name: IPERF_SERVER_PORT
          value: "5201" # Hardcoded to match the server DaemonSet
        - name: IPERF_SERVER_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: IPERF_SERVER_LABEL_SELECTOR
          value: "app.kubernetes.io/name={{ include \"iperf3-monitor.name\" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server"
        {{- with .Values.exporter.resources }}
        resources:
          {{- toYaml . | nindent 10 }}
        {{- end }}
charts/iperf3-monitor/templates/rbac.yaml
@ -0,0 +1,34 @@
{{- if .Values.rbac.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "iperf3-monitor.serviceAccountName" . }}
  labels:
    {{- include "iperf3-monitor.labels" . | nindent 4 }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "iperf3-monitor.fullname" . }}-role
  labels:
    {{- include "iperf3-monitor.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "iperf3-monitor.fullname" . }}-rb
  labels:
    {{- include "iperf3-monitor.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: {{ include "iperf3-monitor.serviceAccountName" . }}
    namespace: {{ .Release.Namespace }}
roleRef:
  kind: ClusterRole
  name: {{ include "iperf3-monitor.fullname" . }}-role
  apiGroup: rbac.authorization.k8s.io
{{- end -}}
charts/iperf3-monitor/templates/server-daemonset.yaml
@ -0,0 +1,45 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: {{ include "iperf3-monitor.fullname" . }}-server
  labels:
    {{- include "iperf3-monitor.labels" . | nindent 4 }}
    app.kubernetes.io/component: server
spec:
  selector:
    matchLabels:
      {{- include "iperf3-monitor.selectorLabels" . | nindent 6 }}
      app.kubernetes.io/component: server
  template:
    metadata:
      labels:
        {{- include "iperf3-monitor.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: server
    spec:
      # Run on the host network to measure raw node-to-node performance
      hostNetwork: true
      {{- with .Values.server.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.server.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: iperf3-server
          image: "{{ .Values.server.image.repository }}:{{ .Values.server.image.tag }}"
          imagePullPolicy: {{ .Values.server.image.pullPolicy }}
          args: ["-s"] # Start in server mode
          ports:
            - containerPort: 5201
              name: iperf3-tcp
              protocol: TCP
            - containerPort: 5201
              name: iperf3-udp
              protocol: UDP
          {{- with .Values.server.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
charts/iperf3-monitor/templates/service.yaml
@ -0,0 +1,17 @@
apiVersion: v1
kind: Service
metadata:
  name: {{ include "iperf3-monitor.fullname" . }}-exporter-svc
  labels:
    {{- include "iperf3-monitor.labels" . | nindent 4 }}
    app.kubernetes.io/component: exporter
spec:
  type: {{ .Values.service.type }}
  selector:
    {{- include "iperf3-monitor.selectorLabels" . | nindent 4 }}
    app.kubernetes.io/component: exporter
  ports:
    - name: metrics
      port: {{ .Values.service.port }}
      targetPort: {{ .Values.service.targetPort }}
      protocol: TCP
charts/iperf3-monitor/templates/servicemonitor.yaml
@ -0,0 +1,20 @@
{{- if .Values.serviceMonitor.enabled -}}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "iperf3-monitor.fullname" . }}-sm
  labels:
    {{- include "iperf3-monitor.labels" . | nindent 4 }}
    release: prometheus-operator # Standard label for Prometheus Operator discovery
    app.kubernetes.io/component: exporter
spec:
  selector:
    matchLabels:
      {{- include "iperf3-monitor.selectorLabels" . | nindent 6 }}
      app.kubernetes.io/component: exporter
  endpoints:
    - port: metrics
      interval: {{ .Values.serviceMonitor.interval }}
      scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }}
      path: /metrics
{{- end -}}
charts/iperf3-monitor/values.yaml
@ -0,0 +1,120 @@
# Default values for iperf3-monitor.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# -- Override the name of the chart.
nameOverride: ""

# -- Override the fully qualified app name.
fullnameOverride: ""

exporter:
  # -- Configuration for the exporter container image.
  image:
    # -- The container image repository for the exporter.
    repository: ghcr.io/malarinv/iperf3-prometheus-exporter # Replace with your repo URL
    # -- The container image tag for the exporter. If not set, the chart's appVersion is used.
    tag: ""
    # -- The image pull policy for the exporter container.
    pullPolicy: IfNotPresent

  # -- Number of exporter pod replicas. Typically 1 is sufficient.
  replicaCount: 1

  # -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
  testInterval: 300

  # -- Timeout in seconds for a single iperf3 test run.
  testTimeout: 10

  # -- Protocol to use for testing (tcp or udp).
  testProtocol: tcp

  # -- CPU and memory resource requests and limits for the exporter pod.
  # @default -- A small default is provided if commented out.
  resources: {}
  # requests:
  #   cpu: "100m"
  #   memory: "128Mi"
  # limits:
  #   cpu: "500m"
  #   memory: "256Mi"

server:
  # -- Configuration for the iperf3 server container image (DaemonSet).
  image:
    # -- The container image repository for the iperf3 server.
    repository: networkstatic/iperf3
    # -- The container image tag for the iperf3 server.
    tag: latest
    # -- The image pull policy for the iperf3 server container (referenced by the
    # DaemonSet template; Always matches the Kubernetes default for :latest tags).
    pullPolicy: Always

  # -- CPU and memory resource requests and limits for the iperf3 server pods (DaemonSet).
  # These should be very low as the server is mostly idle.
  # @default -- A small default is provided if commented out.
  resources: {}
  # requests:
  #   cpu: "50m"
  #   memory: "64Mi"
  # limits:
  #   cpu: "100m"
  #   memory: "128Mi"

  # -- Node selector for scheduling iperf3 server pods.
  # Use this to restrict the DaemonSet to a subset of nodes.
  # @default -- {} (schedule on all nodes)
  nodeSelector: {}

  # -- Tolerations for scheduling iperf3 server pods on tainted nodes (e.g., control-plane nodes).
  # This is often necessary to include master nodes in the test mesh.
  # @default -- Tolerates control-plane and master taints.
  tolerations:
    - key: "node-role.kubernetes.io/control-plane"
      operator: "Exists"
      effect: "NoSchedule"
    - key: "node-role.kubernetes.io/master"
      operator: "Exists"
      effect: "NoSchedule"

rbac:
  # -- If true, create ServiceAccount, ClusterRole, and ClusterRoleBinding for the exporter.
  # Set to false if you manage RBAC externally.
  create: true

serviceAccount:
  # -- The name of the ServiceAccount to use for the exporter pod.
  # Only used if rbac.create is false. If not set, it defaults to the chart's fullname.
  name: ""

serviceMonitor:
  # -- If true, create a ServiceMonitor resource for integration with Prometheus Operator.
  # Requires a running Prometheus Operator in the cluster.
  enabled: true

  # -- Scrape interval for the ServiceMonitor. How often Prometheus scrapes the exporter metrics.
  interval: 60s

  # -- Scrape timeout for the ServiceMonitor. How long Prometheus waits for the metrics response.
  scrapeTimeout: 30s

# -- Configuration for the exporter Service.
service:
  # -- Service type. ClusterIP is typically sufficient.
  type: ClusterIP
  # -- Port on which the exporter service is exposed.
  port: 9876
  # -- Target port on the exporter pod.
  targetPort: 9876

# -- Optional configuration for a network policy to allow traffic to the iperf3 server DaemonSet.
# This is often necessary if you are using a network policy controller.
networkPolicy:
  # -- If true, create a NetworkPolicy resource.
  enabled: false
  # -- Specify source selectors if needed (e.g., pods in a specific namespace).
  from: []
  # -- Specify namespace selectors if needed.
  namespaceSelector: {}
  # -- Specify pod selectors if needed.
  podSelector: {}
exporter/Dockerfile
@ -0,0 +1,34 @@
# Stage 1: Build stage with dependencies
FROM python:3.9-slim as builder

WORKDIR /app

# Install iperf3 and build dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends gcc iperf3 libiperf-dev && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Stage 2: Final runtime stage
FROM python:3.9-slim

WORKDIR /app

# Copy the iperf3 binary and library from the builder stage
COPY --from=builder /usr/bin/iperf3 /usr/bin/iperf3
COPY --from=builder /usr/lib/x86_64-linux-gnu/libiperf.so.0 /usr/lib/x86_64-linux-gnu/libiperf.so.0

# Copy installed Python packages from the builder stage
COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages

# Copy the exporter application code
COPY exporter.py .

# Expose the metrics port
EXPOSE 9876

# Set the entrypoint
CMD ["python", "exporter.py"]
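
For local iteration you can build and smoke-test the image outside the cluster. A sketch, using an illustrative local tag; without in-cluster credentials the exporter will only log discovery errors, but the container should still start:

```sh
# Build the exporter image from the exporter/ directory
docker build -t iperf3-exporter:dev ./exporter

# Run it locally and check that the metrics endpoint comes up on 9876
docker run --rm -p 9876:9876 iperf3-exporter:dev
```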

exporter/exporter.py
@ -0,0 +1,159 @@
import os
import time
import logging

from kubernetes import client, config
from prometheus_client import start_http_server, Gauge
import iperf3

# --- Configuration ---
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- Prometheus Metrics Definition ---
IPERF_BANDWIDTH_MBPS = Gauge(
    'iperf_network_bandwidth_mbps',
    'Network bandwidth measured by iperf3 in Megabits per second',
    ['source_node', 'destination_node', 'protocol']
)
IPERF_JITTER_MS = Gauge(
    'iperf_network_jitter_ms',
    'Network jitter measured by iperf3 in milliseconds',
    ['source_node', 'destination_node', 'protocol']
)
IPERF_PACKETS_TOTAL = Gauge(
    'iperf_network_packets_total',
    'Total packets transmitted or received during the iperf3 test',
    ['source_node', 'destination_node', 'protocol']
)
IPERF_LOST_PACKETS = Gauge(
    'iperf_network_lost_packets_total',
    'Total lost packets during the iperf3 UDP test',
    ['source_node', 'destination_node', 'protocol']
)
IPERF_TEST_SUCCESS = Gauge(
    'iperf_test_success',
    'Indicates if the iperf3 test was successful (1) or failed (0)',
    ['source_node', 'destination_node', 'protocol']
)


def discover_iperf_servers():
    """
    Discover iperf3 server pods in the cluster using the Kubernetes API.
    """
    try:
        # Load in-cluster configuration.
        # Assumes the exporter runs in a pod with a service account having permissions.
        config.load_incluster_config()
        v1 = client.CoreV1Api()

        namespace = os.getenv('IPERF_SERVER_NAMESPACE', 'default')
        label_selector = os.getenv('IPERF_SERVER_LABEL_SELECTOR', 'app=iperf3-server')

        logging.info(f"Discovering iperf3 servers with label '{label_selector}' in namespace '{namespace}'")

        # List pods across all namespaces with the specified label selector.
        # Note: list_pod_for_all_namespaces requires cluster-wide permissions.
        ret = v1.list_pod_for_all_namespaces(label_selector=label_selector, watch=False)

        servers = []
        for i in ret.items:
            # Ensure the pod has an IP and is running
            if i.status.pod_ip and i.status.phase == 'Running':
                servers.append({
                    'ip': i.status.pod_ip,
                    'node_name': i.spec.node_name
                })
        logging.info(f"Discovered {len(servers)} iperf3 server pods.")
        return servers
    except Exception as e:
        logging.error(f"Error discovering iperf servers: {e}")
        return []  # Return an empty list on error to avoid crashing the loop


def run_iperf_test(server_ip, server_port, protocol, source_node, dest_node):
    """
    Runs a single iperf3 test and updates Prometheus metrics.
    """
    logging.info(f"Running iperf3 test from {source_node} to {dest_node} ({server_ip}:{server_port}) using {protocol.upper()}")

    client = iperf3.Client()
    client.server_hostname = server_ip
    client.port = server_port
    client.protocol = protocol
    # Duration of the test (seconds)
    client.duration = int(os.getenv('IPERF_TEST_DURATION', 5))
    # Output results as JSON for easy parsing
    client.json_output = True

    result = client.run()

    # Parse results and update metrics
    parse_and_publish_metrics(result, source_node, dest_node, protocol)


def parse_and_publish_metrics(result, source_node, dest_node, protocol):
    """
    Parses the iperf3 result and updates Prometheus gauges.
    Handles both successful and failed tests.
    """
    labels = {'source_node': source_node, 'destination_node': dest_node, 'protocol': protocol}

    if result and result.error:
        logging.error(f"Test from {source_node} to {dest_node} failed: {result.error}")
        IPERF_TEST_SUCCESS.labels(**labels).set(0)
        # Set metrics to 0 on failure
        try:
            IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
            IPERF_JITTER_MS.labels(**labels).set(0)
            IPERF_PACKETS_TOTAL.labels(**labels).set(0)
            IPERF_LOST_PACKETS.labels(**labels).set(0)
        except KeyError:
            # Labels might not be registered yet if this is the first failure
            pass
        return

    if not result:
        logging.error(f"Test from {source_node} to {dest_node} failed to return a result object.")
        IPERF_TEST_SUCCESS.labels(**labels).set(0)
        try:
            IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
            IPERF_JITTER_MS.labels(**labels).set(0)
            IPERF_PACKETS_TOTAL.labels(**labels).set(0)
            IPERF_LOST_PACKETS.labels(**labels).set(0)
        except KeyError:
            pass
        return

    IPERF_TEST_SUCCESS.labels(**labels).set(1)

    # The summary data is typically in result.json['end']['sum_sent'] or result.json['end']['sum_received'].
    # The iperf3-python client often exposes this directly as attributes like sent_Mbps or received_Mbps.
    # For TCP, we usually care about the received bandwidth on the client side (which is the exporter).
    # For UDP, the client report contains jitter, lost packets, etc.
    bandwidth_mbps = 0
    if hasattr(result, 'received_Mbps') and result.received_Mbps is not None:
        bandwidth_mbps = result.received_Mbps
    elif hasattr(result, 'sent_Mbps') and result.sent_Mbps is not None:
        # Fallback, though received_Mbps is usually more relevant for a TCP client
        bandwidth_mbps = result.sent_Mbps
    # Check the raw JSON output structure as a further fallback
    elif result.json and 'end' in result.json and 'sum_received' in result.json['end'] and result.json['end']['sum_received']['bits_per_second'] is not None:
        bandwidth_mbps = result.json['end']['sum_received']['bits_per_second'] / 1000000
    elif result.json and 'end' in result.json and 'sum_sent' in result.json['end'] and result.json['end']['sum_sent']['bits_per_second'] is not None:
        bandwidth_mbps = result.json['end']['sum_sent']['bits_per_second'] / 1000000

    IPERF_BANDWIDTH_MBPS.labels(**labels).set(bandwidth_mbps)

    # UDP-specific metrics
    if protocol == 'udp':
        # iperf3-python exposes UDP results directly
        IPERF_JITTER_MS.labels(**labels).set(result.jitter_ms if hasattr(result, 'jitter_ms') and result.jitter_ms is not None else 0)
        IPERF_PACKETS_TOTAL.labels(**labels).set(result.packets if hasattr(result, 'packets') and result.packets is not None else 0)
        IPERF_LOST_PACKETS.labels(**labels).set(result.lost_packets if hasattr(result, 'lost_packets') and result.lost_packets is not None else 0)
    else:
        # Ensure UDP metrics are zeroed or absent for TCP tests
        try:
            IPERF_JITTER_MS.labels(**labels).set(0)
            IPERF_PACKETS_TOTAL.labels(**labels).set(0)
            IPERF_LOST_PACKETS.labels(**labels).set(0)
        except KeyError:
            pass
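
The diff viewer cuts the file off here; the hunk header says the full file is 159 lines, so the entrypoint that ties these functions together is not shown. For orientation, a minimal sketch of what such a main loop could look like, consistent with the functions above and the environment variables set by the exporter Deployment (this is an assumption, not necessarily the author's code):

```python
if __name__ == '__main__':
    # Expose metrics on the port the Service and Deployment expect
    start_http_server(9876)

    source_node = os.getenv('SOURCE_NODE_NAME', 'unknown')
    protocol = os.getenv('IPERF_TEST_PROTOCOL', 'tcp')
    port = int(os.getenv('IPERF_SERVER_PORT', 5201))
    interval = int(os.getenv('IPERF_TEST_INTERVAL', 300))

    while True:
        # Re-discover servers each cycle so node additions/removals are picked up
        for server in discover_iperf_servers():
            run_iperf_test(server['ip'], port, protocol, source_node, server['node_name'])
        time.sleep(interval)
```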

exporter/requirements.txt
@ -0,0 +1,3 @@
prometheus-client
iperf3
kubernetes