diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..5c7d37c
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,95 @@
name: Release iperf3-monitor

on:
  push:
    tags:
      - 'v*.*.*'

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  lint-and-test:
    name: Lint and Test
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Set up Helm
        uses: azure/setup-helm@v3
        with:
          version: v3.10.0

      - name: Helm Lint
        run: helm lint ./charts/iperf3-monitor

  build-and-publish-image:
    name: Build and Publish Docker Image
    runs-on: ubuntu-latest
    needs: lint-and-test
    permissions:
      contents: read
      packages: write
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: ./exporter
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

  package-and-publish-chart:
    name: Package and Publish Helm Chart
    runs-on: ubuntu-latest
    needs: build-and-publish-image
    permissions:
      contents: write # Needed by stefanprodan/helm-gh-pages to push to gh-pages branch
    steps:
      - name: Check out code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0 # Fetch all history for helm-gh-pages to calculate chart index

      - name: Set up Helm
        uses: azure/setup-helm@v3
        with:
          version: v3.10.0

      - name: Install yq
        run: |
          sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq && \
          sudo chmod +x /usr/bin/yq

      - name: Set Chart Version from Tag
        run: |
          VERSION=$(echo "${{ github.ref_name }}" | sed 's/^v//')
          export VERSION # yq's strenv() reads from the environment, so the variable must be exported
          yq e -i '.version = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
          yq e -i '.appVersion = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
          cat ./charts/iperf3-monitor/Chart.yaml # Optional: print updated Chart.yaml

      - name: Publish Helm chart
        uses: stefanprodan/helm-gh-pages@v1.6.0
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          charts_dir: ./charts
          charts_url: https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e58ae8f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,40 @@
# Byte-code files
*.pyc
*.pyo
*.pyd
__pycache__/

# Distribution / build outputs
dist/
build/
*.egg-info/
.tox/

# Virtual environments
.venv/
venv/
env/

# Editor/IDE specific files
.idea/
.vscode/
*.swp
*.swo

# Logs and temporary files
*.log
*.tmp

# OS generated files
.DS_Store
Thumbs.db

# Docker
!Dockerfile
.dockerignore

# Helm
!charts/iperf3-monitor/.helmignore
# Ignore packaged chart files (a trailing comment on a pattern line is not supported by .gitignore)
charts/*.tgz
diff --git a/Kubernetes Network Performance Monitoring.md b/Kubernetes Network Performance Monitoring.md
new file mode 100755
index 0000000..2868fa5
--- /dev/null
+++ b/Kubernetes Network Performance Monitoring.md
@@ -0,0 +1,1505 @@
# **Architecting a Kubernetes-Native Network Performance Monitoring Service with iperf3,
Prometheus, and Helm** + +## **Section 1: Architectural Blueprint for Continuous Network Validation** + +### **1.1 Introduction to Proactive Network Monitoring in Kubernetes** {#introduction-to-proactive-network-monitoring-in-kubernetes} + +In modern cloud-native infrastructures, Kubernetes has emerged as the de +facto standard for container orchestration, simplifying the deployment, +scaling, and management of complex applications.^1^ However, the very +dynamism and abstraction that make Kubernetes powerful also introduce +significant challenges in diagnosing network performance issues. The +ephemeral nature of pods, the complexity of overlay networks provided by +Container Network Interfaces (CNIs), and the multi-layered traffic +routing through Services and Ingress controllers can obscure the root +causes of latency, packet loss, and throughput degradation. + +Traditional, reactive troubleshooting---investigating network problems +only after an application has failed---is insufficient in these +environments. Performance bottlenecks can be subtle, intermittent, and +difficult to reproduce, often manifesting as degraded user experience +long before they trigger hard failures.^1^ To maintain the reliability +and performance of critical workloads, engineering teams must shift from +a reactive to a proactive stance. This requires a system that performs +continuous, automated validation of the underlying network fabric, +treating network health not as an assumption but as a measurable, +time-series metric. + +This document outlines the architecture and implementation of a +comprehensive, Kubernetes-native network performance monitoring service. +The solution leverages a suite of industry-standard, open-source tools +to provide continuous, actionable insights into cluster network health. +The core components are: + +- **iperf3:** A widely adopted tool for active network performance + > measurement, used to generate traffic and measure maximum achievable + > bandwidth, jitter, and packet loss between two points.^2^ + +- **Prometheus:** A powerful, open-source monitoring and alerting system + > that has become the standard for collecting and storing time-series + > metrics in the Kubernetes ecosystem.^3^ + +- **Grafana:** A leading visualization tool for creating rich, + > interactive dashboards from various data sources, including + > Prometheus, enabling intuitive analysis of complex datasets.^4^ + +By combining these components into a cohesive, automated service, we can +transform abstract network performance into a concrete, queryable, and +visualizable stream of data, enabling teams to detect and address +infrastructure-level issues before they impact end-users.^6^ + +### **1.2 The Core Architectural Pattern: Decoupled Test Endpoints and a Central Orchestrator** {#the-core-architectural-pattern-decoupled-test-endpoints-and-a-central-orchestrator} + +The foundation of this monitoring service is a robust, decoupled +architectural pattern designed for scalability and resilience within a +dynamic Kubernetes environment. The design separates the passive test +endpoints from the active test orchestrator, a critical distinction that +ensures the system is both efficient and aligned with Kubernetes +operational principles. + +The data flow and component interaction can be visualized as follows: + +1. A **DaemonSet** deploys an iperf3 server pod onto every node in the + > cluster, creating a mesh of passive test targets. + +2. 
A central **Deployment**, the iperf3-exporter, uses the Kubernetes + > API to discover the IP addresses of all iperf3 server pods. + +3. The iperf3-exporter periodically orchestrates tests, running an + > iperf3 client to connect to each server pod and measure network + > performance. + +4. The exporter parses the JSON output from iperf3, transforms the + > results into Prometheus metrics, and exposes them on a /metrics + > HTTP endpoint. + +5. A **Prometheus** server, configured via a **ServiceMonitor**, + > scrapes the /metrics endpoint of the exporter, ingesting the + > performance data into its time-series database. + +6. A **Grafana** instance, using Prometheus as a data source, + > visualizes the metrics in a purpose-built dashboard, providing + > heatmaps and time-series graphs of node-to-node bandwidth, jitter, + > and packet loss. + +This architecture is composed of three primary logical components: + +- **Component 1: The iperf3-server DaemonSet.** To accurately measure + > network performance between any two nodes (N-to-N), an iperf3 server + > process must be running and accessible on every node. The DaemonSet + > is the canonical Kubernetes controller for this exact use case. It + > guarantees that a copy of a specific pod runs on all, or a selected + > subset of, nodes within the cluster.^7^ When a new node joins the + > cluster, the + > DaemonSet controller automatically deploys an iperf3-server pod to + > it; conversely, when a node is removed, the pod is garbage + > collected. This ensures the mesh of test endpoints is always in sync + > with the state of the cluster, requiring zero manual + > intervention.^9^ This pattern of using a + > DaemonSet to deploy iperf3 across a cluster is a well-established + > practice for network validation.^11^ + +- **Component 2: The iperf3-exporter Deployment.** A separate, + > centralized component is required to act as the test orchestrator. + > This component is responsible for initiating the iperf3 client + > connections, executing the tests, parsing the results, and exposing + > them as Prometheus metrics. Since this is a stateless service whose + > primary function is to perform a periodic task, a Deployment is the + > ideal controller.^8^ A + > Deployment ensures a specified number of replicas are running, + > provides mechanisms for rolling updates, and allows for independent + > resource management and lifecycle control, decoupled from the + > iperf3-server pods it tests against.^10^ + +- **Component 3: The Prometheus & Grafana Stack.** The monitoring + > backend is provided by the kube-prometheus-stack, a comprehensive + > Helm chart that deploys Prometheus, Grafana, Alertmanager, and the + > necessary exporters for cluster monitoring.^4^ Our custom monitoring + > service is designed to integrate seamlessly with this stack, + > leveraging its Prometheus Operator for automatic scrape + > configuration and its Grafana instance for visualization. + +### **1.3 Architectural Justification and Design Rationale** {#architectural-justification-and-design-rationale} + +The primary strength of this architecture lies in its deliberate +separation of concerns, a design choice that yields significant benefits +in resilience, scalability, and operational efficiency. The DaemonSet is +responsible for the *presence* of test endpoints, while the Deployment +handles the *orchestration* of the tests. This decoupling is not +arbitrary; it is a direct consequence of applying Kubernetes-native +principles to the problem. 
+ +The logical progression is as follows: The requirement to continuously +measure N-to-N node bandwidth necessitates that iperf3 server processes +are available on all N nodes to act as targets. The most reliable, +self-healing, and automated method to achieve this \"one-pod-per-node\" +pattern in Kubernetes is to use a DaemonSet.^7^ This makes the server +deployment automatically scale with the cluster itself. Next, a process +is needed to trigger the tests against these servers. This +\"orchestrator\" is a logically distinct, active service. It needs to be +reliable and potentially scalable, but it does not need to run on every +single node. The standard Kubernetes object for managing such stateless +services is a + +Deployment.^8^ + +This separation allows for independent and appropriate resource +allocation. The iperf3-server pods are extremely lightweight, consuming +minimal resources while idle. The iperf3-exporter, however, may be more +CPU-intensive during the brief periods it is actively running tests. By +placing them in different workload objects (DaemonSet and Deployment), +we can configure their resource requests and limits independently. This +prevents the monitoring workload from interfering with or being starved +by application workloads, a crucial consideration for any +production-grade system. This design is fundamentally more robust and +scalable than simpler, monolithic approaches, such as a single script +that attempts to manage both server and client lifecycles.^12^ + +## **Section 2: Implementing the iperf3-prometheus-exporter** + +The heart of this monitoring solution is the iperf3-prometheus-exporter, +a custom application responsible for orchestrating the network tests and +translating their results into a format that Prometheus can ingest. This +section provides a detailed breakdown of its implementation, from +technology selection to the final container image. + +### **2.1 Technology Selection: Python for Agility and Ecosystem** {#technology-selection-python-for-agility-and-ecosystem} + +Python was selected as the implementation language for the exporter due +to its powerful ecosystem and rapid development capabilities. The +availability of mature, well-maintained libraries for interacting with +both Prometheus and Kubernetes significantly accelerates the development +of a robust, cloud-native application. + +The key libraries leveraged are: + +- **prometheus-client:** The official Python client library for + > instrumenting applications with Prometheus metrics. It provides a + > simple API for defining metrics (Gauges, Counters, etc.) and + > exposing them via an HTTP server, handling much of the boilerplate + > required for creating a valid exporter.^13^ + +- **iperf3-python:** A clean, high-level Python wrapper around the + > iperf3 C library. It allows for programmatic control of iperf3 + > clients and servers, and it can directly parse the JSON output of a + > test into a convenient Python object, eliminating the need for + > manual process management and output parsing.^15^ + +- **kubernetes:** The official Python client library for the Kubernetes + > API. This library is essential for the exporter to become + > \"Kubernetes-aware,\" enabling it to dynamically discover the + > iperf3-server pods it needs to test against by querying the API + > server directly. 
+ +### **2.2 Core Exporter Logic (Annotated Python Code)** {#core-exporter-logic-annotated-python-code} + +The exporter\'s logic can be broken down into five distinct steps, which +together form a continuous loop of discovery, testing, and reporting. + +#### **Step 1: Initialization and Metric Definition** + +The application begins by importing the necessary libraries and defining +the Prometheus metrics that will be exposed. We use a Gauge metric, as +bandwidth is a value that can go up or down. Labels are crucial for +providing context; they allow us to slice and dice the data in +Prometheus and Grafana. + +> Python + +import os +import time +import logging +from kubernetes import client, config +from prometheus_client import start_http_server, Gauge +import iperf3 + +\# \-\-- Configuration \-\-- +\# Configure logging +logging.basicConfig(level=logging.INFO, format=\'%(asctime)s - +%(levelname)s - %(message)s\') + +\# \-\-- Prometheus Metrics Definition \-\-- +IPERF_BANDWIDTH_MBPS = Gauge( +\'iperf_network_bandwidth_mbps\', +\'Network bandwidth measured by iperf3 in Megabits per second\', +\[\'source_node\', \'destination_node\', \'protocol\'\] +) +IPERF_JITTER_MS = Gauge( +\'iperf_network_jitter_ms\', +\'Network jitter measured by iperf3 in milliseconds\', +\[\'source_node\', \'destination_node\', \'protocol\'\] +) +IPERF_PACKETS_TOTAL = Gauge( +\'iperf_network_packets_total\', +\'Total packets transmitted or received during the iperf3 test\', +\[\'source_node\', \'destination_node\', \'protocol\'\] +) +IPERF_LOST_PACKETS = Gauge( +\'iperf_network_lost_packets_total\', +\'Total lost packets during the iperf3 UDP test\', +\[\'source_node\', \'destination_node\', \'protocol\'\] +) +IPERF_TEST_SUCCESS = Gauge( +\'iperf_test_success\', +\'Indicates if the iperf3 test was successful (1) or failed (0)\', +\[\'source_node\', \'destination_node\', \'protocol\'\] +) + +#### **Step 2: Kubernetes-Aware Target Discovery** + +A static list of test targets is an anti-pattern in a dynamic +environment like Kubernetes.^16^ The exporter must dynamically discover +its targets. This is achieved by using the Kubernetes Python client to +query the API server for all pods that match the label selector of our + +iperf3-server DaemonSet (e.g., app=iperf3-server). The function returns +a list of dictionaries, each containing the pod\'s IP address and the +name of the node it is running on. + +This dynamic discovery is what transforms the exporter from a simple +script into a resilient, automated service. It adapts to cluster scaling +events without any manual intervention. The logical path is clear: +Kubernetes clusters are dynamic, so a hardcoded list of IPs would become +stale instantly. The API server is the single source of truth for the +cluster\'s state. Therefore, the exporter must query this API, which in +turn necessitates including the Kubernetes client library and +configuring the appropriate Role-Based Access Control (RBAC) permissions +for its ServiceAccount. + +> Python + +def discover_iperf_servers(): +\"\"\" +Discover iperf3 server pods in the cluster using the Kubernetes API. 
+\"\"\" +try: +\# Load in-cluster configuration +config.load_incluster_config() +v1 = client.CoreV1Api() + +namespace = os.getenv(\'IPERF_SERVER_NAMESPACE\', \'default\') +label_selector = os.getenv(\'IPERF_SERVER_LABEL_SELECTOR\', +\'app=iperf3-server\') + +logging.info(f\"Discovering iperf3 servers with label +\'{label_selector}\' in namespace \'{namespace}\'\") + +ret = v1.list_pod_for_all_namespaces(label_selector=label_selector, +watch=False) + +servers = +for i in ret.items: +\# Ensure pod has an IP and is running +if i.status.pod_ip and i.status.phase == \'Running\': +servers.append({ +\'ip\': i.status.pod_ip, +\'node_name\': i.spec.node_name +}) +logging.info(f\"Discovered {len(servers)} iperf3 server pods.\") +return servers +except Exception as e: +logging.error(f\"Error discovering iperf servers: {e}\") +return + +#### **Step 3: The Test Orchestration Loop** + +The main function of the application contains an infinite while True +loop that orchestrates the entire process. It periodically discovers the +servers, creates a list of test pairs (node-to-node), and then executes +an iperf3 test for each pair. + +> Python + +def run_iperf_test(server_ip, server_port, protocol, source_node, +dest_node): +\"\"\" +Runs a single iperf3 test and updates Prometheus metrics. +\"\"\" +logging.info(f\"Running iperf3 test from {source_node} to {dest_node} +({server_ip}:{server_port}) using {protocol.upper()}\") + +client = iperf3.Client() +client.server_hostname = server_ip +client.port = server_port +client.protocol = protocol +client.duration = int(os.getenv(\'IPERF_TEST_DURATION\', 5)) +client.json_output = True \# Critical for parsing + +result = client.run() + +\# Parse results and update metrics +parse_and_publish_metrics(result, source_node, dest_node, protocol) + +def main_loop(): +\"\"\" +Main orchestration loop. +\"\"\" +test_interval = int(os.getenv(\'IPERF_TEST_INTERVAL\', 300)) +server_port = int(os.getenv(\'IPERF_SERVER_PORT\', 5201)) +protocol = os.getenv(\'IPERF_TEST_PROTOCOL\', \'tcp\').lower() +source_node_name = os.getenv(\'SOURCE_NODE_NAME\') \# Injected via +Downward API + +if not source_node_name: +logging.error(\"SOURCE_NODE_NAME environment variable not set. +Exiting.\") +return + +while True: +servers = discover_iperf_servers() + +for server in servers: +\# Avoid testing a node against itself +if server\[\'node_name\'\] == source_node_name: +continue + +run_iperf_test(server\[\'ip\'\], server_port, protocol, +source_node_name, server\[\'node_name\'\]) + +logging.info(f\"Completed test cycle. Sleeping for {test_interval} +seconds.\") +time.sleep(test_interval) + +#### **Step 4: Parsing and Publishing Metrics** + +After each test run, a dedicated function parses the JSON result object +provided by the iperf3-python library.^15^ It extracts the key +performance indicators and uses them to set the value of the +corresponding Prometheus + +Gauge, applying the correct labels for source and destination nodes. +Robust error handling ensures that failed tests are also recorded as a +metric, which is vital for alerting. + +> Python + +def parse_and_publish_metrics(result, source_node, dest_node, +protocol): +\"\"\" +Parses the iperf3 result and updates Prometheus gauges. 
+\"\"\" +labels = {\'source_node\': source_node, \'destination_node\': dest_node, +\'protocol\': protocol} + +if result.error: +logging.error(f\"Test from {source_node} to {dest_node} failed: +{result.error}\") +IPERF_TEST_SUCCESS.labels(\*\*labels).set(0) +\# Clear previous successful metrics for this path +IPERF_BANDWIDTH_MBPS.labels(\*\*labels).set(0) +IPERF_JITTER_MS.labels(\*\*labels).set(0) +return + +IPERF_TEST_SUCCESS.labels(\*\*labels).set(1) + +\# The summary data is in result.sent_Mbps or result.received_Mbps +depending on direction +\# For simplicity, we check for available attributes. +if hasattr(result, \'sent_Mbps\'): +bandwidth_mbps = result.sent_Mbps +elif hasattr(result, \'received_Mbps\'): +bandwidth_mbps = result.received_Mbps +else: +\# Fallback for different iperf3 versions/outputs +bandwidth_mbps = result.Mbps if hasattr(result, \'Mbps\') else 0 + +IPERF_BANDWIDTH_MBPS.labels(\*\*labels).set(bandwidth_mbps) + +if protocol == \'udp\': +IPERF_JITTER_MS.labels(\*\*labels).set(result.jitter_ms if +hasattr(result, \'jitter_ms\') else 0) +IPERF_PACKETS_TOTAL.labels(\*\*labels).set(result.packets if +hasattr(result, \'packets\') else 0) +IPERF_LOST_PACKETS.labels(\*\*labels).set(result.lost_packets if +hasattr(result, \'lost_packets\') else 0) + +#### **Step 5: Exposing the /metrics Endpoint** + +Finally, the main execution block starts a simple HTTP server using the +prometheus-client library. This server exposes the collected metrics on +the standard /metrics path, ready to be scraped by Prometheus.^13^ + +> Python + +if \_\_name\_\_ == \'\_\_main\_\_\': +\# Start the Prometheus metrics server +listen_port = int(os.getenv(\'LISTEN_PORT\', 9876)) +start_http_server(listen_port) +logging.info(f\"Prometheus exporter listening on port {listen_port}\") + +\# Start the main orchestration loop +main_loop() + +### **2.3 Containerizing the Exporter (Dockerfile)** {#containerizing-the-exporter-dockerfile} + +To deploy the exporter in Kubernetes, it must be packaged into a +container image. A multi-stage Dockerfile is used to create a minimal +and more secure final image by separating the build environment from the +runtime environment. This is a standard best practice for producing +production-ready containers.^14^ + +> Dockerfile + +\# Stage 1: Build stage with dependencies +FROM python:3.9-slim as builder + +WORKDIR /app + +\# Install iperf3 and build dependencies +RUN apt-get update && \\ +apt-get install -y \--no-install-recommends gcc iperf3 libiperf-dev && +\\ +rm -rf /var/lib/apt/lists/\* + +\# Install Python dependencies +COPY requirements.txt. +RUN pip install \--no-cache-dir -r requirements.txt + +\# Stage 2: Final runtime stage +FROM python:3.9-slim + +WORKDIR /app + +\# Copy iperf3 binary and library from the builder stage +COPY \--from=builder /usr/bin/iperf3 /usr/bin/iperf3 +COPY \--from=builder /usr/lib/x86_64-linux-gnu/libiperf.so.0 +/usr/lib/x86_64-linux-gnu/libiperf.so.0 + +\# Copy installed Python packages from the builder stage +COPY \--from=builder /usr/local/lib/python3.9/site-packages +/usr/local/lib/python3.9/site-packages + +\# Copy the exporter application code +COPY exporter.py. 
### **2.3 Containerizing the Exporter (Dockerfile)** {#containerizing-the-exporter-dockerfile}

To deploy the exporter in Kubernetes, it must be packaged into a
container image. A multi-stage Dockerfile is used to create a minimal
and more secure final image by separating the build environment from the
runtime environment. This is a standard best practice for producing
production-ready containers.^14^

```dockerfile
# Stage 1: Build stage with dependencies
FROM python:3.9-slim as builder

WORKDIR /app

# Install iperf3 and build dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends gcc iperf3 libiperf-dev && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Stage 2: Final runtime stage
FROM python:3.9-slim

WORKDIR /app

# Copy the iperf3 binary and library from the builder stage
COPY --from=builder /usr/bin/iperf3 /usr/bin/iperf3
COPY --from=builder /usr/lib/x86_64-linux-gnu/libiperf.so.0 /usr/lib/x86_64-linux-gnu/libiperf.so.0

# Copy installed Python packages from the builder stage
COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages

# Copy the exporter application code
COPY exporter.py .

# Expose the metrics port
EXPOSE 9876

# Set the entrypoint
CMD ["python", "exporter.py"]
```

The corresponding requirements.txt would contain:

```
prometheus-client
iperf3
kubernetes
```
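Building and pushing the image is then a routine Docker workflow. The
registry path and tag below are placeholders to substitute with your
own:

```sh
docker build -t ghcr.io/my-org/iperf3-prometheus-exporter:0.1.0 .
docker push ghcr.io/my-org/iperf3-prometheus-exporter:0.1.0
```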
## **Section 3: Kubernetes Manifests and Deployment Strategy**

With the architectural blueprint defined and the exporter application
containerized, the next step is to translate this design into
declarative Kubernetes manifests. These YAML files define the necessary
Kubernetes objects to deploy, configure, and manage the monitoring
service. Using static manifests here provides a clear foundation before
they are parameterized into a Helm chart in the next section.

### **3.1 The iperf3-server DaemonSet** {#the-iperf3-server-daemonset}

The iperf3-server component is deployed as a DaemonSet to ensure an
instance of the server pod runs on every eligible node in the
cluster.^7^ This creates the ubiquitous grid of test endpoints required
for comprehensive N-to-N testing.

Key fields in this manifest include:

- **spec.selector**: Connects the DaemonSet to the pods it manages via
  labels.

- **spec.template.metadata.labels**: The label app: iperf3-server is
  applied to the pods, which is crucial for discovery by both the
  iperf3-exporter and Kubernetes Services.

- **spec.template.spec.containers**: Defines the iperf3 container, using
  a public image and running the iperf3 -s command to start it in
  server mode.

- **spec.template.spec.tolerations**: This is often necessary to allow
  the DaemonSet to schedule pods on control-plane (master) nodes,
  which may have taints preventing normal workloads from running
  there. This ensures the entire cluster, including masters, is part
  of the test mesh.

- **spec.template.spec.hostNetwork: true**: This is a critical setting.
  By running the server pods on the host's network namespace, we
  bypass the Kubernetes network overlay (CNI) for the server side.
  This allows the test to measure the raw performance of the
  underlying node network interface, which is often the primary goal
  of infrastructure-level testing.

```yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: iperf3-server
  labels:
    app: iperf3-server
spec:
  selector:
    matchLabels:
      app: iperf3-server
  template:
    metadata:
      labels:
        app: iperf3-server
    spec:
      # Run on the host network to measure raw node-to-node performance
      hostNetwork: true
      # Tolerations to allow scheduling on control-plane nodes
      tolerations:
        - key: "node-role.kubernetes.io/control-plane"
          operator: "Exists"
          effect: "NoSchedule"
        - key: "node-role.kubernetes.io/master"
          operator: "Exists"
          effect: "NoSchedule"
      containers:
        - name: iperf3-server
          image: networkstatic/iperf3:latest
          args: ["-s"]  # Start in server mode
          ports:
            - containerPort: 5201
              name: iperf3
              protocol: TCP
            - containerPort: 5201
              name: iperf3-udp
              protocol: UDP
          resources:
            requests:
              cpu: "50m"
              memory: "64Mi"
            limits:
              cpu: "100m"
              memory: "128Mi"
```

### **3.2 The iperf3-exporter Deployment** {#the-iperf3-exporter-deployment}

The iperf3-exporter is deployed as a Deployment, as it is a stateless
application that orchestrates the tests.^14^ Only one replica is
typically needed, as it can sequentially test all nodes.

Key fields in this manifest are:

- **spec.replicas: 1**: A single instance is sufficient for most
  clusters.

- **spec.template.spec.serviceAccountName**: This assigns the custom
  ServiceAccount (defined next) to the pod, granting it the necessary
  permissions to talk to the Kubernetes API.

- **spec.template.spec.containers.env**: The SOURCE_NODE_NAME
  environment variable is populated using the Downward API. This is
  how the exporter pod knows which node *it* is running on, allowing
  it to skip testing against itself.

- **spec.template.spec.containers.image**: This points to the custom
  exporter image built in the previous section.

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: iperf3-exporter
  labels:
    app: iperf3-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: iperf3-exporter
  template:
    metadata:
      labels:
        app: iperf3-exporter
    spec:
      serviceAccountName: iperf3-exporter-sa
      containers:
        - name: iperf3-exporter
          image: your-repo/iperf3-prometheus-exporter:latest  # Replace with your image
          ports:
            - containerPort: 9876
              name: metrics
          env:
            # Use the Downward API to inject the node name this pod is running on
            - name: SOURCE_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # Other configuration for the exporter script
            - name: IPERF_TEST_INTERVAL
              value: "300"
            - name: IPERF_SERVER_LABEL_SELECTOR
              value: "app=iperf3-server"
          resources:
            requests:
              cpu: "100m"
              memory: "128Mi"
            limits:
              cpu: "500m"
              memory: "256Mi"
```

### **3.3 RBAC: Granting Necessary Permissions** {#rbac-granting-necessary-permissions}

For the exporter to perform its dynamic discovery of iperf3-server pods,
it must be granted specific, limited permissions to read information
from the Kubernetes API. This is accomplished through a ServiceAccount,
a ClusterRole, and a ClusterRoleBinding.

- **ServiceAccount**: Provides an identity for the exporter pod within
  the cluster.

- **ClusterRole**: Defines a set of permissions. Here, we grant get,
  list, and watch access to pods. These are the minimum required
  permissions for the discovery function to work. The role is a
  ClusterRole because the exporter needs to find pods across all
  namespaces where servers might be running.

- **ClusterRoleBinding**: Links the ServiceAccount to the ClusterRole,
  effectively granting the permissions to any pod that uses the
  ServiceAccount.

```yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: iperf3-exporter-sa
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: iperf3-exporter-role
rules:
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: iperf3-exporter-rb
subjects:
  - kind: ServiceAccount
    name: iperf3-exporter-sa
    namespace: default  # The namespace where the exporter is deployed
roleRef:
  kind: ClusterRole
  name: iperf3-exporter-role
  apiGroup: rbac.authorization.k8s.io
```
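Once these objects are applied, the grant can be verified without
deploying anything, using kubectl's built-in authorization check (the
namespace matches the ClusterRoleBinding above):

```sh
kubectl auth can-i list pods \
  --as=system:serviceaccount:default:iperf3-exporter-sa
# Expected output: yes
```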
### **3.4 Network Exposure: Service and ServiceMonitor** {#network-exposure-service-and-servicemonitor}

To make the exporter's metrics available to Prometheus, we need two
final objects. The Service exposes the exporter pod's metrics port
within the cluster, and the ServiceMonitor tells the Prometheus Operator
how to find and scrape that service.

This ServiceMonitor-based approach is the linchpin for a GitOps-friendly
integration. Instead of manually editing the central Prometheus
configuration file---a brittle and non-declarative process---we deploy a
ServiceMonitor custom resource alongside our application.^14^ The
Prometheus Operator, a key component of the kube-prometheus-stack,
continuously watches for these objects. When it discovers our
iperf3-exporter-sm, it automatically generates the necessary scrape
configuration and reloads Prometheus without any manual
intervention.^4^ This empowers the application team to define *how
their application should be monitored* as part of the application's own
deployment package, a cornerstone of scalable, "you build it, you run
it" observability.

```yaml
apiVersion: v1
kind: Service
metadata:
  name: iperf3-exporter-svc
  labels:
    app: iperf3-exporter
spec:
  selector:
    app: iperf3-exporter
  ports:
    - name: metrics
      port: 9876
      targetPort: metrics
      protocol: TCP
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: iperf3-exporter-sm
  labels:
    # Label for the Prometheus Operator to discover this ServiceMonitor
    release: prometheus-operator
spec:
  selector:
    matchLabels:
      # This must match the labels on the Service object above
      app: iperf3-exporter
  endpoints:
    - port: metrics
      interval: 60s
      scrapeTimeout: 30s
```
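With scraping in place, the iperf_test_success gauge is a natural basis
for alerting. The following PrometheusRule is an illustrative sketch: it
assumes the Prometheus Operator's PrometheusRule CRD is installed, and
the rule name, duration, and severity are placeholders to adapt. The
release label mirrors the discovery convention used by the
ServiceMonitor above.

```yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: iperf3-monitor-rules
  labels:
    release: prometheus-operator
spec:
  groups:
    - name: iperf3.rules
      rules:
        - alert: IperfPathDown
          expr: iperf_test_success == 0
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "iperf3 test failing from {{ $labels.source_node }} to {{ $labels.destination_node }}"
```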
## **Section 4: Packaging with Helm for Reusability and Distribution**

While static YAML manifests are excellent for defining Kubernetes
resources, they lack the flexibility needed for easy configuration,
distribution, and lifecycle management. Helm, the package manager for
Kubernetes, solves this by bundling applications into
version-controlled, reusable packages called charts.^17^ This section
details how to package the entire iperf3 monitoring service into a
professional, flexible, and distributable Helm chart.

### **4.1 Helm Chart Structure** {#helm-chart-structure}

A well-organized Helm chart follows a standard directory structure. This
convention makes charts easier to understand and maintain.^19^

```
iperf3-monitor/
├── Chart.yaml          # Metadata about the chart (name, version, etc.)
├── values.yaml         # Default configuration values for the chart
├── charts/             # Directory for sub-chart dependencies (empty for this project)
├── templates/          # Directory containing the templated Kubernetes manifests
│   ├── _helpers.tpl    # A place for reusable template helpers
│   ├── server-daemonset.yaml
│   ├── exporter-deployment.yaml
│   ├── rbac.yaml
│   ├── service.yaml
│   └── servicemonitor.yaml
└── README.md           # Documentation for the chart
```

### **4.2 Templating the Kubernetes Manifests** {#templating-the-kubernetes-manifests}

The core of Helm's power lies in its templating engine, which uses Go
templates. We convert the static manifests from Section 3 into dynamic
templates by replacing hardcoded values with references to variables
defined in the values.yaml file.

A crucial best practice is to use a \_helpers.tpl file to define common
functions and partial templates, especially for generating resource
names and labels. This reduces boilerplate, ensures consistency, and
makes the chart easier to manage.^19^

**Example: templates/\_helpers.tpl**

```
{{/*
Expand the name of the chart.
*/}}
{{- define "iperf3-monitor.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end -}}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited
to this (by the DNS naming spec).
*/}}
{{- define "iperf3-monitor.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end -}}

{{/*
Common labels
*/}}
{{- define "iperf3-monitor.labels" -}}
helm.sh/chart: {{ include "iperf3-monitor.name" . }}
{{ include "iperf3-monitor.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end -}}

{{/*
Selector labels
*/}}
{{- define "iperf3-monitor.selectorLabels" -}}
app.kubernetes.io/name: {{ include "iperf3-monitor.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}}
```

**Example: Templated exporter-deployment.yaml**

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "iperf3-monitor.fullname" . }}-exporter
  labels:
    {{- include "iperf3-monitor.labels" . | nindent 4 }}
    app.kubernetes.io/component: exporter
spec:
  replicas: {{ .Values.exporter.replicaCount }}
  selector:
    matchLabels:
      {{- include "iperf3-monitor.selectorLabels" . | nindent 6 }}
      app.kubernetes.io/component: exporter
  template:
    metadata:
      labels:
        {{- include "iperf3-monitor.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: exporter
    spec:
      {{- if .Values.rbac.create }}
      serviceAccountName: {{ include "iperf3-monitor.fullname" . }}-sa
      {{- else }}
      serviceAccountName: {{ .Values.serviceAccount.name }}
      {{- end }}
      containers:
        - name: iperf3-exporter
          image: "{{ .Values.exporter.image.repository }}:{{ .Values.exporter.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.exporter.image.pullPolicy }}
          ports:
            - containerPort: 9876
              name: metrics
          env:
            - name: SOURCE_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: IPERF_TEST_INTERVAL
              value: "{{ .Values.exporter.testInterval }}"
          resources:
            {{- toYaml .Values.exporter.resources | nindent 12 }}
```

### **4.3 Designing a Comprehensive values.yaml** {#designing-a-comprehensive-values.yaml}

The values.yaml file is the public API of a Helm chart. A well-designed
values file is intuitive, clearly documented, and provides users with
the flexibility to adapt the chart to their specific needs. Best
practices include using clear, camelCase naming conventions and
providing comments for every parameter.^21^

A particularly powerful feature of Helm is conditional logic. By
wrapping entire resource definitions in if blocks based on boolean flags
in values.yaml (e.g., {{- if .Values.rbac.create }}), the chart becomes
highly adaptable. A user in a high-security environment can disable the
automatic creation of ClusterRoles by setting rbac.create: false,
allowing them to manage permissions manually without causing the Helm
installation to fail.^20^ Similarly, a user not running the Prometheus
Operator can set serviceMonitor.enabled: false. This adaptability
transforms the chart from a rigid, all-or-nothing package into a
flexible building block, dramatically increasing its utility across
different organizations and security postures.
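To make the pattern concrete, here is a hedged sketch of the relevant
values.yaml defaults and a guarded template, with names chosen to match
the parameter table that follows:

```yaml
# values.yaml (excerpt)
rbac:
  create: true    # set to false to manage RBAC objects yourself
serviceAccount:
  name: ""        # consulted only when rbac.create is false
serviceMonitor:
  enabled: true
  interval: 60s
```

```yaml
# templates/rbac.yaml (excerpt) -- nothing is rendered when rbac.create is false
{{- if .Values.rbac.create }}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "iperf3-monitor.fullname" . }}-sa
{{- end }}
```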
+ +The following table documents the comprehensive set of configurable +parameters for the iperf3-monitor chart. This serves as the primary +documentation for any user wishing to install and customize the service. + +| Parameter | Description | Type | Default | +|------------------------------|----------------------------------------------------------------------|---------|-------------------------------------------| +| nameOverride | Override the name of the chart. | string | \"\" | +| fullnameOverride | Override the fully qualified app name. | string | \"\" | +| exporter.image.repository | The container image repository for the exporter. | string | ghcr.io/my-org/iperf3-prometheus-exporter | +| exporter.image.tag | The container image tag for the exporter. | string | (Chart.AppVersion) | +| exporter.image.pullPolicy | The image pull policy for the exporter. | string | IfNotPresent | +| exporter.replicaCount | Number of exporter pod replicas. | integer | 1 | +| exporter.testInterval | Interval in seconds between test cycles. | integer | 300 | +| exporter.testTimeout | Timeout in seconds for a single iperf3 test. | integer | 10 | +| exporter.testProtocol | Protocol to use for testing (tcp or udp). | string | tcp | +| exporter.resources | CPU/memory resource requests and limits for the exporter. | object | {} | +| server.image.repository | The container image repository for the iperf3 server. | string | networkstatic/iperf3 | +| server.image.tag | The container image tag for the iperf3 server. | string | latest | +| server.resources | CPU/memory resource requests and limits for the server pods. | object | {} | +| server.nodeSelector | Node selector for scheduling server pods. | object | {} | +| server.tolerations | Tolerations for scheduling server pods on tainted nodes. | array | \`\` | +| rbac.create | If true, create ServiceAccount, ClusterRole, and ClusterRoleBinding. | boolean | true | +| serviceAccount.name | The name of the ServiceAccount to use. Used if rbac.create is false. | string | \"\" | +| serviceMonitor.enabled | If true, create a ServiceMonitor for Prometheus Operator. | boolean | true | +| serviceMonitor.interval | Scrape interval for the ServiceMonitor. | string | 60s | +| serviceMonitor.scrapeTimeout | Scrape timeout for the ServiceMonitor. | string | 30s | + +## **Section 5: Visualizing Network Performance with a Custom Grafana Dashboard** + +The final piece of the user experience is a purpose-built Grafana +dashboard that transforms the raw, time-series metrics from Prometheus +into intuitive, actionable visualizations. A well-designed dashboard +does more than just display data; it tells a story, guiding an operator +from a high-level overview of cluster health to a deep-dive analysis of +a specific problematic network path.^5^ + +### **5.1 Dashboard Design Principles** {#dashboard-design-principles} + +The primary goals for this network performance dashboard are: + +1. **At-a-Glance Overview:** Provide an immediate, cluster-wide view of + > network health, allowing operators to quickly spot systemic issues + > or anomalies. + +2. **Intuitive Drill-Down:** Enable users to seamlessly transition from + > a high-level view to a detailed analysis of performance between + > specific nodes. + +3. **Correlation:** Display multiple related metrics (bandwidth, + > jitter, packet loss) on the same timeline to help identify causal + > relationships. + +4. 
**Clarity and Simplicity:** Avoid clutter and overly complex panels + > that can obscure meaningful data.^4^ + +### **5.2 Key Visualizations and Panels** {#key-visualizations-and-panels} + +The dashboard is constructed from several key panel types, each serving +a specific analytical purpose. + +- **Panel 1: Node-to-Node Bandwidth Heatmap.** This is the centerpiece + > of the dashboard\'s overview. It uses Grafana\'s \"Heatmap\" + > visualization to create a matrix of network performance. + + - **Y-Axis:** Source Node (source_node label). + + - **X-Axis:** Destination Node (destination_node label). + + - **Cell Color:** The value of the iperf_network_bandwidth_mbps + > metric. + + - PromQL Query: avg(iperf_network_bandwidth_mbps) by (source_node, + > destination_node) + > This panel provides an instant visual summary of the entire + > cluster\'s network fabric. A healthy cluster will show a uniformly + > \"hot\" (high bandwidth) grid, while any \"cold\" spots + > immediately draw attention to underperforming network paths. + +- **Panel 2: Time-Series Performance Graphs.** These panels use the + > \"Time series\" visualization to plot performance over time, + > allowing for trend analysis and historical investigation. + + - **Bandwidth (Mbps):** Plots + > iperf_network_bandwidth_mbps{source_node=\"\$source_node\", + > destination_node=\"\$destination_node\"}. + + - **Jitter (ms):** Plots + > iperf_network_jitter_ms{source_node=\"\$source_node\", + > destination_node=\"\$destination_node\", protocol=\"udp\"}. + + - Packet Loss (%): Plots (iperf_network_lost_packets_total{\...} / + > iperf_network_packets_total{\...}) \* 100. + > These graphs are filtered by the dashboard variables, enabling the + > drill-down analysis. + +- **Panel 3: Stat Panels.** These panels use the \"Stat\" visualization + > to display single, key performance indicators (KPIs) for the + > selected time range and nodes. + + - **Average Bandwidth:** avg(iperf_network_bandwidth_mbps{\...}) + + - **Minimum Bandwidth:** min(iperf_network_bandwidth_mbps{\...}) + + - **Maximum Jitter:** max(iperf_network_jitter_ms{\...}) + +### **5.3 Enabling Interactivity with Grafana Variables** {#enabling-interactivity-with-grafana-variables} + +The dashboard\'s interactivity is powered by Grafana\'s template +variables. These variables are dynamically populated from Prometheus and +are used to filter the data displayed in the panels.^4^ + +- **\$source_node**: A dropdown variable populated by the PromQL query + > label_values(iperf_network_bandwidth_mbps, source_node). + +- **\$destination_node**: A dropdown variable populated by + > label_values(iperf_network_bandwidth_mbps{source_node=\"\$source_node\"}, + > destination_node). This query is cascaded, meaning it only shows + > destinations relevant to the selected source. + +- **\$protocol**: A custom variable with the options tcp and udp. + +This combination of a high-level heatmap with interactive, +variable-driven drill-down graphs creates a powerful analytical +workflow. An operator can begin with a bird\'s-eye view of the cluster. +Upon spotting an anomaly on the heatmap (e.g., a low-bandwidth link +between Node-5 and Node-8), they can use the \$source_node and +\$destination_node dropdowns to select that specific path. All the +time-series panels will instantly update to show the detailed +performance history for that link, allowing the operator to correlate +bandwidth drops with jitter spikes or other events. 
This workflow transforms raw data into actionable insight, dramatically
reducing the Mean Time to Identification (MTTI) for network issues.

### **5.4 The Complete Grafana Dashboard JSON Model** {#the-complete-grafana-dashboard-json-model}

To facilitate easy deployment, the entire dashboard is defined in a
single JSON model. This model can be imported directly into any Grafana
instance.

```json
{
  "__inputs": [],
  "__requires": [
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "8.0.0"
    },
    {
      "type": "datasource",
      "id": "prometheus",
      "name": "Prometheus",
      "version": "1.0.0"
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": {
          "type": "grafana",
          "uid": "-- Grafana --"
        },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "title": "Node-to-Node Bandwidth Heatmap",
      "type": "heatmap",
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "gridPos": { "h": 9, "w": 24, "x": 0, "y": 0 },
      "targets": [
        {
          "expr": "avg(iperf_network_bandwidth_mbps) by (source_node, destination_node)",
          "format": "heatmap",
          "legendFormat": "{{source_node}} -> {{destination_node}}",
          "refId": "A"
        }
      ],
      "cards": { "cardPadding": null, "cardRound": null },
      "color": {
        "mode": "spectrum",
        "scheme": "red-yellow-green",
        "exponent": 0.5,
        "reverse": false
      },
      "dataFormat": "tsbuckets",
      "yAxis": { "show": true, "format": "short" },
      "xAxis": { "show": true }
    },
    {
      "title": "Bandwidth Over Time (Source: $source_node, Dest: $destination_node)",
      "type": "timeseries",
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 },
      "targets": [
        {
          "expr": "iperf_network_bandwidth_mbps{source_node=\"$source_node\", destination_node=\"$destination_node\"}",
          "legendFormat": "Bandwidth",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "mbps"
        }
      }
    },
    {
      "title": "Jitter Over Time (Source: $source_node, Dest: $destination_node)",
      "type": "timeseries",
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 },
      "targets": [
        {
          "expr": "iperf_network_jitter_ms{source_node=\"$source_node\", destination_node=\"$destination_node\", protocol=\"udp\"}",
          "legendFormat": "Jitter",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "ms"
        }
      }
    }
  ],
  "refresh": "30s",
  "schemaVersion": 36,
  "style": "dark",
  "tags": ["iperf3", "network", "kubernetes"],
  "templating": {
    "list": [
      {
        "current": {},
        "datasource": {
          "type": "prometheus",
          "uid": "prometheus"
        },
        "definition": "label_values(iperf_network_bandwidth_mbps, source_node)",
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "source_node",
        "options": [],
        "query": "label_values(iperf_network_bandwidth_mbps, source_node)",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 1,
        "type": "query"
      },
      {
        "current": {},
        "datasource": {
          "type": "prometheus",
          "uid": "prometheus"
        },
        "definition": "label_values(iperf_network_bandwidth_mbps{source_node=\"$source_node\"}, destination_node)",
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "destination_node",
        "options": [],
        "query": "label_values(iperf_network_bandwidth_mbps{source_node=\"$source_node\"}, destination_node)",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 1,
        "type": "query"
      },
      {
        "current": { "selected": true, "text": "tcp", "value": "tcp" },
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "protocol",
        "options": [
          { "selected": true, "text": "tcp", "value": "tcp" },
          { "selected": false, "text": "udp", "value": "udp" }
        ],
        "query": "tcp,udp",
        "skipUrlSync": false,
        "type": "custom"
      }
    ]
  },
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "browser",
  "title": "Kubernetes iperf3 Network Performance",
  "uid": "k8s-iperf3-dashboard",
  "version": 1,
  "weekStart": ""
}
```
\"tcp\", \"value\": \"tcp\" +}, +\"hide\": 0, +\"includeAll\": false, +\"multi\": false, +\"name\": \"protocol\", +\"options\": \[ +{ \"selected\": true, \"text\": \"tcp\", \"value\": \"tcp\" }, +{ \"selected\": false, \"text\": \"udp\", \"value\": \"udp\" } +\], +\"query\": \"tcp,udp\", +\"skipUrlSync\": false, +\"type\": \"custom\" +} +\] +}, +\"time\": { +\"from\": \"now-1h\", +\"to\": \"now\" +}, +\"timepicker\": {}, +\"timezone\": \"browser\", +\"title\": \"Kubernetes iperf3 Network Performance\", +\"uid\": \"k8s-iperf3-dashboard\", +\"version\": 1, +\"weekStart\": \"\" +} + +## **Section 6: GitHub Repository Structure and CI/CD Workflow** + +To deliver this monitoring service as a professional, open-source-ready +project, it is essential to package it within a well-structured GitHub +repository and implement a robust Continuous Integration and Continuous +Deployment (CI/CD) pipeline. This automates the build, test, and release +process, ensuring that every version of the software is consistent, +trustworthy, and easy for consumers to adopt. + +### **6.1 Recommended Repository Structure** {#recommended-repository-structure} + +A clean, logical directory structure is fundamental for project +maintainability and ease of navigation for contributors and users. + +. +├──.github/ +│ └── workflows/ +│ └── release.yml \# GitHub Actions workflow for CI/CD +├── charts/ +│ └── iperf3-monitor/ \# The Helm chart for the service +│ ├── Chart.yaml +│ ├── values.yaml +│ └── templates/ +│ └──\... +└── exporter/ +├── Dockerfile \# Dockerfile for the exporter +├── requirements.txt \# Python dependencies +└── exporter.py \# Exporter source code +├──.gitignore +├── LICENSE +└── README.md + +This structure cleanly separates the exporter application code +(/exporter) from its deployment packaging (/charts/iperf3-monitor), and +its release automation (/.github/workflows). + +### **6.2 CI/CD Pipeline with GitHub Actions** {#cicd-pipeline-with-github-actions} + +A fully automated CI/CD pipeline is the hallmark of a mature software +project. It eliminates manual, error-prone release steps and provides +strong guarantees about the integrity of the published artifacts. By +triggering the pipeline on the creation of a Git tag (e.g., v1.2.3), we +use the tag as a single source of truth for versioning both the Docker +image and the Helm chart. This ensures that chart version 1.2.3 is built +to use image version 1.2.3, and that both have been validated before +release. This automated, atomic release process provides trust and +velocity, elevating the project from a collection of files into a +reliable, distributable piece of software. 
The following GitHub Actions workflow automates the entire release
process:

```yaml
# .github/workflows/release.yml
name: Release iperf3-monitor

on:
  push:
    tags:
      - 'v*.*.*'

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  lint-and-test:
    name: Lint and Test
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Set up Helm
        uses: azure/setup-helm@v3
        with:
          version: v3.10.0

      - name: Helm Lint
        run: helm lint ./charts/iperf3-monitor

  build-and-publish-image:
    name: Build and Publish Docker Image
    runs-on: ubuntu-latest
    needs: lint-and-test
    permissions:
      contents: read
      packages: write
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: ./exporter
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

  package-and-publish-chart:
    name: Package and Publish Helm Chart
    runs-on: ubuntu-latest
    needs: build-and-publish-image
    permissions:
      contents: write
    steps:
      - name: Check out code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Helm
        uses: azure/setup-helm@v3
        with:
          version: v3.10.0

      - name: Install yq
        run: |
          sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq && \
          sudo chmod +x /usr/bin/yq

      - name: Set Chart Version
        run: |
          VERSION=$(echo "${{ github.ref_name }}" | sed 's/^v//')
          export VERSION # yq's strenv() reads from the environment
          yq e -i '.version = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
          yq e -i '.appVersion = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml

      - name: Publish Helm chart
        uses: stefanprodan/helm-gh-pages@v1.6.0
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          charts_dir: ./charts
          charts_url: https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }}
```

### **6.3 Documentation and Usability** {#documentation-and-usability}

The final, and arguably most critical, component for project success is
high-quality documentation. The README.md file at the root of the
repository is the primary entry point for any user. It should clearly
explain what the project does, its architecture, and how to deploy and
use it.

A common failure point in software projects is documentation that falls
out of sync with the code. For Helm charts, the values.yaml file
frequently changes, adding new parameters and options. To combat this,
it is a best practice to automate the documentation of these parameters.
The helm-docs tool can be integrated directly into the CI/CD pipeline to
automatically generate the "Parameters" section of the README.md by
parsing the comments directly from the values.yaml file.^20^ This
ensures that the documentation is always an accurate reflection of the
chart's configurable options, providing a seamless and trustworthy
experience for users.

## **Conclusion**

The proliferation of distributed microservices on Kubernetes has made
network performance a critical, yet often opaque, component of overall
application health. This report has detailed a comprehensive,
production-grade solution for establishing continuous network validation
within a Kubernetes cluster.
By architecting a system around the robust, +decoupled pattern of an iperf3-server DaemonSet and a Kubernetes-aware +iperf3-exporter Deployment, this service provides a resilient and +automated foundation for network observability. + +The implementation leverages industry-standard tools---Python for the +exporter, Prometheus for metrics storage, and Grafana for +visualization---to create a powerful and flexible monitoring pipeline. +The entire service is packaged into a professional Helm chart, following +best practices for templating, configuration, and adaptability. This +allows for simple, version-controlled deployment across a wide range of +environments. The final Grafana dashboard transforms the collected data +into an intuitive, interactive narrative, enabling engineers to move +swiftly from high-level anomaly detection to root-cause analysis. + +Ultimately, by treating network performance not as a given but as a +continuously measured metric, organizations can proactively identify and +resolve infrastructure bottlenecks, enhance application reliability, and +ensure a consistent, high-quality experience for their users in the +dynamic world of Kubernetes. + +#### Works cited + +1. How to Identify Performance Issues in Kubernetes - LabEx, accessed + > June 17, 2025, + > [[https://labex.io/questions/how-to-identify-performance-issues-in-kubernetes-11358]{.underline}](https://labex.io/questions/how-to-identify-performance-issues-in-kubernetes-11358) + +2. Performing large-scale network testing on Red Hat OpenShift: A 100 + > Gbps approach, accessed June 17, 2025, + > [[https://www.redhat.com/en/blog/performing-large-scale-network-testing-red-hat-openshift]{.underline}](https://www.redhat.com/en/blog/performing-large-scale-network-testing-red-hat-openshift) + +3. How to Implement Full-Stack Monitoring with Prometheus/Grafana on + > FreeBSD Operating System \| Siberoloji, accessed June 17, 2025, + > [[https://www.siberoloji.com/how-to-implement-full-stack-monitoring-with-prometheusgrafana-on-freebsd-operating-system/]{.underline}](https://www.siberoloji.com/how-to-implement-full-stack-monitoring-with-prometheusgrafana-on-freebsd-operating-system/) + +4. Kubernetes Metrics and Monitoring with Prometheus and Grafana - DEV + > Community, accessed June 17, 2025, + > [[https://dev.to/abhay_yt_52a8e72b213be229/kubernetes-metrics-and-monitoring-with-prometheus-and-grafana-4e9n]{.underline}](https://dev.to/abhay_yt_52a8e72b213be229/kubernetes-metrics-and-monitoring-with-prometheus-and-grafana-4e9n) + +5. The Top 30 Grafana Dashboard Examples - Logit.io, accessed June 17, + > 2025, + > [[https://logit.io/blog/post/top-grafana-dashboards-and-visualisations/]{.underline}](https://logit.io/blog/post/top-grafana-dashboards-and-visualisations/) + +6. Autopilot Metrics \| Grafana Labs, accessed June 17, 2025, + > [[https://grafana.com/grafana/dashboards/23123-autopilot-metrics/]{.underline}](https://grafana.com/grafana/dashboards/23123-autopilot-metrics/) + +7. Kubernetes DaemonSet: Practical Guide to Monitoring in Kubernetes - + > Cast AI, accessed June 17, 2025, + > [[https://cast.ai/blog/kubernetes-daemonset-practical-guide-to-monitoring-in-kubernetes/]{.underline}](https://cast.ai/blog/kubernetes-daemonset-practical-guide-to-monitoring-in-kubernetes/) + +8. 
Kubernetes DaemonSets vs Deployments: Key Differences and Use + > Cases - RubixKube™, accessed June 17, 2025, + > [[https://www.rubixkube.io/blog/kubernetes-daemonsets-vs-deployments-key-differences-and-use-cases-4a5i]{.underline}](https://www.rubixkube.io/blog/kubernetes-daemonsets-vs-deployments-key-differences-and-use-cases-4a5i) + +9. Kubernetes DaemonSet: Examples, Use Cases & Best Practices - + > groundcover, accessed June 17, 2025, + > [[https://www.groundcover.com/blog/kubernetes-daemonset]{.underline}](https://www.groundcover.com/blog/kubernetes-daemonset) + +10. Complete Comparison of Kubernetes Daemonset Vs Deployment \| + > Zeet.co, accessed June 17, 2025, + > [[https://zeet.co/blog/kubernetes-daemonset-vs-deployment]{.underline}](https://zeet.co/blog/kubernetes-daemonset-vs-deployment) + +11. Testing Connectivity Between Kubernetes Pods with Iperf3 \| + > Support - SUSE, accessed June 17, 2025, + > [[https://www.suse.com/support/kb/doc/?id=000020954]{.underline}](https://www.suse.com/support/kb/doc/?id=000020954) + +12. Pharb/kubernetes-iperf3: Simple wrapper around iperf3 to \... - + > GitHub, accessed June 17, 2025, + > [[https://github.com/Pharb/kubernetes-iperf3]{.underline}](https://github.com/Pharb/kubernetes-iperf3) + +13. Building a Custom Prometheus Exporter in Python - SysOpsPro, + > accessed June 17, 2025, + > [[https://sysopspro.com/how-to-build-your-own-prometheus-exporter-in-python/]{.underline}](https://sysopspro.com/how-to-build-your-own-prometheus-exporter-in-python/) + +14. How to Create a Prometheus Exporter? - Enix.io, accessed June 17, + > 2025, + > [[https://enix.io/en/blog/create-prometheus-exporter/]{.underline}](https://enix.io/en/blog/create-prometheus-exporter/) + +15. Examples --- iperf3 0.1.10 documentation - iperf3 python wrapper, + > accessed June 17, 2025, + > [[https://iperf3-python.readthedocs.io/en/latest/examples.html]{.underline}](https://iperf3-python.readthedocs.io/en/latest/examples.html) + +16. markormesher/iperf-prometheus-collector - GitHub, accessed June 17, + > 2025, + > [[https://github.com/markormesher/iperf-prometheus-collector]{.underline}](https://github.com/markormesher/iperf-prometheus-collector) + +17. Using Helm with Kubernetes: A Guide to Helm Charts and Their + > Implementation, accessed June 17, 2025, + > [[https://dev.to/alexmercedcoder/using-helm-with-kubernetes-a-guide-to-helm-charts-and-their-implementation-8dg]{.underline}](https://dev.to/alexmercedcoder/using-helm-with-kubernetes-a-guide-to-helm-charts-and-their-implementation-8dg) + +18. Kubernetes Helm Charts: The Basics and a Quick Tutorial - Spot.io, + > accessed June 17, 2025, + > [[https://spot.io/resources/kubernetes-architecture/kubernetes-helm-charts-the-basics-and-a-quick-tutorial/]{.underline}](https://spot.io/resources/kubernetes-architecture/kubernetes-helm-charts-the-basics-and-a-quick-tutorial/) + +19. Understand a Helm chart structure - Bitnami Documentation, accessed + > June 17, 2025, + > [[https://docs.bitnami.com/kubernetes/faq/administration/understand-helm-chart/]{.underline}](https://docs.bitnami.com/kubernetes/faq/administration/understand-helm-chart/) + +20. Helm Chart Essentials & Writing Effective Charts - DEV Community, + > accessed June 17, 2025, + > [[https://dev.to/hkhelil/helm-chart-essentials-writing-effective-charts-11ca]{.underline}](https://dev.to/hkhelil/helm-chart-essentials-writing-effective-charts-11ca) + +21. 
Values - Helm, accessed June 17, 2025, + > [[https://helm.sh/docs/chart_best_practices/values/]{.underline}](https://helm.sh/docs/chart_best_practices/values/) + +22. grafana.com, accessed June 17, 2025, + > [[https://grafana.com/grafana/dashboards/22901-traffic-monitoring/#:\~:text=host%20traffic%20breakdowns.-,Grafana%20Dashboard,traffic%20statistics%20by%20source%2Fdestination.]{.underline}](https://grafana.com/grafana/dashboards/22901-traffic-monitoring/#:~:text=host%20traffic%20breakdowns.-,Grafana%20Dashboard,traffic%20statistics%20by%20source%2Fdestination.) diff --git a/README.md b/README.md new file mode 100644 index 0000000..81a9f30 --- /dev/null +++ b/README.md @@ -0,0 +1,437 @@ +# Kubernetes-Native Network Performance Monitoring Service + +This project provides a comprehensive solution for continuous network validation within a Kubernetes cluster. Leveraging industry-standard tools like `iperf3`, `Prometheus`, and `Grafana`, it offers proactive monitoring of network performance between nodes, helping to identify and troubleshoot latency, bandwidth, and packet loss issues before they impact applications. + +## Features + +* **Continuous N-to-N Testing:** Automatically measures network performance between all nodes in the cluster. +* **Kubernetes-Native:** Deploys as standard Kubernetes workloads (DaemonSet, Deployment). +* **Dynamic Discovery:** Exporter automatically discovers iperf3 server pods using the Kubernetes API. +* **Prometheus Integration:** Translates iperf3 results into standard Prometheus metrics for time-series storage. +* **Grafana Visualization:** Provides a rich, interactive dashboard with heatmaps and time-series graphs. +* **Helm Packaging:** Packaged as a Helm chart for easy deployment and configuration management. +* **Automated CI/CD:** Includes a GitHub Actions workflow for building and publishing the exporter image and Helm chart. + +## Architecture + +The service is based on a decoupled architecture: + +1. **iperf3-server DaemonSet:** Deploys an `iperf3` server pod on every node to act as a test endpoint. Running on the host network to measure raw node performance. +2. **iperf3-exporter Deployment:** A centralized service that uses the Kubernetes API to discover server pods, orchestrates `iperf3` client tests against them, parses the JSON output, and exposes performance metrics via an HTTP endpoint. +3. **Prometheus & Grafana Stack:** A standard monitoring backend (like `kube-prometheus-stack`) that scrapes the exporter's metrics and visualizes them in a custom dashboard. + +This separation of concerns ensures scalability, resilience, and aligns with Kubernetes operational principles. + +## Getting Started + +### Prerequisites + +* A running Kubernetes cluster. +* `kubectl` configured to connect to your cluster. +* Helm v3+ installed. +* A Prometheus instance configured to scrape services (ideally using the Prometheus Operator and ServiceMonitors). +* A Grafana instance accessible and configured with Prometheus as a data source. + +### Installation with Helm + +1. Add the Helm chart repository (replace with your actual repo URL once published): + + ```/dev/null/helm-install.sh#L1-1 + helm repo add iperf3-monitor https://your-github-org.github.io/iperf3-monitor/ + ``` + +2. Update your Helm repositories: + + ```/dev/null/helm-install.sh#L3-3 + helm repo update + ``` + +3. 
Install the chart:

   ```/dev/null/helm-install.sh#L5-10
   # Install into the monitoring namespace (or your preferred namespace).
   # The --values flag is optional: use it to pass a custom values file.
   helm install iperf3-monitor iperf3-monitor/iperf3-monitor \
     --namespace monitoring \
     --create-namespace \
     --values values.yaml
   ```

   > **Note:** Ensure your Prometheus instance is configured to scrape services in the namespace where you install the chart and that it recognizes `ServiceMonitor` resources with the label `release: prometheus-operator` (if using the standard `kube-prometheus-stack` setup).

### Configuration

The Helm chart is highly configurable via the `values.yaml` file. You can override default settings by creating your own `values.yaml` and passing it during installation (`--values my-values.yaml`).

Refer to the comments in the default `values.yaml` for a detailed explanation of each parameter:

```iperf3-monitor/charts/iperf3-monitor/values.yaml
# Default values for iperf3-monitor.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# -- Override the name of the chart.
nameOverride: ""

# -- Override the fully qualified app name.
fullnameOverride: ""

exporter:
  # -- Configuration for the exporter container image.
  image:
    # -- The container image repository for the exporter.
    repository: ghcr.io/my-org/iperf3-prometheus-exporter # Replace with your repo URL
    # -- The container image tag for the exporter. If not set, the chart's appVersion is used.
    tag: ""
    # -- The image pull policy for the exporter container.
    pullPolicy: IfNotPresent

  # -- Number of exporter pod replicas. Typically 1 is sufficient.
  replicaCount: 1

  # -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
  testInterval: 300

  # -- Timeout in seconds for a single iperf3 test run.
  testTimeout: 10

  # -- Protocol to use for testing (tcp or udp).
  testProtocol: tcp

  # -- CPU and memory resource requests and limits for the exporter pod.
  # @default -- A small default is provided if commented out.
  resources: {}
  #  requests:
  #    cpu: "100m"
  #    memory: "128Mi"
  #  limits:
  #    cpu: "500m"
  #    memory: "256Mi"

server:
  # -- Configuration for the iperf3 server container image (DaemonSet).
  image:
    # -- The container image repository for the iperf3 server.
    repository: networkstatic/iperf3
    # -- The container image tag for the iperf3 server.
    tag: latest

  # -- CPU and memory resource requests and limits for the iperf3 server pods (DaemonSet).
  # These should be very low as the server is mostly idle.
  # @default -- A small default is provided if commented out.
  resources: {}
  #  requests:
  #    cpu: "50m"
  #    memory: "64Mi"
  #  limits:
  #    cpu: "100m"
  #    memory: "128Mi"

  # -- Node selector for scheduling iperf3 server pods.
  # Use this to restrict the DaemonSet to a subset of nodes.
  # @default -- {} (schedule on all nodes)
  nodeSelector: {}

  # -- Tolerations for scheduling iperf3 server pods on tainted nodes (e.g., control-plane nodes).
  # This is often necessary to include master nodes in the test mesh.
  # @default -- Tolerates control-plane and master taints.
  tolerations:
    - key: "node-role.kubernetes.io/control-plane"
      operator: "Exists"
      effect: "NoSchedule"
    - key: "node-role.kubernetes.io/master"
      operator: "Exists"
      effect: "NoSchedule"

rbac:
  # -- If true, create ServiceAccount, ClusterRole, and ClusterRoleBinding for the exporter.
  # Set to false if you manage RBAC externally.
+ create: true + +serviceAccount: + # -- The name of the ServiceAccount to use for the exporter pod. + # Only used if rbac.create is false. If not set, it defaults to the chart's fullname. + name: "" + +serviceMonitor: + # -- If true, create a ServiceMonitor resource for integration with Prometheus Operator. + # Requires a running Prometheus Operator in the cluster. + enabled: true + + # -- Scrape interval for the ServiceMonitor. How often Prometheus scrapes the exporter metrics. + interval: 60s + + # -- Scrape timeout for the ServiceMonitor. How long Prometheus waits for metrics response. + scrapeTimeout: 30s + +# -- Configuration for the exporter Service. +service: + # -- Service type. ClusterIP is typically sufficient. + type: ClusterIP + # -- Port on which the exporter service is exposed. + port: 9876 + # -- Target port on the exporter pod. + targetPort: 9876 + +# -- Optional configuration for a network policy to allow traffic to the iperf3 server DaemonSet. +# This is often necessary if you are using a network policy controller. +networkPolicy: + # -- If true, create a NetworkPolicy resource. + enabled: false + # -- Specify source selectors if needed (e.g., pods in a specific namespace). + from: [] + # -- Specify namespace selectors if needed. + namespaceSelector: {} + # -- Specify pod selectors if needed. + podSelector: {} +``` + +## Grafana Dashboard + +A custom Grafana dashboard is provided to visualize the collected `iperf3` metrics. + +1. Log in to your Grafana instance. +2. Navigate to `Dashboards` -> `Import`. +3. Paste the full JSON model provided below into the text area and click `Load`. +4. Select your Prometheus data source and click `Import`. + +```/dev/null/grafana-dashboard.json +{ +"__inputs": [], +"__requires": [ +{ +"type": "grafana", +"id": "grafana", +"name": "Grafana", +"version": "8.0.0" +}, +{ +"type": "datasource", +"id": "prometheus", +"name": "Prometheus", +"version": "1.0.0" +} +], +"annotations": { +"list": [ +{ +"builtIn": 1, +"datasource": { +"type": "grafana", +"uid": "-- Grafana --" +}, +"enable": true, +"hide": true, +"iconColor": "rgba(0, 211, 255, 1)", +"name": "Annotations & Alerts", +"type": "dashboard" +} +] +}, +"editable": true, +"fiscalYearStartMonth": 0, +"gnetId": null, +"graphTooltip": 0, +"id": null, +"links": [], +"panels": [ +{ +"datasource": { +"type": "prometheus", +"uid": "prometheus" +}, +"gridPos": { +"h": 9, +"w": 24, +"x": 0, +"y": 0 +}, +"id": 2, +"targets": [ +{ +"expr": "avg(iperf_network_bandwidth_mbps) by (source_node, destination_node)", +"format": "heatmap", +"legendFormat": "{{source_node}} -> {{destination_node}}", +"refId": "A" +} +], +"cards": { "cardPadding": null, "cardRound": null }, +"color": { +"mode": "spectrum", +"scheme": "red-yellow-green", +"exponent": 0.5, +"reverse": false +}, +"dataFormat": "tsbuckets", +"yAxis": { "show": true, "format": "short" }, +"xAxis": { "show": true } +}, +{ +"title": "Bandwidth Over Time (Source: $source_node, Dest: $destination_node)", +"type": "timeseries", +"datasource": { +"type": "prometheus", +"uid": "prometheus" +}, +"gridPos": { +"h": 8, +"w": 12, +"x": 0, +"y": 9 +}, +"targets": [ +{ +"expr": "iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\", destination_node=~\"^$destination_node$\", protocol=~\"^$protocol$\"}", +"legendFormat": "Bandwidth", +"refId": "A" +} +], +"fieldConfig": { +"defaults": { +"unit": "mbps" +} +} +}, +{ +"title": "Jitter Over Time (Source: $source_node, Dest: $destination_node)", +"type": "timeseries", +"datasource": { +"type": "prometheus", 
"uid": "prometheus"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 9
},
"targets": [
{
"expr": "iperf_network_jitter_ms{source_node=~\"^$source_node$\", destination_node=~\"^$destination_node$\", protocol=\"udp\"}",
"legendFormat": "Jitter",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "ms"
}
}
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": ["iperf3", "network", "kubernetes"],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"definition": "label_values(iperf_network_bandwidth_mbps, source_node)",
"hide": 0,
"includeAll": false,
"multi": false,
"name": "source_node",
"options": [],
"query": "label_values(iperf_network_bandwidth_mbps, source_node)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"definition": "label_values(iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\"}, destination_node)",
"hide": 0,
"includeAll": false,
"multi": false,
"name": "destination_node",
"options": [],
"query": "label_values(iperf_network_bandwidth_mbps{source_node=~\"^$source_node$\"}, destination_node)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
},
{
"current": { "selected": true, "text": "tcp", "value": "tcp" },
"hide": 0,
"includeAll": false,
"multi": false,
"name": "protocol",
"options": [
{ "selected": true, "text": "tcp", "value": "tcp" },
{ "selected": false, "text": "udp", "value": "udp" }
],
"query": "tcp,udp",
"skipUrlSync": false,
"type": "custom"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Kubernetes iperf3 Network Performance",
"uid": "k8s-iperf3-dashboard",
"version": 1,
"weekStart": ""
}
```

## Repository Structure

The project follows a standard structure:

```/dev/null/repo-structure.txt
.
├── .github/
│   └── workflows/
│       └── release.yml            # GitHub Actions workflow for CI/CD
├── charts/
│   └── iperf3-monitor/            # The Helm chart for the service
│       ├── Chart.yaml
│       ├── values.yaml
│       └── templates/
│           ├── _helpers.tpl
│           ├── server-daemonset.yaml
│           ├── exporter-deployment.yaml
│           ├── rbac.yaml
│           ├── service.yaml
│           └── servicemonitor.yaml
├── exporter/
│   ├── Dockerfile                 # Dockerfile for the exporter
│   ├── requirements.txt           # Python dependencies
│   └── exporter.py                # Exporter source code
├── .gitignore                     # Specifies intentionally untracked files
├── LICENSE                        # Project license
└── README.md                      # This file
```

## Development and CI/CD

The project includes a GitHub Actions workflow (`.github/workflows/release.yml`) triggered on Git tags (`v*.*.*`) to automate:

1. Linting the Helm chart.
2. Building and publishing the Docker image for the exporter to GitHub Container Registry (`ghcr.io`).
3. Updating the Helm chart version based on the Git tag.
4. Packaging and publishing the Helm chart to GitHub Pages.

## License

This project is licensed under the terms defined in the `LICENSE` file.

```iperf3-monitor/LICENSE
This project is currently unlicensed. Please see the project's documentation or repository for licensing information when it becomes available.
+``` diff --git a/bootstrap.md b/bootstrap.md new file mode 100644 index 0000000..0b4c1c4 --- /dev/null +++ b/bootstrap.md @@ -0,0 +1,1418 @@ +# **Architecting a Kubernetes-Native Network Performance Monitoring Service with iperf3, Prometheus, and Helm** + +## **Section 1: Architectural Blueprint for Continuous Network Validation** + +### **1.1 Introduction to Proactive Network Monitoring in Kubernetes** {#introduction-to-proactive-network-monitoring-in-kubernetes} + +In modern cloud-native infrastructures, Kubernetes has emerged as the de +facto standard for container orchestration, simplifying the deployment, +scaling, and management of complex applications.^1^ However, the very +dynamism and abstraction that make Kubernetes powerful also introduce +significant challenges in diagnosing network performance issues. The +ephemeral nature of pods, the complexity of overlay networks provided by +Container Network Interfaces (CNIs), and the multi-layered traffic +routing through Services and Ingress controllers can obscure the root +causes of latency, packet loss, and throughput degradation. + +Traditional, reactive troubleshooting---investigating network problems +only after an application has failed---is insufficient in these +environments. Performance bottlenecks can be subtle, intermittent, and +difficult to reproduce, often manifesting as degraded user experience +long before they trigger hard failures.^1^ To maintain the reliability +and performance of critical workloads, engineering teams must shift from +a reactive to a proactive stance. This requires a system that performs +continuous, automated validation of the underlying network fabric, +treating network health not as an assumption but as a measurable, +time-series metric. + +This document outlines the architecture and implementation of a +comprehensive, Kubernetes-native network performance monitoring service. +The solution leverages a suite of industry-standard, open-source tools +to provide continuous, actionable insights into cluster network health. +The core components are: + +- **iperf3:** A widely adopted tool for active network performance + > measurement, used to generate traffic and measure maximum achievable + > bandwidth, jitter, and packet loss between two points.^2^ + +- **Prometheus:** A powerful, open-source monitoring and alerting system + > that has become the standard for collecting and storing time-series + > metrics in the Kubernetes ecosystem.^3^ + +- **Grafana:** A leading visualization tool for creating rich, + > interactive dashboards from various data sources, including + > Prometheus, enabling intuitive analysis of complex datasets.^4^ + +By combining these components into a cohesive, automated service, we can +transform abstract network performance into a concrete, queryable, and +visualizable stream of data, enabling teams to detect and address +infrastructure-level issues before they impact end-users.^6^ + +### **1.2 The Core Architectural Pattern: Decoupled Test Endpoints and a Central Orchestrator** {#the-core-architectural-pattern-decoupled-test-endpoints-and-a-central-orchestrator} + +The foundation of this monitoring service is a robust, decoupled +architectural pattern designed for scalability and resilience within a +dynamic Kubernetes environment. The design separates the passive test +endpoints from the active test orchestrator, a critical distinction that +ensures the system is both efficient and aligned with Kubernetes +operational principles. 
+ +The data flow and component interaction can be visualized as follows: + +1. A **DaemonSet** deploys an iperf3 server pod onto every node in the + > cluster, creating a mesh of passive test targets. + +2. A central **Deployment**, the iperf3-exporter, uses the Kubernetes + > API to discover the IP addresses of all iperf3 server pods. + +3. The iperf3-exporter periodically orchestrates tests, running an + > iperf3 client to connect to each server pod and measure network + > performance. + +4. The exporter parses the JSON output from iperf3, transforms the + > results into Prometheus metrics, and exposes them on a /metrics + > HTTP endpoint. + +5. A **Prometheus** server, configured via a **ServiceMonitor**, + > scrapes the /metrics endpoint of the exporter, ingesting the + > performance data into its time-series database. + +6. A **Grafana** instance, using Prometheus as a data source, + > visualizes the metrics in a purpose-built dashboard, providing + > heatmaps and time-series graphs of node-to-node bandwidth, jitter, + > and packet loss. + +This architecture is composed of three primary logical components: + +- **Component 1: The iperf3-server DaemonSet.** To accurately measure + > network performance between any two nodes (N-to-N), an iperf3 server + > process must be running and accessible on every node. The DaemonSet + > is the canonical Kubernetes controller for this exact use case. It + > guarantees that a copy of a specific pod runs on all, or a selected + > subset of, nodes within the cluster.^7^ When a new node joins the + > cluster, the + > DaemonSet controller automatically deploys an iperf3-server pod to + > it; conversely, when a node is removed, the pod is garbage + > collected. This ensures the mesh of test endpoints is always in sync + > with the state of the cluster, requiring zero manual + > intervention.^9^ This pattern of using a + > DaemonSet to deploy iperf3 across a cluster is a well-established + > practice for network validation.^11^ + +- **Component 2: The iperf3-exporter Deployment.** A separate, + > centralized component is required to act as the test orchestrator. + > This component is responsible for initiating the iperf3 client + > connections, executing the tests, parsing the results, and exposing + > them as Prometheus metrics. Since this is a stateless service whose + > primary function is to perform a periodic task, a Deployment is the + > ideal controller.^8^ A + > Deployment ensures a specified number of replicas are running, + > provides mechanisms for rolling updates, and allows for independent + > resource management and lifecycle control, decoupled from the + > iperf3-server pods it tests against.^10^ + +- **Component 3: The Prometheus & Grafana Stack.** The monitoring + > backend is provided by the kube-prometheus-stack, a comprehensive + > Helm chart that deploys Prometheus, Grafana, Alertmanager, and the + > necessary exporters for cluster monitoring.^4^ Our custom monitoring + > service is designed to integrate seamlessly with this stack, + > leveraging its Prometheus Operator for automatic scrape + > configuration and its Grafana instance for visualization. + +### **1.3 Architectural Justification and Design Rationale** {#architectural-justification-and-design-rationale} + +The primary strength of this architecture lies in its deliberate +separation of concerns, a design choice that yields significant benefits +in resilience, scalability, and operational efficiency. 
The DaemonSet is +responsible for the *presence* of test endpoints, while the Deployment +handles the *orchestration* of the tests. This decoupling is not +arbitrary; it is a direct consequence of applying Kubernetes-native +principles to the problem. + +The logical progression is as follows: The requirement to continuously +measure N-to-N node bandwidth necessitates that iperf3 server processes +are available on all N nodes to act as targets. The most reliable, +self-healing, and automated method to achieve this \"one-pod-per-node\" +pattern in Kubernetes is to use a DaemonSet.^7^ This makes the server +deployment automatically scale with the cluster itself. Next, a process +is needed to trigger the tests against these servers. This +\"orchestrator\" is a logically distinct, active service. It needs to be +reliable and potentially scalable, but it does not need to run on every +single node. The standard Kubernetes object for managing such stateless +services is a + +Deployment.^8^ + +This separation allows for independent and appropriate resource +allocation. The iperf3-server pods are extremely lightweight, consuming +minimal resources while idle. The iperf3-exporter, however, may be more +CPU-intensive during the brief periods it is actively running tests. By +placing them in different workload objects (DaemonSet and Deployment), +we can configure their resource requests and limits independently. This +prevents the monitoring workload from interfering with or being starved +by application workloads, a crucial consideration for any +production-grade system. This design is fundamentally more robust and +scalable than simpler, monolithic approaches, such as a single script +that attempts to manage both server and client lifecycles.^12^ + +## **Section 2: Implementing the iperf3-prometheus-exporter** + +The heart of this monitoring solution is the iperf3-prometheus-exporter, +a custom application responsible for orchestrating the network tests and +translating their results into a format that Prometheus can ingest. This +section provides a detailed breakdown of its implementation, from +technology selection to the final container image. + +### **2.1 Technology Selection: Python for Agility and Ecosystem** {#technology-selection-python-for-agility-and-ecosystem} + +Python was selected as the implementation language for the exporter due +to its powerful ecosystem and rapid development capabilities. The +availability of mature, well-maintained libraries for interacting with +both Prometheus and Kubernetes significantly accelerates the development +of a robust, cloud-native application. + +The key libraries leveraged are: + +- **prometheus-client:** The official Python client library for + > instrumenting applications with Prometheus metrics. It provides a + > simple API for defining metrics (Gauges, Counters, etc.) and + > exposing them via an HTTP server, handling much of the boilerplate + > required for creating a valid exporter.^13^ + +- **iperf3-python:** A clean, high-level Python wrapper around the + > iperf3 C library. It allows for programmatic control of iperf3 + > clients and servers, and it can directly parse the JSON output of a + > test into a convenient Python object, eliminating the need for + > manual process management and output parsing.^15^ + +- **kubernetes:** The official Python client library for the Kubernetes + > API. 
This library is essential for the exporter to become "Kubernetes-aware," enabling it to dynamically discover the iperf3-server pods it needs to test against by querying the API server directly.

### **2.2 Core Exporter Logic (Annotated Python Code)** {#core-exporter-logic-annotated-python-code}

The exporter's logic can be broken down into five distinct steps, which
together form a continuous loop of discovery, testing, and reporting.

#### **Step 1: Initialization and Metric Definition**

The application begins by importing the necessary libraries and defining
the Prometheus metrics that will be exposed. We use a Gauge metric, as
bandwidth is a value that can go up or down. Labels are crucial for
providing context; they allow us to slice and dice the data in
Prometheus and Grafana.

> Python

import os
import time
import logging
from kubernetes import client, config
from prometheus_client import start_http_server, Gauge
import iperf3

# --- Configuration ---
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- Prometheus Metrics Definition ---
IPERF_BANDWIDTH_MBPS = Gauge(
    'iperf_network_bandwidth_mbps',
    'Network bandwidth measured by iperf3 in Megabits per second',
    ['source_node', 'destination_node', 'protocol']
)
IPERF_JITTER_MS = Gauge(
    'iperf_network_jitter_ms',
    'Network jitter measured by iperf3 in milliseconds',
    ['source_node', 'destination_node', 'protocol']
)
IPERF_PACKETS_TOTAL = Gauge(
    'iperf_network_packets_total',
    'Total packets transmitted or received during the iperf3 test',
    ['source_node', 'destination_node', 'protocol']
)
IPERF_LOST_PACKETS = Gauge(
    'iperf_network_lost_packets_total',
    'Total lost packets during the iperf3 UDP test',
    ['source_node', 'destination_node', 'protocol']
)
IPERF_TEST_SUCCESS = Gauge(
    'iperf_test_success',
    'Indicates if the iperf3 test was successful (1) or failed (0)',
    ['source_node', 'destination_node', 'protocol']
)

#### **Step 2: Kubernetes-Aware Target Discovery**

A static list of test targets is an anti-pattern in a dynamic
environment like Kubernetes.^16^ The exporter must dynamically discover
its targets. This is achieved by using the Kubernetes Python client to
query the API server for all pods that match the label selector of our
iperf3-server DaemonSet (e.g., app=iperf3-server). The function returns
a list of dictionaries, each containing the pod's IP address and the
name of the node it is running on.

This dynamic discovery is what transforms the exporter from a simple
script into a resilient, automated service. It adapts to cluster scaling
events without any manual intervention. The logical path is clear:
Kubernetes clusters are dynamic, so a hardcoded list of IPs would become
stale instantly. The API server is the single source of truth for the
cluster's state. Therefore, the exporter must query this API, which in
turn necessitates including the Kubernetes client library and
configuring the appropriate Role-Based Access Control (RBAC) permissions
for its ServiceAccount.

> Python

def discover_iperf_servers():
    """
    Discover iperf3 server pods in the cluster using the Kubernetes API.
    """
    try:
        # Load in-cluster configuration
        config.load_incluster_config()
        v1 = client.CoreV1Api()

        namespace = os.getenv('IPERF_SERVER_NAMESPACE', 'default')
        label_selector = os.getenv('IPERF_SERVER_LABEL_SELECTOR', 'app=iperf3-server')

        logging.info(f"Discovering iperf3 servers with label '{label_selector}' in namespace '{namespace}'")

        ret = v1.list_pod_for_all_namespaces(label_selector=label_selector, watch=False)

        servers = []
        for i in ret.items:
            # Ensure the pod has an IP and is running
            if i.status.pod_ip and i.status.phase == 'Running':
                servers.append({
                    'ip': i.status.pod_ip,
                    'node_name': i.spec.node_name
                })
        logging.info(f"Discovered {len(servers)} iperf3 server pods.")
        return servers
    except Exception as e:
        logging.error(f"Error discovering iperf servers: {e}")
        return []
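Before wiring the exporter into a cluster, the discovery query itself can be exercised from a workstation with cluster access. The sketch below is illustrative rather than part of the exporter: it assumes a valid local kubeconfig (hence `load_kube_config` instead of `load_incluster_config`) and the default `app=iperf3-server` label.

```python
# check_discovery.py - a minimal sketch to exercise pod discovery from a
# workstation, assuming kubectl access and the label used above.
from kubernetes import client, config

config.load_kube_config()  # outside the cluster; in-cluster code uses load_incluster_config()
v1 = client.CoreV1Api()

pods = v1.list_pod_for_all_namespaces(label_selector="app=iperf3-server", watch=False)
for pod in pods.items:
    # Mirror the exporter's filter: only running pods with an assigned IP count.
    if pod.status.pod_ip and pod.status.phase == "Running":
        print(f"{pod.spec.node_name}: {pod.status.pod_ip}")
```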
#### **Step 3: The Test Orchestration Loop**

The main function of the application contains an infinite while True
loop that orchestrates the entire process. It periodically discovers the
servers, creates a list of test pairs (node-to-node), and then executes
an iperf3 test for each pair.

> Python

def run_iperf_test(server_ip, server_port, protocol, source_node, dest_node):
    """
    Runs a single iperf3 test and updates Prometheus metrics.
    """
    logging.info(f"Running iperf3 test from {source_node} to {dest_node} "
                 f"({server_ip}:{server_port}) using {protocol.upper()}")

    client = iperf3.Client()
    client.server_hostname = server_ip
    client.port = server_port
    client.protocol = protocol
    client.duration = int(os.getenv('IPERF_TEST_DURATION', 5))
    client.json_output = True  # Critical for parsing

    result = client.run()

    # Parse results and update metrics
    parse_and_publish_metrics(result, source_node, dest_node, protocol)


def main_loop():
    """
    Main orchestration loop.
    """
    test_interval = int(os.getenv('IPERF_TEST_INTERVAL', 300))
    server_port = int(os.getenv('IPERF_SERVER_PORT', 5201))
    protocol = os.getenv('IPERF_TEST_PROTOCOL', 'tcp').lower()
    source_node_name = os.getenv('SOURCE_NODE_NAME')  # Injected via the Downward API

    if not source_node_name:
        logging.error("SOURCE_NODE_NAME environment variable not set. Exiting.")
        return

    while True:
        servers = discover_iperf_servers()

        for server in servers:
            # Avoid testing a node against itself
            if server['node_name'] == source_node_name:
                continue

            run_iperf_test(server['ip'], server_port, protocol,
                           source_node_name, server['node_name'])

        logging.info(f"Completed test cycle. Sleeping for {test_interval} seconds.")
        time.sleep(test_interval)

#### **Step 4: Parsing and Publishing Metrics**

After each test run, a dedicated function parses the JSON result object
provided by the iperf3-python library.^15^ It extracts the key
performance indicators and uses them to set the value of the
corresponding Prometheus Gauge, applying the correct labels for source
and destination nodes. Robust error handling ensures that failed tests
are also recorded as a metric, which is vital for alerting.

> Python

def parse_and_publish_metrics(result, source_node, dest_node, protocol):
    """
    Parses the iperf3 result and updates Prometheus gauges.
    """
    labels = {'source_node': source_node, 'destination_node': dest_node, 'protocol': protocol}

    if result.error:
        logging.error(f"Test from {source_node} to {dest_node} failed: {result.error}")
        IPERF_TEST_SUCCESS.labels(**labels).set(0)
        # Clear previous successful metrics for this path
        IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
        IPERF_JITTER_MS.labels(**labels).set(0)
        return

    IPERF_TEST_SUCCESS.labels(**labels).set(1)

    # The summary data is in result.sent_Mbps or result.received_Mbps
    # depending on direction. For simplicity, we check for available attributes.
    if hasattr(result, 'sent_Mbps'):
        bandwidth_mbps = result.sent_Mbps
    elif hasattr(result, 'received_Mbps'):
        bandwidth_mbps = result.received_Mbps
    else:
        # Fallback for different iperf3 versions/outputs
        bandwidth_mbps = result.Mbps if hasattr(result, 'Mbps') else 0

    IPERF_BANDWIDTH_MBPS.labels(**labels).set(bandwidth_mbps)

    if protocol == 'udp':
        IPERF_JITTER_MS.labels(**labels).set(result.jitter_ms if hasattr(result, 'jitter_ms') else 0)
        IPERF_PACKETS_TOTAL.labels(**labels).set(result.packets if hasattr(result, 'packets') else 0)
        IPERF_LOST_PACKETS.labels(**labels).set(result.lost_packets if hasattr(result, 'lost_packets') else 0)
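Because this function depends only on attribute access, it is easy to sanity-check offline. The following is a minimal sketch, assuming the code in this section is saved as exporter.py, its dependencies from requirements.txt are installed, and a stand-in object mimics a successful TCP result:

```python
# test_parse_offline.py - a quick offline check of the parsing logic,
# assuming exporter.py (from this section) is on the import path.
from types import SimpleNamespace
from prometheus_client import generate_latest
import exporter

# Stub mimicking the attributes of a successful iperf3 TCP result.
fake_result = SimpleNamespace(error=None, sent_Mbps=941.2)
exporter.parse_and_publish_metrics(fake_result, "node-a", "node-b", "tcp")

# Dump the default registry; expect iperf_test_success == 1 and the
# bandwidth gauge set to 941.2 for the node-a -> node-b path.
print(generate_latest().decode())
```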
#### **Step 5: Exposing the /metrics Endpoint**

Finally, the main execution block starts a simple HTTP server using the
prometheus-client library. This server exposes the collected metrics on
the standard /metrics path, ready to be scraped by Prometheus.^13^

> Python

if __name__ == '__main__':
    # Start the Prometheus metrics server
    listen_port = int(os.getenv('LISTEN_PORT', 9876))
    start_http_server(listen_port)
    logging.info(f"Prometheus exporter listening on port {listen_port}")

    # Start the main orchestration loop
    main_loop()

### **2.3 Containerizing the Exporter (Dockerfile)** {#containerizing-the-exporter-dockerfile}

To deploy the exporter in Kubernetes, it must be packaged into a
container image. A multi-stage Dockerfile is used to create a minimal
and more secure final image by separating the build environment from the
runtime environment. This is a standard best practice for producing
production-ready containers.^14^

> Dockerfile

# Stage 1: Build stage with dependencies
FROM python:3.9-slim as builder

WORKDIR /app

# Install iperf3 and build dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends gcc iperf3 libiperf-dev && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Stage 2: Final runtime stage
FROM python:3.9-slim

WORKDIR /app

# Copy the iperf3 binary and library from the builder stage
COPY --from=builder /usr/bin/iperf3 /usr/bin/iperf3
COPY --from=builder /usr/lib/x86_64-linux-gnu/libiperf.so.0 /usr/lib/x86_64-linux-gnu/libiperf.so.0

# Copy installed Python packages from the builder stage
COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages

# Copy the exporter application code
COPY exporter.py .

# Expose the metrics port
EXPOSE 9876

# Set the entrypoint
CMD ["python", "exporter.py"]

The corresponding requirements.txt would contain:

prometheus-client
iperf3
kubernetes
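Before moving on to the Kubernetes manifests, the image can be smoke-tested locally. The names below are illustrative: the sketch assumes the image was built from the Dockerfile above and started with the metrics port published and a dummy SOURCE_NODE_NAME (so the main loop keeps running even though in-cluster discovery fails), and that the requests library is available.

```python
# smoke_test_metrics.py - fetch /metrics from a locally running exporter,
# e.g. started with (hypothetical image tag):
#   docker run --rm -p 9876:9876 -e SOURCE_NODE_NAME=dev iperf3-prometheus-exporter:dev
import requests

resp = requests.get("http://localhost:9876/metrics", timeout=5)
resp.raise_for_status()

# The metric families are registered at startup, so their HELP/TYPE lines
# should appear even before any test cycle has produced samples.
assert "iperf_test_success" in resp.text
print(resp.text[:400])
```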
## **Section 3: Kubernetes Manifests and Deployment Strategy**

With the architectural blueprint defined and the exporter application
containerized, the next step is to translate this design into
declarative Kubernetes manifests. These YAML files define the necessary
Kubernetes objects to deploy, configure, and manage the monitoring
service. Using static manifests here provides a clear foundation before
they are parameterized into a Helm chart in the next section.

### **3.1 The iperf3-server DaemonSet** {#the-iperf3-server-daemonset}

The iperf3-server component is deployed as a DaemonSet to ensure an
instance of the server pod runs on every eligible node in the
cluster.^7^ This creates the ubiquitous grid of test endpoints required
for comprehensive N-to-N testing.

Key fields in this manifest include:

- **spec.selector**: Connects the DaemonSet to the pods it manages via
    > labels.

- **spec.template.metadata.labels**: The label app: iperf3-server is
    > applied to the pods, which is crucial for discovery by both the
    > iperf3-exporter and Kubernetes Services.

- **spec.template.spec.containers**: Defines the iperf3 container, using
    > a public image and running the iperf3 -s command to start it in
    > server mode.

- **spec.template.spec.tolerations**: This is often necessary to allow
    > the DaemonSet to schedule pods on control-plane (master) nodes,
    > which may have taints preventing normal workloads from running
    > there. This ensures the entire cluster, including masters, is part
    > of the test mesh.

- **spec.template.spec.hostNetwork: true**: This is a critical setting.
    > By running the server pods on the host's network namespace, we
    > bypass the Kubernetes network overlay (CNI) for the server side.
    > This allows the test to measure the raw performance of the
    > underlying node network interface, which is often the primary goal
    > of infrastructure-level testing.

> YAML

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: iperf3-server
  labels:
    app: iperf3-server
spec:
  selector:
    matchLabels:
      app: iperf3-server
  template:
    metadata:
      labels:
        app: iperf3-server
    spec:
      # Run on the host network to measure raw node-to-node performance
      hostNetwork: true
      # Tolerations to allow scheduling on control-plane nodes
      tolerations:
        - key: "node-role.kubernetes.io/control-plane"
          operator: "Exists"
          effect: "NoSchedule"
        - key: "node-role.kubernetes.io/master"
          operator: "Exists"
          effect: "NoSchedule"
      containers:
        - name: iperf3-server
          image: networkstatic/iperf3:latest
          args: ["-s"]  # Start in server mode
          ports:
            - containerPort: 5201
              name: iperf3
              protocol: TCP
            - containerPort: 5201
              name: iperf3-udp
              protocol: UDP
          resources:
            requests:
              cpu: "50m"
              memory: "64Mi"
            limits:
              cpu: "100m"
              memory: "128Mi"

### **3.2 The iperf3-exporter Deployment** {#the-iperf3-exporter-deployment}

The iperf3-exporter is deployed as a Deployment, as it is a stateless
application that orchestrates the tests.^14^ Only one replica is
typically needed, as it can sequentially test all nodes.

Key fields in this manifest are:

- **spec.replicas: 1**: A single instance is sufficient for most
    > clusters.
- **spec.template.spec.serviceAccountName**: This assigns the custom
    > ServiceAccount (defined next) to the pod, granting it the necessary
    > permissions to talk to the Kubernetes API.

- **spec.template.spec.containers.env**: The SOURCE_NODE_NAME
    > environment variable is populated using the Downward API. This is
    > how the exporter pod knows which node *it* is running on, allowing
    > it to skip testing against itself.

- **spec.template.spec.containers.image**: This points to the custom
    > exporter image built in the previous section.

> YAML

apiVersion: apps/v1
kind: Deployment
metadata:
  name: iperf3-exporter
  labels:
    app: iperf3-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: iperf3-exporter
  template:
    metadata:
      labels:
        app: iperf3-exporter
    spec:
      serviceAccountName: iperf3-exporter-sa
      containers:
        - name: iperf3-exporter
          image: your-repo/iperf3-prometheus-exporter:latest # Replace with your image
          ports:
            - containerPort: 9876
              name: metrics
          env:
            # Use the Downward API to inject the node name this pod is running on
            - name: SOURCE_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # Other configuration for the exporter script
            - name: IPERF_TEST_INTERVAL
              value: "300"
            - name: IPERF_SERVER_LABEL_SELECTOR
              value: "app=iperf3-server"
          resources:
            requests:
              cpu: "100m"
              memory: "128Mi"
            limits:
              cpu: "500m"
              memory: "256Mi"

### **3.3 RBAC: Granting Necessary Permissions** {#rbac-granting-necessary-permissions}

For the exporter to perform its dynamic discovery of iperf3-server pods,
it must be granted specific, limited permissions to read information
from the Kubernetes API. This is accomplished through a ServiceAccount,
a ClusterRole, and a ClusterRoleBinding.

- **ServiceAccount**: Provides an identity for the exporter pod within
    > the cluster.

- **ClusterRole**: Defines a set of permissions. Here, we grant get,
    > list, and watch access to pods. These are the minimum required
    > permissions for the discovery function to work. The role is a
    > ClusterRole because the exporter needs to find pods across all
    > namespaces where servers might be running.

- **ClusterRoleBinding**: Links the ServiceAccount to the ClusterRole,
    > effectively granting the permissions to any pod that uses the
    > ServiceAccount.

> YAML

apiVersion: v1
kind: ServiceAccount
metadata:
  name: iperf3-exporter-sa
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: iperf3-exporter-role
rules:
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: iperf3-exporter-rb
subjects:
  - kind: ServiceAccount
    name: iperf3-exporter-sa
    namespace: default # The namespace where the exporter is deployed
roleRef:
  kind: ClusterRole
  name: iperf3-exporter-role
  apiGroup: rbac.authorization.k8s.io
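A quick way to confirm the binding is effective is to attempt the same discovery call from a pod running under this ServiceAccount and check for a 403. The sketch below is illustrative and assumes it runs in-cluster, for example via kubectl exec into the exporter pod, which already ships the kubernetes client:

```python
# rbac_check.py - run inside a pod using the iperf3-exporter-sa
# ServiceAccount; a 403 ApiException means the ClusterRoleBinding is missing.
from kubernetes import client, config
from kubernetes.client.rest import ApiException

config.load_incluster_config()
v1 = client.CoreV1Api()

try:
    pods = v1.list_pod_for_all_namespaces(label_selector="app=iperf3-server", watch=False)
    print(f"RBAC OK: this ServiceAccount can list pods ({len(pods.items)} matched)")
except ApiException as e:
    if e.status == 403:
        print("RBAC misconfigured: listing pods is forbidden for this ServiceAccount")
    else:
        raise
```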
### **3.4 Network Exposure: Service and ServiceMonitor** {#network-exposure-service-and-servicemonitor}

To make the exporter's metrics available to Prometheus, we need two
final objects. The Service exposes the exporter pod's metrics port
within the cluster, and the ServiceMonitor tells the Prometheus Operator
how to find and scrape that service.

This ServiceMonitor-based approach is the linchpin for a GitOps-friendly
integration. Instead of manually editing the central Prometheus
configuration file---a brittle and non-declarative process---we deploy a
ServiceMonitor custom resource alongside our application.^14^ The
Prometheus Operator, a key component of the kube-prometheus-stack,
continuously watches for these objects. When it discovers our
iperf3-exporter-sm, it automatically generates the necessary scrape
configuration and reloads Prometheus without any manual
intervention.^4^ This empowers the application team to define *how
their application should be monitored* as part of the application's own
deployment package, a cornerstone of scalable, "you build it, you run
it" observability.

> YAML

apiVersion: v1
kind: Service
metadata:
  name: iperf3-exporter-svc
  labels:
    app: iperf3-exporter
spec:
  selector:
    app: iperf3-exporter
  ports:
    - name: metrics
      port: 9876
      targetPort: metrics
      protocol: TCP
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: iperf3-exporter-sm
  labels:
    # Label for the Prometheus Operator to discover this ServiceMonitor
    release: prometheus-operator
spec:
  selector:
    matchLabels:
      # This must match the labels on the Service object above
      app: iperf3-exporter
  endpoints:
    - port: metrics
      interval: 60s
      scrapeTimeout: 30s
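Once the ServiceMonitor has been picked up and a scrape cycle has completed, the new series can be confirmed end-to-end through the Prometheus HTTP API. The sketch below is illustrative only; it assumes Prometheus has been port-forwarded to localhost:9090 (for example, `kubectl port-forward svc/prometheus-operated 9090` in a kube-prometheus-stack install) and that the requests library is available.

```python
# verify_scrape.py - confirm Prometheus is ingesting the exporter's series,
# assuming the Prometheus API is reachable on localhost:9090.
import requests

resp = requests.get(
    "http://localhost:9090/api/v1/query",
    params={"query": "iperf_network_bandwidth_mbps"},
    timeout=5,
)
resp.raise_for_status()

# Print one line per node pair currently present in the TSDB.
for series in resp.json()["data"]["result"]:
    labels = series["metric"]
    print(f"{labels.get('source_node')} -> {labels.get('destination_node')}: {series['value'][1]} Mbps")
```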
## **Section 4: Packaging with Helm for Reusability and Distribution**

While static YAML manifests are excellent for defining Kubernetes
resources, they lack the flexibility needed for easy configuration,
distribution, and lifecycle management. Helm, the package manager for
Kubernetes, solves this by bundling applications into
version-controlled, reusable packages called charts.^17^ This section
details how to package the entire iperf3 monitoring service into a
professional, flexible, and distributable Helm chart.

### **4.1 Helm Chart Structure** {#helm-chart-structure}

A well-organized Helm chart follows a standard directory structure. This
convention makes charts easier to understand and maintain.^19^

iperf3-monitor/
├── Chart.yaml        # Metadata about the chart (name, version, etc.)
├── values.yaml       # Default configuration values for the chart
├── charts/           # Directory for sub-chart dependencies (empty for this project)
├── templates/        # Directory containing the templated Kubernetes manifests
│   ├── _helpers.tpl  # A place for reusable template helpers
│   ├── server-daemonset.yaml
│   ├── exporter-deployment.yaml
│   ├── rbac.yaml
│   ├── service.yaml
│   └── servicemonitor.yaml
└── README.md         # Documentation for the chart

### **4.2 Templating the Kubernetes Manifests** {#templating-the-kubernetes-manifests}

The core of Helm's power lies in its templating engine, which uses Go
templates. We convert the static manifests from Section 3 into dynamic
templates by replacing hardcoded values with references to variables
defined in the values.yaml file.

A crucial best practice is to use a _helpers.tpl file to define common
functions and partial templates, especially for generating resource
names and labels. This reduces boilerplate, ensures consistency, and
makes the chart easier to manage.^19^

**Example: templates/_helpers.tpl**

> Code snippet

{{/*
Expand the name of the chart.
*/}}
{{- define "iperf3-monitor.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end -}}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited
to this (by the DNS naming spec).
*/}}
{{- define "iperf3-monitor.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end -}}

{{/*
Common labels
*/}}
{{- define "iperf3-monitor.labels" -}}
helm.sh/chart: {{ include "iperf3-monitor.name" . }}
{{ include "iperf3-monitor.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end -}}

{{/*
Selector labels
*/}}
{{- define "iperf3-monitor.selectorLabels" -}}
app.kubernetes.io/name: {{ include "iperf3-monitor.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}}

**Example: Templated exporter-deployment.yaml**

> YAML

apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "iperf3-monitor.fullname" . }}-exporter
  labels:
    {{- include "iperf3-monitor.labels" . | nindent 4 }}
    app.kubernetes.io/component: exporter
spec:
  replicas: {{ .Values.exporter.replicaCount }}
  selector:
    matchLabels:
      {{- include "iperf3-monitor.selectorLabels" . | nindent 6 }}
      app.kubernetes.io/component: exporter
  template:
    metadata:
      labels:
        {{- include "iperf3-monitor.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: exporter
    spec:
      {{- if .Values.rbac.create }}
      serviceAccountName: {{ include "iperf3-monitor.fullname" . }}-sa
      {{- else }}
      serviceAccountName: {{ .Values.serviceAccount.name }}
      {{- end }}
      containers:
        - name: iperf3-exporter
          image: "{{ .Values.exporter.image.repository }}:{{ .Values.exporter.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.exporter.image.pullPolicy }}
          ports:
            - containerPort: 9876
              name: metrics
          env:
            - name: SOURCE_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: IPERF_TEST_INTERVAL
              value: "{{ .Values.exporter.testInterval }}"
          resources:
            {{- toYaml .Values.exporter.resources | nindent 12 }}

### **4.3 Designing a Comprehensive values.yaml** {#designing-a-comprehensive-values.yaml}

The values.yaml file is the public API of a Helm chart. A well-designed
values file is intuitive, clearly documented, and provides users with
the flexibility to adapt the chart to their specific needs. Best
practices include using clear, camelCase naming conventions and
providing comments for every parameter.^21^

A particularly powerful feature of Helm is conditional logic. By
wrapping entire resource definitions in if blocks based on boolean flags
in values.yaml (e.g., {{- if .Values.rbac.create }}), the chart becomes
highly adaptable. A user in a high-security environment can disable the
automatic creation of ClusterRoles by setting rbac.create: false,
allowing them to manage permissions manually without causing the Helm
installation to fail.^20^ Similarly, a user not running the Prometheus
Operator can set serviceMonitor.enabled: false. This adaptability
transforms the chart from a rigid, all-or-nothing package into a
flexible building block, dramatically increasing its utility across
different organizations and security postures.
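Because these flags gate whole resource definitions, their effect can be verified without touching a cluster by rendering the chart locally. A minimal sketch, assuming helm is on the PATH, the command is run from the repository root, and the RBAC manifests are wrapped in the rbac.create conditional as described:

```python
# render_check.py - verify conditional rendering of the RBAC objects,
# assuming `helm` is installed and run from the repository root.
import subprocess

def render(flag: str) -> str:
    """Render the chart with rbac.create toggled and return the manifests."""
    return subprocess.run(
        ["helm", "template", "test", "./charts/iperf3-monitor", "--set", f"rbac.create={flag}"],
        check=True, capture_output=True, text=True,
    ).stdout

assert "kind: ClusterRole" in render("true")
assert "kind: ClusterRole" not in render("false")
print("rbac.create toggles ClusterRole rendering as expected")
```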
The following table documents the comprehensive set of configurable
parameters for the iperf3-monitor chart. This serves as the primary
documentation for any user wishing to install and customize the service.

| Parameter | Description | Type | Default |
|---|---|---|---|
| nameOverride | Override the name of the chart. | string | "" |
| fullnameOverride | Override the fully qualified app name. | string | "" |
| exporter.image.repository | The container image repository for the exporter. | string | ghcr.io/my-org/iperf3-prometheus-exporter |
| exporter.image.tag | The container image tag for the exporter. | string | (Chart.AppVersion) |
| exporter.image.pullPolicy | The image pull policy for the exporter. | string | IfNotPresent |
| exporter.replicaCount | Number of exporter pod replicas. | integer | 1 |
| exporter.testInterval | Interval in seconds between test cycles. | integer | 300 |
| exporter.testTimeout | Timeout in seconds for a single iperf3 test. | integer | 10 |
| exporter.testProtocol | Protocol to use for testing (tcp or udp). | string | tcp |
| exporter.resources | CPU/memory resource requests and limits for the exporter. | object | {} |
| server.image.repository | The container image repository for the iperf3 server. | string | networkstatic/iperf3 |
| server.image.tag | The container image tag for the iperf3 server. | string | latest |
| server.resources | CPU/memory resource requests and limits for the server pods. | object | {} |
| server.nodeSelector | Node selector for scheduling server pods. | object | {} |
| server.tolerations | Tolerations for scheduling server pods on tainted nodes. | array | control-plane and master tolerations |
| rbac.create | If true, create ServiceAccount, ClusterRole, and ClusterRoleBinding. | boolean | true |
| serviceAccount.name | The name of the ServiceAccount to use. Used if rbac.create is false. | string | "" |
| serviceMonitor.enabled | If true, create a ServiceMonitor for Prometheus Operator. | boolean | true |
| serviceMonitor.interval | Scrape interval for the ServiceMonitor. | string | 60s |
| serviceMonitor.scrapeTimeout | Scrape timeout for the ServiceMonitor. | string | 30s |

## **Section 5: Visualizing Network Performance with a Custom Grafana Dashboard**

The final piece of the user experience is a purpose-built Grafana
dashboard that transforms the raw, time-series metrics from Prometheus
into intuitive, actionable visualizations. A well-designed dashboard
does more than just display data; it tells a story, guiding an operator
from a high-level overview of cluster health to a deep-dive analysis of
a specific problematic network path.^5^

### **5.1 Dashboard Design Principles** {#dashboard-design-principles}

The primary goals for this network performance dashboard are:

1. **At-a-Glance Overview:** Provide an immediate, cluster-wide view of
    > network health, allowing operators to quickly spot systemic issues
    > or anomalies.

2. **Intuitive Drill-Down:** Enable users to seamlessly transition from
    > a high-level view to a detailed analysis of performance between
    > specific nodes.

3. **Correlation:** Display multiple related metrics (bandwidth,
    > jitter, packet loss) on the same timeline to help identify causal
    > relationships.

4. **Clarity and Simplicity:** Avoid clutter and overly complex panels
    > that can obscure meaningful data.^4^
### **5.2 Key Visualizations and Panels** {#key-visualizations-and-panels}

The dashboard is constructed from several key panel types, each serving
a specific analytical purpose.

- **Panel 1: Node-to-Node Bandwidth Heatmap.** This is the centerpiece
    > of the dashboard's overview. It uses Grafana's "Heatmap"
    > visualization to create a matrix of network performance.

  - **Y-Axis:** Source Node (source_node label).

  - **X-Axis:** Destination Node (destination_node label).

  - **Cell Color:** The value of the iperf_network_bandwidth_mbps
      > metric.

  - **PromQL Query:** avg(iperf_network_bandwidth_mbps) by (source_node, destination_node)
      > This panel provides an instant visual summary of the entire
      > cluster's network fabric. A healthy cluster will show a uniformly
      > "hot" (high bandwidth) grid, while any "cold" spots
      > immediately draw attention to underperforming network paths.

- **Panel 2: Time-Series Performance Graphs.** These panels use the
    > "Time series" visualization to plot performance over time,
    > allowing for trend analysis and historical investigation.

  - **Bandwidth (Mbps):** Plots
      > iperf_network_bandwidth_mbps{source_node="$source_node", destination_node="$destination_node"}.

  - **Jitter (ms):** Plots
      > iperf_network_jitter_ms{source_node="$source_node", destination_node="$destination_node", protocol="udp"}.

  - **Packet Loss (%):** Plots (iperf_network_lost_packets_total{...} / iperf_network_packets_total{...}) * 100.
      > These graphs are filtered by the dashboard variables, enabling the
      > drill-down analysis.

- **Panel 3: Stat Panels.** These panels use the "Stat" visualization
    > to display single, key performance indicators (KPIs) for the
    > selected time range and nodes.

  - **Average Bandwidth:** avg(iperf_network_bandwidth_mbps{...})

  - **Minimum Bandwidth:** min(iperf_network_bandwidth_mbps{...})

  - **Maximum Jitter:** max(iperf_network_jitter_ms{...})

### **5.3 Enabling Interactivity with Grafana Variables** {#enabling-interactivity-with-grafana-variables}

The dashboard's interactivity is powered by Grafana's template
variables. These variables are dynamically populated from Prometheus and
are used to filter the data displayed in the panels.^4^

- **$source_node**: A dropdown variable populated by the PromQL query
    > label_values(iperf_network_bandwidth_mbps, source_node).

- **$destination_node**: A dropdown variable populated by
    > label_values(iperf_network_bandwidth_mbps{source_node="$source_node"},
    > destination_node). This query is cascaded, meaning it only shows
    > destinations relevant to the selected source.

- **$protocol**: A custom variable with the options tcp and udp.

This combination of a high-level heatmap with interactive,
variable-driven drill-down graphs creates a powerful analytical
workflow. An operator can begin with a bird's-eye view of the cluster.
Upon spotting an anomaly on the heatmap (e.g., a low-bandwidth link
between Node-5 and Node-8), they can use the $source_node and
$destination_node dropdowns to select that specific path. All the
time-series panels will instantly update to show the detailed
performance history for that link, allowing the operator to correlate
bandwidth drops with jitter spikes or other events. This workflow
transforms raw data into actionable insight, dramatically reducing the
Mean Time to Identification (MTTI) for network issues.
### **5.3 Enabling Interactivity with Grafana Variables** {#enabling-interactivity-with-grafana-variables}

The dashboard's interactivity is powered by Grafana's template
variables. These variables are dynamically populated from Prometheus and
are used to filter the data displayed in the panels.^4^

- **$source_node**: A dropdown variable populated by the PromQL query
  label_values(iperf_network_bandwidth_mbps, source_node).

- **$destination_node**: A dropdown variable populated by
  label_values(iperf_network_bandwidth_mbps{source_node="$source_node"}, destination_node).
  This query is cascaded, meaning it only shows
  destinations relevant to the selected source.

- **$protocol**: A custom variable with the options tcp and udp.

This combination of a high-level heatmap with interactive,
variable-driven drill-down graphs creates a powerful analytical
workflow. An operator can begin with a bird's-eye view of the cluster.
Upon spotting an anomaly on the heatmap (e.g., a low-bandwidth link
between Node-5 and Node-8), they can use the $source_node and
$destination_node dropdowns to select that specific path. All the
time-series panels will instantly update to show the detailed
performance history for that link, allowing the operator to correlate
bandwidth drops with jitter spikes or other events. This workflow
transforms raw data into actionable insight, dramatically reducing the
Mean Time to Identification (MTTI) for network issues.

### **5.4 The Complete Grafana Dashboard JSON Model** {#the-complete-grafana-dashboard-json-model}

To facilitate easy deployment, the entire dashboard is defined in a
single JSON model. This model can be imported directly into any Grafana
instance.

> JSON

{
  "__inputs": [],
  "__requires": [
    { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "8.0.0" },
    { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": { "type": "grafana", "uid": "-- Grafana --" },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "title": "Node-to-Node Bandwidth Heatmap",
      "type": "heatmap",
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "gridPos": { "h": 9, "w": 24, "x": 0, "y": 0 },
      "targets": [
        {
          "expr": "avg(iperf_network_bandwidth_mbps) by (source_node, destination_node)",
          "format": "heatmap",
          "legendFormat": "{{source_node}} -> {{destination_node}}",
          "refId": "A"
        }
      ],
      "cards": { "cardPadding": null, "cardRound": null },
      "color": {
        "mode": "spectrum",
        "scheme": "red-yellow-green",
        "exponent": 0.5,
        "reverse": false
      },
      "dataFormat": "tsbuckets",
      "yAxis": { "show": true, "format": "short" },
      "xAxis": { "show": true }
    },
    {
      "title": "Bandwidth Over Time (Source: $source_node, Dest: $destination_node)",
      "type": "timeseries",
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 },
      "targets": [
        {
          "expr": "iperf_network_bandwidth_mbps{source_node=\"$source_node\", destination_node=\"$destination_node\"}",
          "legendFormat": "Bandwidth",
          "refId": "A"
        }
      ],
      "fieldConfig": { "defaults": { "unit": "mbps" } }
    },
    {
      "title": "Jitter Over Time (Source: $source_node, Dest: $destination_node)",
      "type": "timeseries",
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 },
      "targets": [
        {
          "expr": "iperf_network_jitter_ms{source_node=\"$source_node\", destination_node=\"$destination_node\", protocol=\"udp\"}",
          "legendFormat": "Jitter",
          "refId": "A"
        }
      ],
      "fieldConfig": { "defaults": { "unit": "ms" } }
    }
  ],
  "refresh": "30s",
  "schemaVersion": 36,
  "style": "dark",
  "tags": ["iperf3", "network", "kubernetes"],
  "templating": {
    "list": [
      {
        "current": {},
        "datasource": { "type": "prometheus", "uid": "prometheus" },
        "definition": "label_values(iperf_network_bandwidth_mbps, source_node)",
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "source_node",
        "options": [],
        "query": "label_values(iperf_network_bandwidth_mbps, source_node)",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 1,
        "type": "query"
      },
      {
        "current": {},
        "datasource": { "type": "prometheus", "uid": "prometheus" },
        "definition": "label_values(iperf_network_bandwidth_mbps{source_node=\"$source_node\"}, destination_node)",
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "destination_node",
        "options": [],
        "query": "label_values(iperf_network_bandwidth_mbps{source_node=\"$source_node\"}, destination_node)",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 1,
        "type": "query"
      },
      {
        "current": { "selected": true, "text": "tcp", "value": "tcp" },
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "protocol",
        "options": [
          { "selected": true, "text": "tcp", "value": "tcp" },
          { "selected": false, "text": "udp", "value": "udp" }
        ],
        "query": "tcp,udp",
        "skipUrlSync": false,
        "type": "custom"
      }
    ]
  },
  "time": { "from": "now-1h", "to": "now" },
  "timepicker": {},
  "timezone": "browser",
  "title": "Kubernetes iperf3 Network Performance",
  "uid": "k8s-iperf3-dashboard",
  "version": 1,
  "weekStart": ""
}
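Rather than importing the model by hand, the dashboard can also be
provisioned declaratively. The sketch below assumes Grafana is deployed
by kube-prometheus-stack with its dashboard sidecar enabled, which
discovers ConfigMaps carrying the grafana_dashboard label; the resource
name and namespace are illustrative:

> YAML

apiVersion: v1
kind: ConfigMap
metadata:
  name: iperf3-dashboard          # hypothetical name
  namespace: monitoring
  labels:
    grafana_dashboard: "1"        # label the Grafana sidecar watches for
data:
  # Paste the full JSON model from Section 5.4 as the value below
  iperf3-dashboard.json: |
    {}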
## **Section 6: GitHub Repository Structure and CI/CD Workflow**

To deliver this monitoring service as a professional, open-source-ready
project, it is essential to package it within a well-structured GitHub
repository and implement a robust Continuous Integration and Continuous
Deployment (CI/CD) pipeline. This automates the build, test, and release
process, ensuring that every version of the software is consistent,
trustworthy, and easy for consumers to adopt.

### **6.1 Recommended Repository Structure** {#recommended-repository-structure}

A clean, logical directory structure is fundamental for project
maintainability and ease of navigation for contributors and users.

.
├── .github/
│   └── workflows/
│       └── release.yml        # GitHub Actions workflow for CI/CD
├── charts/
│   └── iperf3-monitor/        # The Helm chart for the service
│       ├── Chart.yaml
│       ├── values.yaml
│       └── templates/
│           └── ...
├── exporter/
│   ├── Dockerfile             # Dockerfile for the exporter
│   ├── requirements.txt       # Python dependencies
│   └── exporter.py            # Exporter source code
├── .gitignore
├── LICENSE
└── README.md

This structure cleanly separates the exporter application code
(/exporter) from its deployment packaging (/charts/iperf3-monitor), and
its release automation (/.github/workflows).

### **6.2 CI/CD Pipeline with GitHub Actions** {#cicd-pipeline-with-github-actions}

A fully automated CI/CD pipeline is the hallmark of a mature software
project. It eliminates manual, error-prone release steps and provides
strong guarantees about the integrity of the published artifacts. By
triggering the pipeline on the creation of a Git tag (e.g., v1.2.3), we
use the tag as a single source of truth for versioning both the Docker
image and the Helm chart. This ensures that chart version 1.2.3 is built
to use image version 1.2.3, and that both have been validated before
release. This automated, atomic release process provides trust and
velocity, elevating the project from a collection of files into a
reliable, distributable piece of software.
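In practice, cutting a release is then a matter of pushing a
semantic-version tag (the version number below is illustrative):

> Bash

git tag v1.2.3
git push origin v1.2.3   # pushing the tag triggers the release workflow below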
The following GitHub Actions workflow automates the entire release
process:

> YAML

# .github/workflows/release.yml
name: Release iperf3-monitor

on:
  push:
    tags:
      - 'v*.*.*'

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  lint-and-test:
    name: Lint and Test
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Set up Helm
        uses: azure/setup-helm@v3
        with:
          version: v3.10.0

      - name: Helm Lint
        run: helm lint ./charts/iperf3-monitor

  build-and-publish-image:
    name: Build and Publish Docker Image
    runs-on: ubuntu-latest
    needs: lint-and-test
    permissions:
      contents: read
      packages: write
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: ./exporter
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

  package-and-publish-chart:
    name: Package and Publish Helm Chart
    runs-on: ubuntu-latest
    needs: build-and-publish-image
    permissions:
      contents: write
    steps:
      - name: Check out code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Helm
        uses: azure/setup-helm@v3
        with:
          version: v3.10.0

      - name: Set Chart Version
        run: |
          export VERSION=$(echo "${{ github.ref_name }}" | sed 's/^v//')
          yq e -i '.version = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml
          yq e -i '.appVersion = strenv(VERSION)' ./charts/iperf3-monitor/Chart.yaml

      - name: Publish Helm chart
        uses: stefanprodan/helm-gh-pages@v1.6.0
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          charts_dir: ./charts
          charts_url: https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }}

### **6.3 Documentation and Usability** {#documentation-and-usability}

The final, and arguably most critical, component for project success is
high-quality documentation. The README.md file at the root of the
repository is the primary entry point for any user. It should clearly
explain what the project does, its architecture, and how to deploy and
use it.

A common failure point in software projects is documentation that falls
out of sync with the code. For Helm charts, the values.yaml file
frequently changes, adding new parameters and options. To combat this,
it is a best practice to automate the documentation of these parameters.
The helm-docs tool can be integrated directly into the CI/CD pipeline to
automatically generate the "Parameters" section of the README.md by
parsing the comments directly from the values.yaml file.^20^ This
ensures that the documentation is always an accurate reflection of the
chart's configurable options, providing a seamless and trustworthy
experience for users.
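The chart in this repository already follows the comment convention that
helm-docs parses: each value is annotated with a leading "# --" comment,
as in this excerpt from values.yaml:

> YAML

exporter:
  # -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
  testInterval: 300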
## **Conclusion**

The proliferation of distributed microservices on Kubernetes has made
network performance a critical, yet often opaque, component of overall
application health. This report has detailed a comprehensive,
production-grade solution for establishing continuous network validation
within a Kubernetes cluster. By architecting a system around the robust,
decoupled pattern of an iperf3-server DaemonSet and a Kubernetes-aware
iperf3-exporter Deployment, this service provides a resilient and
automated foundation for network observability.

The implementation leverages industry-standard tools (Python for the
exporter, Prometheus for metrics storage, and Grafana for
visualization) to create a powerful and flexible monitoring pipeline.
The entire service is packaged into a professional Helm chart, following
best practices for templating, configuration, and adaptability. This
allows for simple, version-controlled deployment across a wide range of
environments. The final Grafana dashboard transforms the collected data
into an intuitive, interactive narrative, enabling engineers to move
swiftly from high-level anomaly detection to root-cause analysis.

Ultimately, by treating network performance not as a given but as a
continuously measured metric, organizations can proactively identify and
resolve infrastructure bottlenecks, enhance application reliability, and
ensure a consistent, high-quality experience for their users in the
dynamic world of Kubernetes.
diff --git a/charts/iperf3-monitor/Chart.yaml b/charts/iperf3-monitor/Chart.yaml
new file mode 100644
index 0000000..912aa46
--- /dev/null
+++ b/charts/iperf3-monitor/Chart.yaml
@@ -0,0 +1,30 @@
+apiVersion: v2
+name: iperf3-monitor
+version: 0.1.0
+appVersion: "0.1.0"
+description: A Helm chart for deploying a Kubernetes-native iperf3 network performance monitoring service with Prometheus and Grafana.
+type: application
+keywords:
+  - iperf3
+  - network
+  - performance
+  - monitoring
+  - kubernetes
+  - prometheus
+  - grafana
+home: https://github.com/malarinv/iperf3-monitor # Replace with your repo URL
+sources:
+  - https://github.com/malarinv/iperf3-monitor # Replace with your repo URL
+maintainers:
+  - name: Malar Invention # Replace with your name
+    email: malarkannan.invention@gmail.com # Replace with your email
+icon: https://raw.githubusercontent.com/malarinv/iperf3-monitor/main/icon.png # Optional icon URL
+annotations:
+  artifacthub.io/changes: |
+    - Add initial Helm chart structure.
+  artifacthub.io/category: networking
+dependencies:
+  - name: kube-prometheus-stack # Example dependency if you package the whole monitoring stack
+    version: ">=30.0.0" # Specify a compatible version range
+    repository: https://prometheus-community.github.io/helm-charts
+    condition: serviceMonitor.enabled # Only include if ServiceMonitor is enabled (assuming Prometheus Operator)
diff --git a/charts/iperf3-monitor/templates/_helpers.tpl b/charts/iperf3-monitor/templates/_helpers.tpl
new file mode 100644
index 0000000..9e7d6c4
--- /dev/null
+++ b/charts/iperf3-monitor/templates/_helpers.tpl
@@ -0,0 +1,55 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "iperf3-monitor.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "iperf3-monitor.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create chart's labels
+*/}}
+{{- define "iperf3-monitor.labels" -}}
+helm.sh/chart: {{ include "iperf3-monitor.name" . }}-{{ .Chart.Version | replace "+" "_" }}
+{{ include "iperf3-monitor.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end -}}
+
+{{/*
+Selector labels
+*/}}
+{{- define "iperf3-monitor.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "iperf3-monitor.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end -}}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "iperf3-monitor.serviceAccountName" -}}
+{{- if .Values.rbac.create -}}
+  {{- default (include "iperf3-monitor.fullname" .) .Values.serviceAccount.name -}}
+{{- else -}}
+  {{- default "default" .Values.serviceAccount.name -}}
+{{- end -}}
+{{- end -}}
\ No newline at end of file
diff --git a/charts/iperf3-monitor/templates/exporter-deployment.yaml b/charts/iperf3-monitor/templates/exporter-deployment.yaml
new file mode 100644
index 0000000..943aa45
--- /dev/null
+++ b/charts/iperf3-monitor/templates/exporter-deployment.yaml
@@ -0,0 +1,49 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "iperf3-monitor.fullname" . }}-exporter
+  labels:
+    {{- include "iperf3-monitor.labels" . | nindent 4 }}
+    app.kubernetes.io/component: exporter
+spec:
+  replicas: {{ .Values.exporter.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "iperf3-monitor.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: exporter
+  template:
+    metadata:
+      labels:
+        {{- include "iperf3-monitor.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: exporter
+    spec:
+      serviceAccountName: {{ include "iperf3-monitor.serviceAccountName" . }}
+      containers:
+        - name: iperf3-exporter
+          image: "{{ .Values.exporter.image.repository }}:{{ .Values.exporter.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.exporter.image.pullPolicy }}
+          ports:
+            - containerPort: {{ .Values.service.targetPort }}
+              name: metrics
+          env:
+            - name: SOURCE_NODE_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: spec.nodeName
+            - name: IPERF_TEST_INTERVAL
+              value: "{{ .Values.exporter.testInterval }}"
+            - name: IPERF_TEST_PROTOCOL
+              value: "{{ .Values.exporter.testProtocol }}"
+            - name: IPERF_SERVER_PORT
+              value: "5201" # Hardcoded to match the server DaemonSet port
+            - name: IPERF_SERVER_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            - name: IPERF_SERVER_LABEL_SELECTOR
+              value: 'app.kubernetes.io/name={{ include "iperf3-monitor.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server'
+          {{- with .Values.exporter.resources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
diff --git a/charts/iperf3-monitor/templates/rbac.yaml b/charts/iperf3-monitor/templates/rbac.yaml
new file mode 100644
index 0000000..0a931e1
--- /dev/null
+++ b/charts/iperf3-monitor/templates/rbac.yaml
@@ -0,0 +1,34 @@
+{{- if .Values.rbac.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "iperf3-monitor.serviceAccountName" . }}
+  labels:
+    {{- include "iperf3-monitor.labels" . | nindent 4 }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ include "iperf3-monitor.fullname" . }}-role
+  labels:
+    {{- include "iperf3-monitor.labels" . | nindent 4 }}
+rules:
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ include "iperf3-monitor.fullname" . }}-rb
+  labels:
+    {{- include "iperf3-monitor.labels" . | nindent 4 }}
+subjects:
+  - kind: ServiceAccount
+    name: {{ include "iperf3-monitor.serviceAccountName" . }}
+    namespace: {{ .Release.Namespace }}
+roleRef:
+  kind: ClusterRole
+  name: {{ include "iperf3-monitor.fullname" . }}-role
+  apiGroup: rbac.authorization.k8s.io
+{{- end -}}
\ No newline at end of file
diff --git a/charts/iperf3-monitor/templates/server-daemonset.yaml b/charts/iperf3-monitor/templates/server-daemonset.yaml
new file mode 100644
index 0000000..9523270
--- /dev/null
+++ b/charts/iperf3-monitor/templates/server-daemonset.yaml
@@ -0,0 +1,45 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: {{ include "iperf3-monitor.fullname" . }}-server
+  labels:
+    {{- include "iperf3-monitor.labels" . | nindent 4 }}
+    app.kubernetes.io/component: server
+spec:
+  selector:
+    matchLabels:
+      {{- include "iperf3-monitor.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: server
+  template:
+    metadata:
+      labels:
+        {{- include "iperf3-monitor.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: server
+    spec:
+      # Run on the host network to measure raw node-to-node performance
+      hostNetwork: true
+      {{- with .Values.server.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.server.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: iperf3-server
+          image: "{{ .Values.server.image.repository }}:{{ .Values.server.image.tag }}"
+          imagePullPolicy: {{ .Values.server.image.pullPolicy }}
+          args: ["-s"] # Start in server mode
+          ports:
+            - containerPort: 5201
+              name: iperf3-tcp
+              protocol: TCP
+            - containerPort: 5201
+              name: iperf3-udp
+              protocol: UDP
+          {{- with .Values.server.resources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
diff --git a/charts/iperf3-monitor/templates/service.yaml b/charts/iperf3-monitor/templates/service.yaml
new file mode 100644
index 0000000..2b60f7b
--- /dev/null
+++ b/charts/iperf3-monitor/templates/service.yaml
@@ -0,0 +1,17 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "iperf3-monitor.fullname" . }}-exporter-svc
+  labels:
+    {{- include "iperf3-monitor.labels" . | nindent 4 }}
+    app.kubernetes.io/component: exporter
+spec:
+  type: {{ .Values.service.type }}
+  selector:
+    {{- include "iperf3-monitor.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: exporter
+  ports:
+    - name: metrics
+      port: {{ .Values.service.port }}
+      targetPort: {{ .Values.service.targetPort }}
+      protocol: TCP
\ No newline at end of file
diff --git a/charts/iperf3-monitor/templates/servicemonitor.yaml b/charts/iperf3-monitor/templates/servicemonitor.yaml
new file mode 100644
index 0000000..a8041cf
--- /dev/null
+++ b/charts/iperf3-monitor/templates/servicemonitor.yaml
@@ -0,0 +1,20 @@
+{{- if .Values.serviceMonitor.enabled -}}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "iperf3-monitor.fullname" . }}-sm
+  labels:
+    {{- include "iperf3-monitor.labels" . | nindent 4 }}
+    release: prometheus-operator # Adjust to match the serviceMonitorSelector of your Prometheus Operator installation
+    app.kubernetes.io/component: exporter
+spec:
+  selector:
+    matchLabels:
+      {{- include "iperf3-monitor.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: exporter
+  endpoints:
+    - port: metrics
+      interval: {{ .Values.serviceMonitor.interval }}
+      scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }}
+      path: /metrics
+{{- end -}}
\ No newline at end of file
diff --git a/charts/iperf3-monitor/values.yaml b/charts/iperf3-monitor/values.yaml
new file mode 100644
index 0000000..62be7ef
--- /dev/null
+++ b/charts/iperf3-monitor/values.yaml
@@ -0,0 +1,120 @@
+# Default values for iperf3-monitor.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+# -- Override the name of the chart.
+nameOverride: ""
+
+# -- Override the fully qualified app name.
+fullnameOverride: ""
+
+exporter:
+  # -- Configuration for the exporter container image.
+  image:
+    # -- The container image repository for the exporter.
+    repository: ghcr.io/malarinv/iperf3-prometheus-exporter # Replace with your repo URL
+    # -- The container image tag for the exporter. If not set, the chart's appVersion is used.
+    tag: ""
+    # -- The image pull policy for the exporter container.
+    pullPolicy: IfNotPresent
+
+  # -- Number of exporter pod replicas. Typically 1 is sufficient.
+  replicaCount: 1
+
+  # -- Interval in seconds between complete test cycles (i.e., testing all server nodes).
+  testInterval: 300
+
+  # -- Timeout in seconds for a single iperf3 test run.
+  testTimeout: 10
+
+  # -- Protocol to use for testing (tcp or udp).
+  testProtocol: tcp
+
+  # -- CPU and memory resource requests and limits for the exporter pod.
+  # @default -- A small default is provided if commented out.
+  resources:
+    {}
+    # requests:
+    #   cpu: "100m"
+    #   memory: "128Mi"
+    # limits:
+    #   cpu: "500m"
+    #   memory: "256Mi"
+
+server:
+  # -- Configuration for the iperf3 server container image (DaemonSet).
+  image:
+    # -- The container image repository for the iperf3 server.
+    repository: networkstatic/iperf3
+    # -- The container image tag for the iperf3 server.
+    tag: latest
+    # -- The image pull policy for the iperf3 server container.
+    pullPolicy: IfNotPresent
+
+  # -- CPU and memory resource requests and limits for the iperf3 server pods (DaemonSet).
+  # These should be very low as the server is mostly idle.
+  # @default -- A small default is provided if commented out.
+  resources:
+    {}
+    # requests:
+    #   cpu: "50m"
+    #   memory: "64Mi"
+    # limits:
+    #   cpu: "100m"
+    #   memory: "128Mi"
+
+  # -- Node selector for scheduling iperf3 server pods.
+  # Use this to restrict the DaemonSet to a subset of nodes.
+  # @default -- {} (schedule on all nodes)
+  nodeSelector: {}
+
+  # -- Tolerations for scheduling iperf3 server pods on tainted nodes (e.g., control-plane nodes).
+  # This is often necessary to include master nodes in the test mesh.
+  # @default -- Tolerates control-plane and master taints.
+  tolerations:
+    - key: "node-role.kubernetes.io/control-plane"
+      operator: "Exists"
+      effect: "NoSchedule"
+    - key: "node-role.kubernetes.io/master"
+      operator: "Exists"
+      effect: "NoSchedule"
+
+rbac:
+  # -- If true, create ServiceAccount, ClusterRole, and ClusterRoleBinding for the exporter.
+  # Set to false if you manage RBAC externally.
+  create: true
+
+serviceAccount:
+  # -- The name of the ServiceAccount to use for the exporter pod.
+  # If not set and rbac.create is true, a name is generated from the chart's fullname;
+  # if rbac.create is false, it defaults to "default".
+  name: ""
+
+serviceMonitor:
+  # -- If true, create a ServiceMonitor resource for integration with Prometheus Operator.
+  # Requires a running Prometheus Operator in the cluster.
+  enabled: true
+
+  # -- Scrape interval for the ServiceMonitor. How often Prometheus scrapes the exporter metrics.
+  interval: 60s
+
+  # -- Scrape timeout for the ServiceMonitor. How long Prometheus waits for a metrics response.
+  scrapeTimeout: 30s
+
+# -- Configuration for the exporter Service.
+service:
+  # -- Service type. ClusterIP is typically sufficient.
+  type: ClusterIP
+  # -- Port on which the exporter service is exposed.
+  port: 9876
+  # -- Target port on the exporter pod.
+  targetPort: 9876
+
+# -- Optional configuration for a network policy to allow traffic to the iperf3 server DaemonSet.
+# This is often necessary if you are using a network policy controller.
+networkPolicy:
+  # -- If true, create a NetworkPolicy resource.
+  enabled: false
+  # -- Specify source selectors if needed (e.g., pods in a specific namespace).
+  from: []
+  # -- Specify namespace selectors if needed.
+  namespaceSelector: {}
+  # -- Specify pod selectors if needed.
+  podSelector: {}
diff --git a/exporter/Dockerfile b/exporter/Dockerfile
new file mode 100644
index 0000000..76a2138
--- /dev/null
+++ b/exporter/Dockerfile
@@ -0,0 +1,34 @@
+# Stage 1: Build stage with dependencies
+FROM python:3.9-slim AS builder
+
+WORKDIR /app
+
+# Install iperf3 and build dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends gcc iperf3 libiperf-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Stage 2: Final runtime stage
+FROM python:3.9-slim
+
+WORKDIR /app
+
+# Copy iperf3 binary and library from the builder stage
+COPY --from=builder /usr/bin/iperf3 /usr/bin/iperf3
+COPY --from=builder /usr/lib/x86_64-linux-gnu/libiperf.so.0 /usr/lib/x86_64-linux-gnu/libiperf.so.0
+
+# Refresh the shared-library cache so the iperf3 Python bindings can locate libiperf
+RUN ldconfig
+
+# Copy installed Python packages from the builder stage
+COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages
+
+# Copy the exporter application code
+COPY exporter.py .
+
+# Expose the metrics port
+EXPOSE 9876
+
+# Set the entrypoint
+CMD ["python", "exporter.py"]
\ No newline at end of file
diff --git a/exporter/exporter.py b/exporter/exporter.py
new file mode 100644
index 0000000..c53c6c7
--- /dev/null
+++ b/exporter/exporter.py
@@ -0,0 +1,159 @@
+import os
+import time
+import logging
+from kubernetes import client, config
+from prometheus_client import start_http_server, Gauge
+import iperf3
+
+# --- Configuration ---
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+# --- Prometheus Metrics Definition ---
+IPERF_BANDWIDTH_MBPS = Gauge(
+    'iperf_network_bandwidth_mbps',
+    'Network bandwidth measured by iperf3 in Megabits per second',
+    ['source_node', 'destination_node', 'protocol']
+)
+IPERF_JITTER_MS = Gauge(
+    'iperf_network_jitter_ms',
+    'Network jitter measured by iperf3 in milliseconds',
+    ['source_node', 'destination_node', 'protocol']
+)
+IPERF_PACKETS_TOTAL = Gauge(
+    'iperf_network_packets_total',
+    'Total packets transmitted or received during the iperf3 test',
+    ['source_node', 'destination_node', 'protocol']
+)
+IPERF_LOST_PACKETS = Gauge(
+    'iperf_network_lost_packets_total',
+    'Total lost packets during the iperf3 UDP test',
+    ['source_node', 'destination_node', 'protocol']
+)
+IPERF_TEST_SUCCESS = Gauge(
+    'iperf_test_success',
+    'Indicates if the iperf3 test was successful (1) or failed (0)',
+    ['source_node', 'destination_node', 'protocol']
+)
+
+def discover_iperf_servers():
+    """
+    Discover iperf3 server pods in the cluster using the Kubernetes API.
+    """
+    try:
+        # Load in-cluster configuration.
+        # Assumes the exporter runs in a pod with a service account having permissions.
+        config.load_incluster_config()
+        v1 = client.CoreV1Api()
+
+        namespace = os.getenv('IPERF_SERVER_NAMESPACE', 'default')
+        label_selector = os.getenv('IPERF_SERVER_LABEL_SELECTOR', 'app=iperf3-server')
+
+        logging.info(f"Discovering iperf3 servers with label '{label_selector}' in namespace '{namespace}'")
+
+        # Restrict discovery to the namespace the servers are deployed in
+        ret = v1.list_namespaced_pod(namespace, label_selector=label_selector, watch=False)
+
+        servers = []
+        for i in ret.items:
+            # Ensure pod has an IP and is running
+            if i.status.pod_ip and i.status.phase == 'Running':
+                servers.append({
+                    'ip': i.status.pod_ip,
+                    'node_name': i.spec.node_name
+                })
+        logging.info(f"Discovered {len(servers)} iperf3 server pods.")
+        return servers
+    except Exception as e:
+        logging.error(f"Error discovering iperf servers: {e}")
+        return []  # Return empty list on error to avoid crashing the loop
+
+def run_iperf_test(server_ip, server_port, protocol, source_node, dest_node):
+    """
+    Runs a single iperf3 test and updates Prometheus metrics.
+    """
+    logging.info(f"Running iperf3 test from {source_node} to {dest_node} ({server_ip}:{server_port}) using {protocol.upper()}")
+
+    # Use a distinct name so we do not shadow the imported kubernetes 'client' module
+    iperf_client = iperf3.Client()
+    iperf_client.server_hostname = server_ip
+    iperf_client.port = server_port
+    iperf_client.protocol = protocol
+    # Duration of the test (seconds)
+    iperf_client.duration = int(os.getenv('IPERF_TEST_DURATION', 5))
+    # Output results as JSON for easy parsing
+    iperf_client.json_output = True
+
+    result = iperf_client.run()
+
+    # Parse results and update metrics
+    parse_and_publish_metrics(result, source_node, dest_node, protocol)
+
+def parse_and_publish_metrics(result, source_node, dest_node, protocol):
+    """
+    Parses the iperf3 result and updates Prometheus gauges.
+    Handles both successful and failed tests.
+    """
+    labels = {'source_node': source_node, 'destination_node': dest_node, 'protocol': protocol}
+
+    if result is None or result.error:
+        error = result.error if result else 'no result object returned'
+        logging.error(f"Test from {source_node} to {dest_node} failed: {error}")
+        IPERF_TEST_SUCCESS.labels(**labels).set(0)
+        # Zero the performance metrics so stale values are not scraped.
+        # Gauge.labels() creates the child on first use, so no guard is needed.
+        IPERF_BANDWIDTH_MBPS.labels(**labels).set(0)
+        IPERF_JITTER_MS.labels(**labels).set(0)
+        IPERF_PACKETS_TOTAL.labels(**labels).set(0)
+        IPERF_LOST_PACKETS.labels(**labels).set(0)
+        return
+
+    IPERF_TEST_SUCCESS.labels(**labels).set(1)
+
+    # The summary data is typically in result.json['end']['sum_sent'] or
+    # result.json['end']['sum_received']. The iperf3-python client often exposes
+    # this directly as attributes like sent_Mbps or received_Mbps. For TCP, the
+    # received bandwidth on the client side (the exporter) is the most relevant
+    # figure; for UDP, the client report contains jitter, lost packets, etc.
+    bandwidth_mbps = 0
+    if getattr(result, 'received_Mbps', None) is not None:
+        bandwidth_mbps = result.received_Mbps
+    elif getattr(result, 'sent_Mbps', None) is not None:
+        # Fallback, though received_Mbps is usually more relevant for a TCP client
+        bandwidth_mbps = result.sent_Mbps
+    elif result.json and 'end' in result.json:
+        # Fall back to the raw JSON output structure
+        end = result.json['end']
+        if end.get('sum_received', {}).get('bits_per_second') is not None:
+            bandwidth_mbps = end['sum_received']['bits_per_second'] / 1_000_000
+        elif end.get('sum_sent', {}).get('bits_per_second') is not None:
+            bandwidth_mbps = end['sum_sent']['bits_per_second'] / 1_000_000
+
+    IPERF_BANDWIDTH_MBPS.labels(**labels).set(bandwidth_mbps)
+
+    # UDP specific metrics
+    if protocol == 'udp':
+        # iperf3-python exposes UDP results directly as attributes
+        IPERF_JITTER_MS.labels(**labels).set(getattr(result, 'jitter_ms', None) or 0)
+        IPERF_PACKETS_TOTAL.labels(**labels).set(getattr(result, 'packets', None) or 0)
+        IPERF_LOST_PACKETS.labels(**labels).set(getattr(result, 'lost_packets', None) or 0)
+    else:
+        # Zero the UDP-only metrics for TCP tests
+        IPERF_JITTER_MS.labels(**labels).set(0)
+        IPERF_PACKETS_TOTAL.labels(**labels).set(0)
+        IPERF_LOST_PACKETS.labels(**labels).set(0)
+
+def main():
+    """
+    Start the metrics endpoint and run the test loop forever.
+    """
+    # METRICS_PORT is optional; the default matches the Service targetPort (9876)
+    metrics_port = int(os.getenv('METRICS_PORT', 9876))
+    start_http_server(metrics_port)
+    logging.info(f"Prometheus metrics server started on port {metrics_port}")
+
+    source_node = os.getenv('SOURCE_NODE_NAME', 'unknown')
+    server_port = int(os.getenv('IPERF_SERVER_PORT', 5201))
+    protocol = os.getenv('IPERF_TEST_PROTOCOL', 'tcp')
+    interval = int(os.getenv('IPERF_TEST_INTERVAL', 300))
+
+    while True:
+        servers = discover_iperf_servers()
+        for server in servers:
+            # Skip the node the exporter itself is running on
+            if server['node_name'] == source_node:
+                continue
+            run_iperf_test(server['ip'], server_port, protocol, source_node, server['node_name'])
+        logging.info(f"Test cycle complete; sleeping for {interval} seconds")
+        time.sleep(interval)
+
+if __name__ == '__main__':
+    main()
diff --git a/exporter/requirements.txt b/exporter/requirements.txt
new file mode 100644
index 0000000..05f83d6
--- /dev/null
+++ b/exporter/requirements.txt
@@ -0,0 +1,3 @@
+prometheus-client
+iperf3
+kubernetes
\ No newline at end of file