Compare commits


No commits in common. "e72c1e3c1e5159d765f9260fc1af4ca1bfd62844" and "1d5ce487a642a28318af5b7b1948fffcad4205f8" have entirely different histories.

3 changed files with 75 additions and 40 deletions

gateway_services.yaml (new file, 4 additions)

@@ -0,0 +1,4 @@
+gateway_services:
+  - name: traefik-tcp
+    namespace: kube-system
+    label_pattern: app.kubernetes.io/name=traefik
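
For reference, the controller below loads this file with yaml.safe_load and iterates the gateway_services list. A minimal sketch of the parsed structure (illustrative only; the path is the controller's default):

import yaml

with open("gateway_services.yaml", "r") as f:
    parsed = yaml.safe_load(f)

# parsed == {"gateway_services": [{"name": "traefik-tcp",
#                                  "namespace": "kube-system",
#                                  "label_pattern": "app.kubernetes.io/name=traefik"}]}
for svc in parsed["gateway_services"]:
    print(svc["name"], svc["namespace"], svc["label_pattern"])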

Dockerfile

@@ -6,6 +6,7 @@ RUN pip install kubernetes kubernetes_asyncio PyYAML
 # Copy the controller script into the container
 COPY node_external_ip_controller_async.py /app/node_external_ip_controller.py
+COPY gateway_services.yaml /app/gateway_services.yaml
 # Set the working directory
 WORKDIR /app
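
The controller opens the config through a relative path, so it resolves against the image's working directory (/app); copying gateway_services.yaml next to the script is what makes the default path work. A small illustrative check, assuming only the defaults visible in this diff:

import os

# Default path taken from the controller; GATEWAY_SERVICES_FILE can override it.
path = os.getenv("GATEWAY_SERVICES_FILE", "gateway_services.yaml")
print(os.path.abspath(path))  # -> /app/gateway_services.yaml when the CWD is /app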

node_external_ip_controller_async.py

@@ -1,106 +1,136 @@
 import asyncio
 from kubernetes_asyncio import client, config, watch
 import os
-import logging
 # Configuration
+import yaml
+GATEWAY_SERVICES_FILE = os.getenv("GATEWAY_SERVICES_FILE", "gateway_services.yaml")
+with open(GATEWAY_SERVICES_FILE, "r") as f:
+    gateway_services = yaml.safe_load(f)
+services = gateway_services["gateway_services"]
 ANNOTATION_KEY = os.getenv("ANNOTATION_KEY", "kube-vip.io/loadbalancerIPs")
 ZERO_GATEWAY_IP = os.getenv("ZERO_GATEWAY_IP", "172.28.10.1")
 NODE_LABEL = os.getenv("NODE_LABEL", "svccontroller.k3s.cattle.io/enablelb=true")
-SERVICE_LABEL_KEY = os.getenv("SERVICE_LABEL_KEY", "zlanservice")
-SERVICE_LABEL_VALUE = os.getenv("SERVICE_LABEL_VALUE", "true")
 SERVICE_REQUEST_TIMEOUT = int(os.getenv("SERVICE_REQUEST_TIMEOUT", 300))
 NODE_REQUEST_TIMEOUT = int(os.getenv("NODE_REQUEST_TIMEOUT", 30))
-# Logging configuration
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
 async def update_service_annotation(v1, service, external_ips):
     try:
-        service_name = service.metadata.name
-        namespace = service.metadata.namespace
+        # Get the current service object
+        service_name = service["name"]
+        namespace = service["namespace"]
         service_obj = await v1.read_namespaced_service(service_name, namespace)
+        # Check if the annotation needs to be updated
        current_annotation = service_obj.metadata.annotations.get(ANNOTATION_KEY)
         target_annotation = ",".join(external_ips) + "," + ZERO_GATEWAY_IP
         if current_annotation != target_annotation:
+            # Update the annotation
             body = {"metadata": {"annotations": {ANNOTATION_KEY: target_annotation}}}
             await v1.patch_namespaced_service(service_name, namespace, body)
-            logger.info(
-                f"Updated service {service_name} with new external IP: {target_annotation}"
+            print(
+                f"Updated service {service_name} with new external IP: {target_annotation}",
+                flush=True,
             )
     except client.exceptions.ApiException as e:
-        logger.error(f"API Exception in update_service_annotation: {e}")
+        print(f"API Exception in update_service_annotation: {e}", flush=True)
-async def watch_nodes(v1, external_ips_update_queue):
+async def watch_nodes():
+    config.load_incluster_config()
+    v1 = client.CoreV1Api()
     w = watch.Watch()
     while True:
         try:
+            # Check for external IP
+            external_ips, node_names = [], []
             async for event in w.stream(
                 v1.list_node,
                 label_selector=NODE_LABEL,
                 _request_timeout=NODE_REQUEST_TIMEOUT,
             ):
                 node = event["object"]
-                external_ips = [
-                    addr.address
-                    for addr in node.status.addresses
-                    if addr.type == "ExternalIP"
-                ]
-                if external_ips:
-                    await external_ips_update_queue.put(external_ips)
+                node_name = node.metadata.name
+                for address in node.status.addresses:
+                    if address.type == "ExternalIP":
+                        external_ips.append(address.address)
+                        node_names.append(node_name)
+                if len(external_ips) > 0:
+                    print(
+                        f"Detected external IPs {str(external_ips)} for node {str(node_names)}",
+                        flush=True,
+                    )
+                    for service in services:
+                        await update_service_annotation(v1, service, external_ips)
         except client.exceptions.ApiException as e:
-            logger.error(f"API Exception in watch_nodes: {e}")
+            print(f"API Exception in watch_nodes: {e}", flush=True)
             await asyncio.sleep(5)
         except asyncio.CancelledError:
-            logger.info("Watch task was cancelled.")
+            print("Watch task was cancelled.", flush=True)
             break
         except Exception as e:
-            logger.error(f"Unexpected error in watch_nodes: {e}")
+            print(f"Unexpected error in watch_nodes: {e}", flush=True)
             await asyncio.sleep(5)
-async def watch_services(v1, external_ips_update_queue):
+async def watch_service(service):
+    config.load_incluster_config()
+    v1 = client.CoreV1Api()
     w = watch.Watch()
-    label_selector = f"{SERVICE_LABEL_KEY}={SERVICE_LABEL_VALUE}"
+    service_name = service["name"]
+    namespace = service["namespace"]
+    label_selector = service["label_pattern"]
     while True:
         try:
             async for event in w.stream(
-                v1.list_service_for_all_namespaces,
+                v1.list_namespaced_service,
+                namespace,
                 label_selector=label_selector,
                 _request_timeout=SERVICE_REQUEST_TIMEOUT,
             ):
-                service = event["object"]
-                if event["type"] in {"ADDED", "MODIFIED"}:
-                    external_ips = await external_ips_update_queue.get()
+                service_obj = event["object"]
+                if (
+                    event["type"] == "ADDED"
+                    and service_obj.metadata.name == service_name
+                ):
+                    print(f"New service detected: {service_name}", flush=True)
+                    # Fetch current external IP for nodes that match the label
+                    nodes = await v1.list_node(label_selector=NODE_LABEL)
+                    external_ips = []
+                    for node in nodes.items:
+                        for address in node.status.addresses:
+                            if address.type == "ExternalIP":
+                                external_ips.append(address.address)
                     await update_service_annotation(v1, service, external_ips)
         except client.exceptions.ApiException as e:
-            logger.error(f"API Exception in watch_services: {e}")
+            print(f"API Exception in watch_services: {e}", flush=True)
             await asyncio.sleep(5)
         except asyncio.CancelledError:
-            logger.info("Watch task was cancelled.")
+            print("Watch task was cancelled.", flush=True)
             break
         except Exception as e:
-            logger.error(f"Unexpected error in watch_services: {e}")
+            print(f"Unexpected error in watch_services: {e}", flush=True)
             await asyncio.sleep(5)
 async def main():
-    config.load_incluster_config()
-    v1 = client.CoreV1Api()
-    external_ips_update_queue = asyncio.Queue()
-    await asyncio.gather(
-        watch_nodes(v1, external_ips_update_queue),
-        watch_services(v1, external_ips_update_queue),
-    )
+    # Run watch_nodes and watch_services concurrently
+    service_watchers = [watch_service(service) for service in services]
+    await asyncio.gather(watch_nodes(), *service_watchers)
 if __name__ == "__main__":
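
For a concrete sense of what the controller writes, the annotation value is the matching nodes' external IPs joined by commas with ZERO_GATEWAY_IP appended, patched under ANNOTATION_KEY. A short sketch of the resulting patch body, using 192.168.1.10 as a purely hypothetical node ExternalIP:

# Hypothetical example: one matching node whose ExternalIP is 192.168.1.10.
external_ips = ["192.168.1.10"]
ZERO_GATEWAY_IP = "172.28.10.1"                 # controller default
ANNOTATION_KEY = "kube-vip.io/loadbalancerIPs"  # controller default

target_annotation = ",".join(external_ips) + "," + ZERO_GATEWAY_IP
body = {"metadata": {"annotations": {ANNOTATION_KEY: target_annotation}}}
print(body)
# {'metadata': {'annotations': {'kube-vip.io/loadbalancerIPs': '192.168.1.10,172.28.10.1'}}}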