node-external-ip-controller/node_external_ip_controller...

165 lines
6.6 KiB
Python
Raw Normal View History

import asyncio
from kubernetes_asyncio import client, config, watch
2024-11-06 21:25:27 +00:00
import os
2025-01-12 10:42:07 +00:00
import logging
2025-01-12 12:17:03 +00:00
import ipaddress
2024-11-06 21:25:27 +00:00
# ---------------------------------------------------------------------------
# Configuration -- every setting can be overridden through the environment.
# ---------------------------------------------------------------------------

# Service annotation that receives the comma-separated list of IPs.
ANNOTATION_KEY = os.environ.get("ANNOTATION_KEY", "kube-vip.io/loadbalancerIPs")

# NOTE(review): not referenced by any function visible in this file.
ZERO_GATEWAY_IP = os.environ.get("ZERO_GATEWAY_IP", "172.28.10.1")

# Label selector handed to the node watch.
NODE_LABEL = os.environ.get(
    "NODE_LABEL", "svccontroller.k3s.cattle.io/enablelb=true"
)

# Label selector handed to the service watch.
SERVICE_LABEL = os.environ.get("SERVICE_LABEL", "enablezlan=true")

# Name of the service label whose value is read as an additional gateway IP.
ZLAN_GATEWAY_IP_KEY = os.environ.get("ZLAN_GATEWAY_IP_KEY", "zlanip")

# Passed as ``_request_timeout`` to the service / node watch requests
# (presumably seconds -- confirm against the client library).
SERVICE_REQUEST_TIMEOUT = int(os.environ.get("SERVICE_REQUEST_TIMEOUT", "300"))
NODE_REQUEST_TIMEOUT = int(os.environ.get("NODE_REQUEST_TIMEOUT", "30"))

# ---------------------------------------------------------------------------
# Logging configuration
# ---------------------------------------------------------------------------
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
def is_valid_ip(ip):
    """Return True if *ip* is a string containing a valid IPv4/IPv6 address.

    Args:
        ip: Candidate value; anything that is not a ``str`` (including
            ``None``) is reported as invalid.

    Returns:
        bool: True when ``ipaddress.ip_address`` accepts the string,
        False otherwise.
    """
    # ipaddress.ip_address() also accepts integers and packed bytes. Those are
    # rejected here because accepted values end up joined into a
    # comma-separated annotation string, which requires str elements.
    if not isinstance(ip, str):
        return False
    try:
        ipaddress.ip_address(ip)
    except ValueError:
        return False
    return True
async def update_service_annotation(v1, service, external_ipset):
    """Reconcile the ``ANNOTATION_KEY`` annotation of one service.

    The service is re-read from the API, the desired IP set is built from
    ``external_ipset`` plus the service's ``ZLAN_GATEWAY_IP_KEY`` label value
    (when that value is a valid IP), and the annotation is patched only if
    the set of IPs it currently holds differs from the desired set.

    Args:
        v1: API object providing ``read_namespaced_service`` and
            ``patch_namespaced_service`` coroutines.
        service: Object exposing ``metadata.name`` and ``metadata.namespace``.
        external_ipset: Iterable of IP strings; it is copied, never mutated.

    Returns:
        None. ``ApiException`` raised by the API calls is logged and
        swallowed.
    """
    try:
        service_name = service.metadata.name
        namespace = service.metadata.namespace
        logger.debug(f"Fetching service {service_name} in namespace {namespace}")
        service_obj = await v1.read_namespaced_service(service_name, namespace)

        # Guard against annotations/labels being None (e.g. a service that
        # carries no annotations at all) instead of failing on ``.get``.
        annotations = service_obj.metadata.annotations or {}
        labels = service_obj.metadata.labels or {}

        current_annotation = annotations.get(ANNOTATION_KEY, "")
        # "".split(",") yields [""]; drop empty entries so that an empty or
        # absent annotation compares equal to an empty target set.
        annotated_ipset = {ip for ip in current_annotation.split(",") if ip}

        zlan_gateway_ip = labels.get(ZLAN_GATEWAY_IP_KEY)
        logger.debug(f"Zlan Gateway IP: {zlan_gateway_ip}")

        # Always work on a private copy of the caller's collection.
        target_ipset = set(external_ipset)
        if is_valid_ip(zlan_gateway_ip):
            target_ipset.add(zlan_gateway_ip)
        else:
            logger.debug(
                f"Invalid Zlan Gateway IP: {zlan_gateway_ip}, excluding from target annotation"
            )

        # Sort so the written annotation is deterministic across runs.
        target_annotation = ",".join(sorted(target_ipset))

        if annotated_ipset != target_ipset:
            logger.debug(f"Current annotation: {current_annotation}: {annotated_ipset}")
            logger.debug(f"Target annotation: {target_annotation}: {target_ipset}")
            body = {"metadata": {"annotations": {ANNOTATION_KEY: target_annotation}}}
            logger.debug(f"Patching service {service_name} with body: {body}")
            await v1.patch_namespaced_service(service_name, namespace, body)
            logger.info(
                f"Updated service {service_name} with new external IP: {target_annotation}"
            )
        else:
            logger.debug(f"No update required for service {service_name}")
    except client.exceptions.ApiException as e:
        logger.error(f"API Exception in update_service_annotation: {e}")
async def watch_nodes(v1, external_ips_update_queue):
    """Watch labelled nodes and publish the current set of their ExternalIPs.

    For every watch event the ``ExternalIP`` addresses of the node are
    recorded (``ADDED``/``MODIFIED``) or forgotten (``DELETED``). The union
    over all known nodes is then placed on ``external_ips_update_queue``
    after any older, unconsumed entries have been discarded, so the queue
    holds only the latest snapshot.

    Args:
        v1: API object whose ``list_node`` is handed to the watch stream.
        external_ips_update_queue: ``asyncio.Queue`` receiving ``set`` objects
            of IP strings.

    Returns:
        None. Runs until the task is cancelled; API and unexpected errors
        are logged and the watch is restarted after a 5 second pause.
    """
    w = watch.Watch()
    # node name -> ExternalIP addresses last reported for that node. Tracking
    # per node lets an address that disappears from a node (or a deleted
    # node) actually drop out of the published set.
    ips_by_node = {}
    while True:
        try:
            logger.debug("Starting to watch nodes")
            async for event in w.stream(
                v1.list_node,
                label_selector=NODE_LABEL,
                _request_timeout=NODE_REQUEST_TIMEOUT,
            ):
                node = event["object"]
                event_type = event["type"]
                node_name = node.metadata.name
                logger.debug(f"Received {event_type} event for node: {node_name}")

                # status / addresses may be unset on a node object.
                addresses = (node.status.addresses if node.status else None) or []
                external_ips = [
                    addr.address for addr in addresses if addr.type == "ExternalIP"
                ]

                if event_type in {"ADDED", "MODIFIED"}:
                    ips_by_node[node_name] = set(external_ips)
                    logger.debug(f"External IPs for node {node_name}: {external_ips}")
                elif event_type == "DELETED":
                    # The Kubernetes watch API reports removals as "DELETED".
                    ips_by_node.pop(node_name, None)
                    logger.debug(f"External IPs for node {node_name}: {external_ips}")

                external_node_ips = set().union(*ips_by_node.values())

                # Remove old IP sets before publishing the new one.
                while not external_ips_update_queue.empty():
                    external_ips_update_queue.get_nowait()
                await external_ips_update_queue.put(external_node_ips)
                logger.debug(
                    f"Added external IPs to update queue: {list(external_node_ips)}"
                )
        except client.exceptions.ApiException as e:
            logger.error(f"API Exception in watch_nodes: {e}")
            await asyncio.sleep(5)
        except asyncio.CancelledError:
            # Cancellation ends the watch loop.
            logger.info("Watch task was cancelled.")
            break
        except Exception as e:
            logger.error(f"Unexpected error in watch_nodes: {e}", exc_info=True)
            await asyncio.sleep(5)
async def watch_services(v1, external_ips_update_queue):
    """Watch labelled services and refresh their IP annotation on changes.

    Each ``ADDED``/``MODIFIED`` event takes the IP set from
    ``external_ips_update_queue`` (waiting if nothing is there yet), puts it
    back when the queue would otherwise be left empty, and hands it to
    ``update_service_annotation``. Other event types are ignored.

    Args:
        v1: API object whose ``list_service_for_all_namespaces`` is handed to
            the watch stream.
        external_ips_update_queue: ``asyncio.Queue`` holding sets of IP
            strings.

    Returns:
        None. Runs until the task is cancelled; API and unexpected errors
        are logged and the watch is restarted after a 5 second pause.
    """
    watcher = watch.Watch()
    handled_event_types = {"ADDED", "MODIFIED"}
    while True:
        try:
            logger.debug("Starting to watch services")
            service_events = watcher.stream(
                v1.list_service_for_all_namespaces,
                label_selector=SERVICE_LABEL,
                _request_timeout=SERVICE_REQUEST_TIMEOUT,
            )
            async for event in service_events:
                service = event["object"]
                if event["type"] not in handled_event_types:
                    continue

                logger.debug(
                    f"Processing event type: {event['type']} for service: {service.metadata.name}"
                )
                external_ipset = await external_ips_update_queue.get()
                # Put back the last one if the queue turned empty, so the
                # next service event still finds a value.
                if external_ips_update_queue.empty():
                    await external_ips_update_queue.put(external_ipset)
                logger.debug(f"Retrieved external IPs: {list(external_ipset)}")
                await update_service_annotation(v1, service, external_ipset)
                logger.debug(f"Service updated: {service.metadata.name}")
        except client.exceptions.ApiException as e:
            logger.error(f"API Exception in watch_services: {e}")
            await asyncio.sleep(5)
        except asyncio.CancelledError:
            logger.info("Watch task was cancelled.")
            break
        except Exception as e:
            logger.error(f"Unexpected error in watch_services: {e}", exc_info=True)
            await asyncio.sleep(5)
async def main():
    """Load in-cluster configuration and run both watchers concurrently.

    A single ``asyncio.Queue`` is shared between ``watch_nodes`` (producer)
    and ``watch_services`` (consumer). The coroutine returns once both
    watchers have finished.
    """
    config.load_incluster_config()
    # Own the ApiClient explicitly so it is closed when the watchers stop,
    # rather than relying on the implicit client created by CoreV1Api().
    async with client.ApiClient() as api_client:
        v1 = client.CoreV1Api(api_client)
        external_ips_update_queue = asyncio.Queue()
        await asyncio.gather(
            watch_nodes(v1, external_ips_update_queue),
            watch_services(v1, external_ips_update_queue),
        )


if __name__ == "__main__":
    asyncio.run(main())