Compare commits

..

34 Commits

Author SHA1 Message Date
5045de1a64 fix: bug 2025-01-12 20:08:37 +05:30
a92fd0bff0 fix: traceback if unexpected error 2025-01-12 20:07:17 +05:30
d636f4b59a fix: consider zlan ip when updating 2025-01-12 20:03:46 +05:30
ebd9797d59 fix: logs 2025-01-12 19:49:19 +05:30
bb50947391 fix: log only if updating 2025-01-12 19:41:05 +05:30
799071601e fix: if no ip exists set empty set 2025-01-12 19:40:00 +05:30
15ad2e5902 fix: compare only if ip set has changed 2025-01-12 19:34:16 +05:30
971cbb667f fix: edge cases 2025-01-12 19:19:32 +05:30
69a595e708 fix: put await bug 2025-01-12 18:58:39 +05:30
5832314a19 fix: update service with the last updated external_ips 2025-01-12 18:57:22 +05:30
2661c264c9 fix: keep ip and selector separate 2025-01-12 18:23:19 +05:30
1c1a29b2ea fix: add more log 2025-01-12 18:13:59 +05:30
59dd11f353 fix: add logs for service watcher 2025-01-12 18:12:14 +05:30
a1cf0467da cleanup: remove legacy sync code 2025-01-12 18:07:56 +05:30
7efde36ca3 fix: add ip address validation 2025-01-12 17:47:03 +05:30
924c24b16b fix: use gateway ip set in zlangateway lable 2025-01-12 17:35:42 +05:30
c51c400a4b fix: log all ips 2025-01-12 17:10:14 +05:30
4a14d77cc5 feat: enable node addition/removal based externalip updates 2025-01-12 17:08:31 +05:30
041b45bb94 logs: add debug logs 2025-01-12 16:16:25 +05:30
e72c1e3c1e fix: setup proper logging 2025-01-12 16:12:07 +05:30
4b23c6bcd9 feat: use service labels to identify service that needs to be annotated 2025-01-12 15:59:34 +05:30
1d5ce487a6 script: build-n-push make target 2025-01-12 15:31:24 +05:30
c386f2e648 fix: set default filename 2025-01-12 15:30:12 +05:30
87869abf8b fix: provide configurable file path 2025-01-12 14:35:31 +05:30
87953222bb feat: listen for a list of services to update gateways 2025-01-12 14:31:00 +05:30
b297643ef7 fix: load gateway services from yaml 2025-01-12 13:13:39 +05:30
d4db24a8d8 fix: add gateway service list support 2025-01-12 13:02:06 +05:30
c972a4e56b cleanup: formatting 2025-01-12 12:48:39 +05:30
3e89eaaa26 node names logging 2024-11-08 01:48:49 +05:30
573f16c5c2 accumulate external ips before updating service 2024-11-08 01:46:22 +05:30
4e71e70890 update service name to web listener 2024-11-08 01:24:23 +05:30
d71f7f1c49 enable multiple gateway ip support 2024-11-07 23:43:17 +05:30
a225b8ec5f read values from env vars 2024-11-07 02:55:27 +05:30
f9a2713530 use service label patter 2024-11-01 14:10:35 +05:30
4 changed files with 116 additions and 154 deletions

View File

@@ -7,6 +7,9 @@ build:
push:
docker push code.whiteblossom.xyz/infra/node-external-ip-controller:latest
build-n-push:
docker buildx build --platform=linux/amd64,linux/arm64 -t code.whiteblossom.xyz/infra/node-external-ip-controller:latest --push -f node-external-ip-controller.Dockerfile . --push
builder:
docker buildx create \
--name container-builder \

View File

@@ -2,7 +2,7 @@
FROM python:3.13-alpine3.19
# Install the Kubernetes Python client
RUN pip install kubernetes kubernetes_asyncio
RUN pip install kubernetes kubernetes_asyncio PyYAML
# Copy the controller script into the container
COPY node_external_ip_controller_async.py /app/node_external_ip_controller.py

View File

@@ -1,76 +0,0 @@
from kubernetes import client, config, watch
import os
import time
# Load in-cluster config
config.load_incluster_config()
# Set up Kubernetes API client
v1 = client.CoreV1Api()
# Configuration
SERVICE_NAME = "traefik"
NAMESPACE = "kube-system"
ANNOTATION_KEY = "kube-vip.io/loadbalancerIPs"
NODE_LABEL = "svccontroller.k3s.cattle.io/enablelb=true"
def update_service_annotation(external_ip):
# Get the current service object
service = v1.read_namespaced_service(SERVICE_NAME, NAMESPACE)
# Check if the annotation needs to be updated
current_annotation = service.metadata.annotations.get(ANNOTATION_KEY)
ipam_address = service.spec.load_balancer_ip
if ipam_address:
print(
f"service {SERVICE_NAME} has existing ipam IP: {ipam_address}", flush=True
)
target_annotation = ",".join({external_ip, ipam_address})
else:
target_annotation = external_ip
if current_annotation != target_annotation:
# Update the annotation
body = {"metadata": {"annotations": {ANNOTATION_KEY: target_annotation}}}
v1.patch_namespaced_service(SERVICE_NAME, NAMESPACE, body)
print(
f"Updated service {SERVICE_NAME} with new external IPs: {target_annotation}",
flush=True,
)
def main():
w = watch.Watch()
while True:
try:
for event in w.stream(
v1.list_node, label_selector=NODE_LABEL, _request_timeout=300
):
node = event["object"]
node_name = node.metadata.name
# Extract the external IP if it exists
external_ip = None
for address in node.status.addresses:
if address.type == "ExternalIP":
external_ip = address.address
break
if external_ip:
print(
f"Detected external IP {external_ip} for node {node_name}",
flush=True,
)
update_service_annotation(external_ip)
except client.exceptions.ApiException as e:
print(f"API Exception: {e}", flush=True)
time.sleep(5) # Wait before retrying
except Exception as e:
print(f"Unexpected error: {e}", flush=True)
time.sleep(5)
if __name__ == "__main__":
main()

View File

@@ -1,128 +1,163 @@
import asyncio
from kubernetes_asyncio import client, config, watch
import os
import logging
import ipaddress
# Configuration
SERVICE_NAME_PATTERN = (
"traefik" # Define the service name pattern or label to identify services
)
NAMESPACE = "kube-system"
ANNOTATION_KEY = "kube-vip.io/loadbalancerIPs"
ZERO_GATEWAY_IP = "172.28.10.1"
NODE_LABEL = "svccontroller.k3s.cattle.io/enablelb=true"
ANNOTATION_KEY = os.getenv("ANNOTATION_KEY", "kube-vip.io/loadbalancerIPs")
ZERO_GATEWAY_IP = os.getenv("ZERO_GATEWAY_IP", "172.28.10.1")
NODE_LABEL = os.getenv("NODE_LABEL", "svccontroller.k3s.cattle.io/enablelb=true")
SERVICE_LABEL = os.getenv("SERVICE_LABEL", "enablezlan=true")
ZLAN_GATEWAY_IP_KEY = os.getenv("ZLAN_GATEWAY_IP_KEY", "zlanip")
SERVICE_REQUEST_TIMEOUT = int(os.getenv("SERVICE_REQUEST_TIMEOUT", 300))
NODE_REQUEST_TIMEOUT = int(os.getenv("NODE_REQUEST_TIMEOUT", 30))
# Logging configuration
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
async def update_service_annotation(v1, service_name, external_ip):
def is_valid_ip(ip):
try:
# Get the current service object
service = await v1.read_namespaced_service(service_name, NAMESPACE)
ipaddress.ip_address(ip)
return True
except ValueError:
return False
# Check if the annotation needs to be updated
current_annotation = service.metadata.annotations.get(ANNOTATION_KEY)
target_annotation = ",".join({external_ip, ZERO_GATEWAY_IP})
if current_annotation != target_annotation:
# Update the annotation
async def update_service_annotation(v1, service, external_ipset):
try:
service_name = service.metadata.name
namespace = service.metadata.namespace
logger.debug(f"Fetching service {service_name} in namespace {namespace}")
service_obj = await v1.read_namespaced_service(service_name, namespace)
current_annotation = service_obj.metadata.annotations.get(ANNOTATION_KEY, "")
annotated_ipset = set(current_annotation.split(","))
zlan_gateway_ip = service_obj.metadata.labels.get(ZLAN_GATEWAY_IP_KEY)
logger.debug(f"Zlan Gateway IP: {zlan_gateway_ip}")
if is_valid_ip(zlan_gateway_ip):
external_ipset = set(external_ipset)
external_ipset.add(zlan_gateway_ip)
else:
logger.debug(
f"Invalid Zlan Gateway IP: {zlan_gateway_ip}, excluding from target annotation"
)
external_ips = list(external_ipset)
target_annotation = ",".join(external_ips)
if annotated_ipset != external_ipset:
logger.debug(f"Current annotation: {current_annotation}: {annotated_ipset}")
logger.debug(f"Target annotation: {target_annotation}: {external_ipset}")
body = {"metadata": {"annotations": {ANNOTATION_KEY: target_annotation}}}
await v1.patch_namespaced_service(service_name, NAMESPACE, body)
print(
logger.debug(f"Patching service {service_name} with body: {body}")
await v1.patch_namespaced_service(service_name, namespace, body)
logger.info(
f"Updated service {service_name} with new external IP: {target_annotation}"
)
else:
logger.debug(f"No update required for service {service_name}")
except client.exceptions.ApiException as e:
print(f"API Exception in update_service_annotation: {e}")
logger.error(f"API Exception in update_service_annotation: {e}")
async def watch_nodes():
await config.load_incluster_config()
v1 = client.CoreV1Api()
async def watch_nodes(v1, external_ips_update_queue):
w = watch.Watch()
external_node_ipset = set()
while True:
try:
logger.debug("Starting to watch nodes")
async for event in w.stream(
v1.list_node, label_selector=NODE_LABEL, _request_timeout=300
v1.list_node,
label_selector=NODE_LABEL,
_request_timeout=NODE_REQUEST_TIMEOUT,
):
node = event["object"]
node_name = node.metadata.name
# Check for external IP
external_ip = None
for address in node.status.addresses:
if address.type == "ExternalIP":
external_ip = address.address
break
if external_ip:
print(f"Detected external IP {external_ip} for node {node_name}")
# Get all services that need to be updated with this external IP
services = await v1.list_namespaced_service(
NAMESPACE, label_selector=SERVICE_NAME_PATTERN
event_type = event["type"]
logger.debug(
f"Received {event_type} event for node: {node.metadata.name}"
)
external_ips = [
addr.address
for addr in node.status.addresses
if addr.type == "ExternalIP"
]
if event_type in {"ADDED", "MODIFIED"}:
external_node_ipset.update(external_ips)
logger.debug(
f"External IPs for node {node.metadata.name}: {external_ips}"
)
for service in services.items:
await update_service_annotation(
v1, service.metadata.name, external_ip
)
elif event_type == "REMOVED":
for ip in external_ips:
if ip in external_node_ipset:
external_node_ipset.remove(ip)
logger.debug(
f"External IPs for node {node.metadata.name}: {external_ips}"
)
external_node_ips = list(external_node_ipset)
# remove old ip lists before populating
while not external_ips_update_queue.empty():
_previous_external_ips = await external_ips_update_queue.get()
await external_ips_update_queue.put(set(external_node_ips))
logger.debug(f"Added external IPs to update queue: {external_node_ips}")
except client.exceptions.ApiException as e:
print(f"API Exception in watch_nodes: {e}")
logger.error(f"API Exception in watch_nodes: {e}")
await asyncio.sleep(5)
except asyncio.CancelledError:
print("Watch task was cancelled.")
logger.info("Watch task was cancelled.")
break
except Exception as e:
print(f"Unexpected error in watch_nodes: {e}")
logger.error(f"Unexpected error in watch_nodes: {e}", exc_info=True)
await asyncio.sleep(5)
async def watch_services():
await config.load_incluster_config()
v1 = client.CoreV1Api()
async def watch_services(v1, external_ips_update_queue):
w = watch.Watch()
while True:
try:
logger.debug("Starting to watch services")
async for event in w.stream(
v1.list_namespaced_service,
NAMESPACE,
label_selector=SERVICE_NAME_PATTERN,
_request_timeout=300,
v1.list_service_for_all_namespaces,
label_selector=SERVICE_LABEL,
_request_timeout=SERVICE_REQUEST_TIMEOUT,
):
if event["type"] == "ADDED":
service = event["object"]
service_name = service.metadata.name
print(f"New service detected: {service_name}")
# Fetch current external IP for nodes that match the label
nodes = await v1.list_node(label_selector=NODE_LABEL)
for node in nodes.items:
for address in node.status.addresses:
if address.type == "ExternalIP":
external_ip = address.address
await update_service_annotation(
v1, service_name, external_ip
)
break
service = event["object"]
if event["type"] in {"ADDED", "MODIFIED"}:
logger.debug(
f"Processing event type: {event['type']} for service: {service.metadata.name}"
)
external_ipset = await external_ips_update_queue.get()
# put back the last one if it turns empty
if external_ips_update_queue.empty():
await external_ips_update_queue.put(external_ipset)
logger.debug(f"Retrieved external IPs: {list(external_ipset)}")
await update_service_annotation(v1, service, external_ipset)
logger.debug(f"Service updated: {service.metadata.name}")
except client.exceptions.ApiException as e:
print(f"API Exception in watch_services: {e}")
logger.error(f"API Exception in watch_services: {e}")
await asyncio.sleep(5)
except asyncio.CancelledError:
print("Watch task was cancelled.")
logger.info("Watch task was cancelled.")
break
except Exception as e:
print(f"Unexpected error in watch_services: {e}")
logger.error(f"Unexpected error in watch_services: {e}", exc_info=True)
await asyncio.sleep(5)
async def main():
await config.load_incluster_config()
config.load_incluster_config()
v1 = client.CoreV1Api()
# Run watch_nodes and watch_services concurrently
await asyncio.gather(watch_nodes(), watch_services())
external_ips_update_queue = asyncio.Queue()
await asyncio.gather(
watch_nodes(v1, external_ips_update_queue),
watch_services(v1, external_ips_update_queue),
)
if __name__ == "__main__":