Compare commits
31 Commits
d71f7f1c49
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 5045de1a64 | |||
| a92fd0bff0 | |||
| d636f4b59a | |||
| ebd9797d59 | |||
| bb50947391 | |||
| 799071601e | |||
| 15ad2e5902 | |||
| 971cbb667f | |||
| 69a595e708 | |||
| 5832314a19 | |||
| 2661c264c9 | |||
| 1c1a29b2ea | |||
| 59dd11f353 | |||
| a1cf0467da | |||
| 7efde36ca3 | |||
| 924c24b16b | |||
| c51c400a4b | |||
| 4a14d77cc5 | |||
| 041b45bb94 | |||
| e72c1e3c1e | |||
| 4b23c6bcd9 | |||
| 1d5ce487a6 | |||
| c386f2e648 | |||
| 87869abf8b | |||
| 87953222bb | |||
| b297643ef7 | |||
| d4db24a8d8 | |||
| c972a4e56b | |||
| 3e89eaaa26 | |||
| 573f16c5c2 | |||
| 4e71e70890 |
3
Makefile
3
Makefile
@@ -7,6 +7,9 @@ build:
|
|||||||
push:
|
push:
|
||||||
docker push code.whiteblossom.xyz/infra/node-external-ip-controller:latest
|
docker push code.whiteblossom.xyz/infra/node-external-ip-controller:latest
|
||||||
|
|
||||||
|
build-n-push:
|
||||||
|
docker buildx build --platform=linux/amd64,linux/arm64 -t code.whiteblossom.xyz/infra/node-external-ip-controller:latest --push -f node-external-ip-controller.Dockerfile . --push
|
||||||
|
|
||||||
builder:
|
builder:
|
||||||
docker buildx create \
|
docker buildx create \
|
||||||
--name container-builder \
|
--name container-builder \
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
FROM python:3.13-alpine3.19
|
FROM python:3.13-alpine3.19
|
||||||
|
|
||||||
# Install the Kubernetes Python client
|
# Install the Kubernetes Python client
|
||||||
RUN pip install kubernetes kubernetes_asyncio
|
RUN pip install kubernetes kubernetes_asyncio PyYAML
|
||||||
|
|
||||||
# Copy the controller script into the container
|
# Copy the controller script into the container
|
||||||
COPY node_external_ip_controller_async.py /app/node_external_ip_controller.py
|
COPY node_external_ip_controller_async.py /app/node_external_ip_controller.py
|
||||||
|
|||||||
@@ -1,76 +0,0 @@
|
|||||||
from kubernetes import client, config, watch
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
|
|
||||||
# Load in-cluster config
|
|
||||||
config.load_incluster_config()
|
|
||||||
|
|
||||||
# Set up Kubernetes API client
|
|
||||||
v1 = client.CoreV1Api()
|
|
||||||
|
|
||||||
# Configuration
|
|
||||||
SERVICE_NAME = "traefik"
|
|
||||||
NAMESPACE = "kube-system"
|
|
||||||
ANNOTATION_KEY = "kube-vip.io/loadbalancerIPs"
|
|
||||||
NODE_LABEL = "svccontroller.k3s.cattle.io/enablelb=true"
|
|
||||||
|
|
||||||
|
|
||||||
def update_service_annotation(external_ip):
|
|
||||||
# Get the current service object
|
|
||||||
service = v1.read_namespaced_service(SERVICE_NAME, NAMESPACE)
|
|
||||||
|
|
||||||
# Check if the annotation needs to be updated
|
|
||||||
current_annotation = service.metadata.annotations.get(ANNOTATION_KEY)
|
|
||||||
ipam_address = service.spec.load_balancer_ip
|
|
||||||
if ipam_address:
|
|
||||||
print(
|
|
||||||
f"service {SERVICE_NAME} has existing ipam IP: {ipam_address}", flush=True
|
|
||||||
)
|
|
||||||
target_annotation = ",".join({external_ip, ipam_address})
|
|
||||||
else:
|
|
||||||
target_annotation = external_ip
|
|
||||||
if current_annotation != target_annotation:
|
|
||||||
# Update the annotation
|
|
||||||
body = {"metadata": {"annotations": {ANNOTATION_KEY: target_annotation}}}
|
|
||||||
v1.patch_namespaced_service(SERVICE_NAME, NAMESPACE, body)
|
|
||||||
print(
|
|
||||||
f"Updated service {SERVICE_NAME} with new external IPs: {target_annotation}",
|
|
||||||
flush=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
w = watch.Watch()
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
for event in w.stream(
|
|
||||||
v1.list_node, label_selector=NODE_LABEL, _request_timeout=300
|
|
||||||
):
|
|
||||||
node = event["object"]
|
|
||||||
node_name = node.metadata.name
|
|
||||||
|
|
||||||
# Extract the external IP if it exists
|
|
||||||
external_ip = None
|
|
||||||
for address in node.status.addresses:
|
|
||||||
if address.type == "ExternalIP":
|
|
||||||
external_ip = address.address
|
|
||||||
break
|
|
||||||
|
|
||||||
if external_ip:
|
|
||||||
print(
|
|
||||||
f"Detected external IP {external_ip} for node {node_name}",
|
|
||||||
flush=True,
|
|
||||||
)
|
|
||||||
update_service_annotation(external_ip)
|
|
||||||
|
|
||||||
except client.exceptions.ApiException as e:
|
|
||||||
print(f"API Exception: {e}", flush=True)
|
|
||||||
time.sleep(5) # Wait before retrying
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Unexpected error: {e}", flush=True)
|
|
||||||
time.sleep(5)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,137 +1,163 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from kubernetes_asyncio import client, config, watch
|
from kubernetes_asyncio import client, config, watch
|
||||||
import os
|
import os
|
||||||
|
import logging
|
||||||
|
import ipaddress
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
SERVICE_NAME = os.getenv("SERVICE_NAME", "traefik")
|
|
||||||
SERVICE_NAME_LABEL_PATTERN = os.getenv(
|
|
||||||
"SERVICE_NAME_LABEL_PATTERN", "app.kubernetes.io/name=traefik"
|
|
||||||
)
|
|
||||||
NAMESPACE = os.getenv("NAMESPACE", "kube-system")
|
|
||||||
ANNOTATION_KEY = os.getenv("ANNOTATION_KEY", "kube-vip.io/loadbalancerIPs")
|
ANNOTATION_KEY = os.getenv("ANNOTATION_KEY", "kube-vip.io/loadbalancerIPs")
|
||||||
ZERO_GATEWAY_IP = os.getenv("ZERO_GATEWAY_IP", "172.28.10.1")
|
ZERO_GATEWAY_IP = os.getenv("ZERO_GATEWAY_IP", "172.28.10.1")
|
||||||
NODE_LABEL = os.getenv("NODE_LABEL", "svccontroller.k3s.cattle.io/enablelb=true")
|
NODE_LABEL = os.getenv("NODE_LABEL", "svccontroller.k3s.cattle.io/enablelb=true")
|
||||||
|
SERVICE_LABEL = os.getenv("SERVICE_LABEL", "enablezlan=true")
|
||||||
|
ZLAN_GATEWAY_IP_KEY = os.getenv("ZLAN_GATEWAY_IP_KEY", "zlanip")
|
||||||
SERVICE_REQUEST_TIMEOUT = int(os.getenv("SERVICE_REQUEST_TIMEOUT", 300))
|
SERVICE_REQUEST_TIMEOUT = int(os.getenv("SERVICE_REQUEST_TIMEOUT", 300))
|
||||||
NODE_REQUEST_TIMEOUT = int(os.getenv("NODE_REQUEST_TIMEOUT", 300))
|
NODE_REQUEST_TIMEOUT = int(os.getenv("NODE_REQUEST_TIMEOUT", 30))
|
||||||
|
|
||||||
|
# Logging configuration
|
||||||
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
async def update_service_annotation(v1, service_name, external_ips):
|
def is_valid_ip(ip):
|
||||||
try:
|
try:
|
||||||
# Get the current service object
|
ipaddress.ip_address(ip)
|
||||||
service = await v1.read_namespaced_service(service_name, NAMESPACE)
|
return True
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
|
|
||||||
# Check if the annotation needs to be updated
|
|
||||||
current_annotation = service.metadata.annotations.get(ANNOTATION_KEY)
|
async def update_service_annotation(v1, service, external_ipset):
|
||||||
target_annotation = ",".join(external_ips) + "," + ZERO_GATEWAY_IP
|
try:
|
||||||
if current_annotation != target_annotation:
|
service_name = service.metadata.name
|
||||||
# Update the annotation
|
namespace = service.metadata.namespace
|
||||||
body = {"metadata": {"annotations": {ANNOTATION_KEY: target_annotation}}}
|
logger.debug(f"Fetching service {service_name} in namespace {namespace}")
|
||||||
await v1.patch_namespaced_service(service_name, NAMESPACE, body)
|
service_obj = await v1.read_namespaced_service(service_name, namespace)
|
||||||
print(
|
|
||||||
f"Updated service {service_name} with new external IP: {target_annotation}",
|
current_annotation = service_obj.metadata.annotations.get(ANNOTATION_KEY, "")
|
||||||
flush=True,
|
annotated_ipset = set(current_annotation.split(","))
|
||||||
|
zlan_gateway_ip = service_obj.metadata.labels.get(ZLAN_GATEWAY_IP_KEY)
|
||||||
|
logger.debug(f"Zlan Gateway IP: {zlan_gateway_ip}")
|
||||||
|
|
||||||
|
if is_valid_ip(zlan_gateway_ip):
|
||||||
|
external_ipset = set(external_ipset)
|
||||||
|
external_ipset.add(zlan_gateway_ip)
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
f"Invalid Zlan Gateway IP: {zlan_gateway_ip}, excluding from target annotation"
|
||||||
)
|
)
|
||||||
|
external_ips = list(external_ipset)
|
||||||
|
target_annotation = ",".join(external_ips)
|
||||||
|
if annotated_ipset != external_ipset:
|
||||||
|
logger.debug(f"Current annotation: {current_annotation}: {annotated_ipset}")
|
||||||
|
logger.debug(f"Target annotation: {target_annotation}: {external_ipset}")
|
||||||
|
body = {"metadata": {"annotations": {ANNOTATION_KEY: target_annotation}}}
|
||||||
|
logger.debug(f"Patching service {service_name} with body: {body}")
|
||||||
|
await v1.patch_namespaced_service(service_name, namespace, body)
|
||||||
|
logger.info(
|
||||||
|
f"Updated service {service_name} with new external IP: {target_annotation}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.debug(f"No update required for service {service_name}")
|
||||||
|
|
||||||
except client.exceptions.ApiException as e:
|
except client.exceptions.ApiException as e:
|
||||||
print(f"API Exception in update_service_annotation: {e}", flush=True)
|
logger.error(f"API Exception in update_service_annotation: {e}")
|
||||||
|
|
||||||
|
|
||||||
async def watch_nodes():
|
async def watch_nodes(v1, external_ips_update_queue):
|
||||||
config.load_incluster_config()
|
|
||||||
v1 = client.CoreV1Api()
|
|
||||||
w = watch.Watch()
|
w = watch.Watch()
|
||||||
|
external_node_ipset = set()
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
logger.debug("Starting to watch nodes")
|
||||||
async for event in w.stream(
|
async for event in w.stream(
|
||||||
v1.list_node,
|
v1.list_node,
|
||||||
label_selector=NODE_LABEL,
|
label_selector=NODE_LABEL,
|
||||||
_request_timeout=NODE_REQUEST_TIMEOUT,
|
_request_timeout=NODE_REQUEST_TIMEOUT,
|
||||||
):
|
):
|
||||||
node = event["object"]
|
node = event["object"]
|
||||||
node_name = node.metadata.name
|
event_type = event["type"]
|
||||||
|
logger.debug(
|
||||||
# Check for external IP
|
f"Received {event_type} event for node: {node.metadata.name}"
|
||||||
external_ips = []
|
|
||||||
for address in node.status.addresses:
|
|
||||||
if address.type == "ExternalIP":
|
|
||||||
external_ips.append(address.address)
|
|
||||||
|
|
||||||
if len(external_ips) > 0:
|
|
||||||
print(
|
|
||||||
f"Detected external IP {str(external_ips)} for node {node_name}",
|
|
||||||
flush=True,
|
|
||||||
)
|
)
|
||||||
|
external_ips = [
|
||||||
# Get all services that need to be updated with this external IP
|
addr.address
|
||||||
# services = await v1.list_namespaced_service(
|
for addr in node.status.addresses
|
||||||
# NAMESPACE, label_selector=SERVICE_NAME_PATTERN
|
if addr.type == "ExternalIP"
|
||||||
# )
|
]
|
||||||
service = await v1.read_namespaced_service(SERVICE_NAME, NAMESPACE)
|
if event_type in {"ADDED", "MODIFIED"}:
|
||||||
# for service in services.items:
|
external_node_ipset.update(external_ips)
|
||||||
await update_service_annotation(
|
logger.debug(
|
||||||
v1, service.metadata.name, external_ips
|
f"External IPs for node {node.metadata.name}: {external_ips}"
|
||||||
)
|
)
|
||||||
|
elif event_type == "REMOVED":
|
||||||
|
for ip in external_ips:
|
||||||
|
if ip in external_node_ipset:
|
||||||
|
external_node_ipset.remove(ip)
|
||||||
|
logger.debug(
|
||||||
|
f"External IPs for node {node.metadata.name}: {external_ips}"
|
||||||
|
)
|
||||||
|
external_node_ips = list(external_node_ipset)
|
||||||
|
# remove old ip lists before populating
|
||||||
|
while not external_ips_update_queue.empty():
|
||||||
|
_previous_external_ips = await external_ips_update_queue.get()
|
||||||
|
await external_ips_update_queue.put(set(external_node_ips))
|
||||||
|
logger.debug(f"Added external IPs to update queue: {external_node_ips}")
|
||||||
|
|
||||||
except client.exceptions.ApiException as e:
|
except client.exceptions.ApiException as e:
|
||||||
print(f"API Exception in watch_nodes: {e}", flush=True)
|
logger.error(f"API Exception in watch_nodes: {e}")
|
||||||
await asyncio.sleep(5)
|
await asyncio.sleep(5)
|
||||||
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
print("Watch task was cancelled.", flush=True)
|
logger.info("Watch task was cancelled.")
|
||||||
break
|
break
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Unexpected error in watch_nodes: {e}", flush=True)
|
logger.error(f"Unexpected error in watch_nodes: {e}", exc_info=True)
|
||||||
await asyncio.sleep(5)
|
await asyncio.sleep(5)
|
||||||
|
|
||||||
|
|
||||||
async def watch_services():
|
async def watch_services(v1, external_ips_update_queue):
|
||||||
config.load_incluster_config()
|
|
||||||
v1 = client.CoreV1Api()
|
|
||||||
w = watch.Watch()
|
w = watch.Watch()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
logger.debug("Starting to watch services")
|
||||||
async for event in w.stream(
|
async for event in w.stream(
|
||||||
v1.list_namespaced_service,
|
v1.list_service_for_all_namespaces,
|
||||||
NAMESPACE,
|
label_selector=SERVICE_LABEL,
|
||||||
label_selector=SERVICE_NAME_LABEL_PATTERN,
|
|
||||||
_request_timeout=SERVICE_REQUEST_TIMEOUT,
|
_request_timeout=SERVICE_REQUEST_TIMEOUT,
|
||||||
):
|
):
|
||||||
service = event["object"]
|
service = event["object"]
|
||||||
service_name = service.metadata.name
|
if event["type"] in {"ADDED", "MODIFIED"}:
|
||||||
if event["type"] == "ADDED":
|
logger.debug(
|
||||||
print(f"New service detected: {service_name}", flush=True)
|
f"Processing event type: {event['type']} for service: {service.metadata.name}"
|
||||||
|
|
||||||
# Fetch current external IP for nodes that match the label
|
|
||||||
nodes = await v1.list_node(label_selector=NODE_LABEL)
|
|
||||||
for node in nodes.items:
|
|
||||||
for address in node.status.addresses:
|
|
||||||
if address.type == "ExternalIP":
|
|
||||||
external_ip = address.address
|
|
||||||
await update_service_annotation(
|
|
||||||
v1, service_name, external_ip
|
|
||||||
)
|
)
|
||||||
break
|
external_ipset = await external_ips_update_queue.get()
|
||||||
|
# put back the last one if it turns empty
|
||||||
|
if external_ips_update_queue.empty():
|
||||||
|
await external_ips_update_queue.put(external_ipset)
|
||||||
|
logger.debug(f"Retrieved external IPs: {list(external_ipset)}")
|
||||||
|
await update_service_annotation(v1, service, external_ipset)
|
||||||
|
logger.debug(f"Service updated: {service.metadata.name}")
|
||||||
|
|
||||||
except client.exceptions.ApiException as e:
|
except client.exceptions.ApiException as e:
|
||||||
print(f"API Exception in watch_services: {e}", flush=True)
|
logger.error(f"API Exception in watch_services: {e}")
|
||||||
await asyncio.sleep(5)
|
await asyncio.sleep(5)
|
||||||
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
print("Watch task was cancelled.", flush=True)
|
logger.info("Watch task was cancelled.")
|
||||||
break
|
break
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Unexpected error in watch_services: {e}", flush=True)
|
logger.error(f"Unexpected error in watch_services: {e}", exc_info=True)
|
||||||
await asyncio.sleep(5)
|
await asyncio.sleep(5)
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
|
config.load_incluster_config()
|
||||||
|
v1 = client.CoreV1Api()
|
||||||
|
|
||||||
# Run watch_nodes and watch_services concurrently
|
external_ips_update_queue = asyncio.Queue()
|
||||||
await asyncio.gather(watch_nodes(), watch_services())
|
await asyncio.gather(
|
||||||
|
watch_nodes(v1, external_ips_update_queue),
|
||||||
|
watch_services(v1, external_ips_update_queue),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user