Page MenuHomePhabricator
Paste P53484

add_k8s_nodes.py
ActivePublic

Authored by JMeybohm on Nov 15 2023, 1:36 PM.
Referenced Files
F41521797: add_k8s_nodes.py
Nov 20 2023, 10:15 AM
F41513790: add_k8s_nodes.py
Nov 17 2023, 10:15 AM
F41513777: add_k8s_nodes.py
Nov 17 2023, 10:04 AM
F41507748: add_k8s_nodes.py
Nov 15 2023, 1:36 PM
Subscribers
None
#!/usr/bin/env python3
##
## Moved to https://gitlab.wikimedia.org/repos/sre/serviceops-kitchensink/-/blob/main/add_k8s_node/add_k8s_node.py?ref_type=heads
##
import argparse
import subprocess
import sys
from contextlib import contextmanager
from pathlib import Path
import ruamel
from ruamel.yaml import YAML
def parse_arguments():
    """Parse the command line of the script.

    Returns:
        argparse.Namespace with ``puppet_dir`` and ``homer_dir`` (both
        mandatory, converted to absolute ``Path`` objects) and ``fqdns``
        (a list of one or more FQDNs given as positional arguments).
    """
    cli = argparse.ArgumentParser()
    # Both repository options share the same shape, only flag and help differ.
    repo_options = (
        ("--puppet-dir", "Path to the puppet git repo"),
        ("--homer-dir", "Path to the homer public git repo"),
    )
    for flag, description in repo_options:
        cli.add_argument(
            flag,
            type=lambda p: Path(p).absolute(),
            required=True,
            help=description,
        )
    cli.add_argument("fqdns", nargs="+", help="FQDNs of the nodes to add")
    return cli.parse_args()
def get_parent(fqdn):
    """Return the LLDP parent that facter reports on a remote host.

    Runs ``sudo facter -p lldp.parent`` on ``fqdn`` over ssh.

    Args:
        fqdn: Host to connect to.

    Returns:
        The standard output of the remote command with surrounding
        whitespace stripped.

    Raises:
        subprocess.CalledProcessError: if the ssh invocation exits non-zero.
    """
    # Use an argument vector rather than a shell string so the host name is
    # never parsed by a local shell. The remote command is still handed to ssh
    # as one single argument, exactly like the quoted string was before.
    remote_command = "/bin/bash -c 'sudo facter -p lldp.parent'"
    result = subprocess.run(
        ["ssh", fqdn, remote_command],
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return result.stdout.strip()
def parse_known_hosts():
    """Build an address lookup from the wmf-prod known_hosts file.

    Reads ``~/.ssh/known_hosts.d/wmf-prod`` and keeps only the lines whose
    first space-separated field holds exactly three comma-separated values,
    which are taken as ``fqdn,ipv4,ipv6``. All other lines are ignored.

    Returns:
        dict mapping each fqdn to an ``(ipv4, ipv6)`` tuple.
    """
    hosts_file = Path.home() / ".ssh/known_hosts.d/wmf-prod"
    addresses = {}
    with hosts_file.open("r") as handle:
        for entry in handle:
            names = entry.partition(" ")[0].split(",")
            if len(names) != 3:
                # Not a "fqdn,ipv4,ipv6" entry.
                continue
            fqdn, ipv4, ipv6 = names
            addresses[fqdn] = (ipv4, ipv6)
    return addresses
def homer_patches(fqdns):
    """Collect the entries that have to be added to the homer repository.

    For every FQDN the addresses are looked up in the parsed known_hosts
    data, the datacenter is derived from the FQDN and the LLDP parent is
    queried from the host itself via ``get_parent``.

    Args:
        fqdns: Iterable of host FQDNs.

    Returns:
        Tuple ``(devices_yaml, sites_yaml)``. ``devices_yaml`` maps an LLDP
        parent name to a list of ``(hostname, {4: ipv4, 6: ipv6})`` tuples
        for eqiad hosts whose parent is in row ``e`` or ``f``. ``sites_yaml``
        maps a datacenter name to the same kind of list for all other hosts.

    The process is terminated with exit status 1, after printing an error,
    when a host is missing from the known_hosts data, is neither in eqiad
    nor codfw, or reports an LLDP parent the row cannot be derived from.
    """
    known_hosts = parse_known_hosts()
    devices_yaml = {}
    sites_yaml = {}
    for fqdn in fqdns:
        try:
            ipv4, ipv6 = known_hosts[fqdn]
        except KeyError:
            print(f"{bold('ERROR:')} Could not find both ipv4 and ipv6 for the server {fqdn}, please check netbox!")
            sys.exit(1)

        if "eqiad" in fqdn:
            datacenter = "eqiad"
        elif "codfw" in fqdn:
            datacenter = "codfw"
        else:
            print(f"{bold('ERROR:')} Host {fqdn} is not in codfw or eqiad, please double check!")
            sys.exit(1)

        hostname = fqdn.split(".", 1)[0]
        parent = get_parent(fqdn)
        try:
            # The row is the first character of the second dash-separated
            # field of the parent name.
            row = parent.split("-", 2)[1][0]
        except IndexError:
            # An empty or unexpectedly shaped parent would otherwise end in a
            # bare traceback; report it like the other validation failures.
            print(
                f"{bold('ERROR:')} Could not determine the row of {fqdn} "
                f"from its LLDP parent {parent!r}, please double check!"
            )
            sys.exit(1)

        peer = (hostname, {4: ipv4, 6: ipv6})
        if datacenter == "eqiad" and row in ("e", "f"):
            devices_yaml.setdefault(parent, []).append(peer)
        else:
            sites_yaml.setdefault(datacenter, []).append(peer)
    return (devices_yaml, sites_yaml)
def bold(s):
    """Wrap ``s`` in the ANSI escape sequences that switch bold on and off."""
    return "\033[1m{}\033[0m".format(s)
def init_ruamel():
    """Create the round-trip ruamel ``YAML`` object used by this script.

    Returns:
        A ``YAML(typ="rt")`` instance with quote preservation enabled, the
        indentation set to mapping=2/sequence=4/offset=2 and a line width of
        4096 characters.
    """
    handler = YAML(typ="rt")
    # Only break lines once they exceed 4096 characters
    handler.width = 4096
    # Lists are indented below their parent key
    handler.indent(mapping=2, sequence=4, offset=2)
    # Keep mixed quotes and quotes around strings that would not need them
    handler.preserve_quotes = True
    return handler
@contextmanager
def kubernetes_yaml(path):
    """Context manager to edit a YAML file that writes nil/null as ``~``.

    Loads ``path``, yields the parsed document so the caller can modify it in
    place and dumps it back to ``path`` when the ``with`` block is left. The
    dump sits in a ``finally`` clause, so it also runs if the block raised.
    """

    def tilde_for_none(representer, _value):
        # kubernetes.yaml spells nil/null as "~"
        return representer.represent_scalar("tag:yaml.org,2002:null", "~")

    handler = init_ruamel()
    handler.representer.add_representer(type(None), tilde_for_none)
    document = handler.load(path)
    try:
        yield document
    finally:
        handler.dump(document, path)
@contextmanager
def puppet_yaml(path):
    """Context manager to edit a YAML file in place.

    Loads ``path``, yields the parsed document so the caller can modify it
    and dumps it back to ``path`` when the ``with`` block is left. The dump
    sits in a ``finally`` clause, so it also runs if the block raised.
    """
    handler = init_ruamel()
    document = handler.load(path)
    try:
        yield document
    finally:
        handler.dump(document, path)
@contextmanager
def homer_yaml(path):
    """Context manager to edit a YAML file written with an explicit start.

    Loads ``path``, yields the parsed document so the caller can modify it
    and dumps it back to ``path`` with ``explicit_start`` enabled when the
    ``with`` block is left. The dump sits in a ``finally`` clause, so it also
    runs if the block raised.
    """
    handler = init_ruamel()
    handler.explicit_start = True
    document = handler.load(path)
    try:
        yield document
    finally:
        handler.dump(document, path)
def main():
    """Patch the homer and puppet checkouts and print the remaining steps.

    The hosts given on the command line are added to the homer
    ``config/sites.yaml`` / ``config/devices.yaml`` k8s BGP sections, to the
    ``cluster_nodes`` list in ``hieradata/common/kubernetes.yaml`` and to the
    ``kubernetes`` section of ``conftool-data/node/<dc>.yaml`` (removing them
    from the ``appserver`` and ``api_appserver`` sections there). Afterwards a
    list of manual follow-up commands is printed.
    """
    args = parse_arguments()

    # Group the requested hosts by the datacenter name found in their FQDN.
    fqdns_by_dc = {}
    for fqdn in args.fqdns:
        for dc in ["eqiad", "codfw"]:
            if dc in fqdn:
                fqdns_by_dc.setdefault(dc, []).append(fqdn)
                break

    devices_yaml, sites_yaml = homer_patches(args.fqdns)
    # A set, because several hosts may need the very same homer run.
    homer_commands = set()

    if sites_yaml:
        with homer_yaml(args.homer_dir / "config/sites.yaml") as yd:
            for k, v in sites_yaml.items():
                for hostname, ips in v:
                    yd[k]["site_bgp"]["k8s"][hostname] = ruamel.yaml.comments.CommentedMap(ips)
                    yd[k]["site_bgp"]["k8s"][hostname].fa.set_flow_style()
                # Keep the peers sorted by hostname
                yd[k]["site_bgp"]["k8s"] = dict(sorted(yd[k]["site_bgp"]["k8s"].items()))
                homer_commands.add(f"homer 'cr*{k}*' commit -m 'T351074'")

    if devices_yaml:
        with homer_yaml(args.homer_dir / "config/devices.yaml") as yd:
            for k, v in devices_yaml.items():
                for hostname, ips in v:
                    yd[k]["config"]["device_bgp"]["k8s"][hostname] = ruamel.yaml.comments.CommentedMap(ips)
                    yd[k]["config"]["device_bgp"]["k8s"][hostname].fa.set_flow_style()
                # Keep the peers sorted by hostname
                yd[k]["config"]["device_bgp"]["k8s"] = dict(sorted(yd[k]["config"]["device_bgp"]["k8s"].items()))
                # Query built from the first and third dash-separated field of
                # the device's short name: "<first>-*<third>*"
                homer_query = "{0}-*{2}*".format(*k.split(".")[0].split("-"))
                homer_commands.add(f"homer '{homer_query}' commit -m 'T351074'")

    for dc in ["eqiad", "codfw"]:
        if dc not in fqdns_by_dc:
            continue
        # Add nodes as kubernetes workers
        with kubernetes_yaml(args.puppet_dir / "hieradata/common/kubernetes.yaml") as yd:
            yd["kubernetes::clusters"]["main"][dc]["cluster_nodes"].extend(fqdns_by_dc[dc])
            # De-duplicate and sort the node list
            yd["kubernetes::clusters"]["main"][dc]["cluster_nodes"] = sorted(
                set(yd["kubernetes::clusters"]["main"][dc]["cluster_nodes"])
            )
        with puppet_yaml(args.puppet_dir / f"conftool-data/node/{dc}.yaml") as yd:
            for node in fqdns_by_dc[dc]:
                # Remove nodes from appserver/api_appserver cluster
                for cluster in ["appserver", "api_appserver"]:
                    try:
                        del yd[dc][cluster][node]
                    except KeyError:
                        # The node was not part of that cluster, nothing to remove.
                        pass
                # Add nodes to kubernetes cluster (kubesvc)
                yd[dc]["kubernetes"][node] = ruamel.yaml.comments.CommentedSeq(
                    [
                        "kubesvc",
                    ]
                )
                yd[dc]["kubernetes"][node].fa.set_flow_style()
            yd[dc]["kubernetes"] = dict(sorted(yd[dc]["kubernetes"].items()))

    print("####### TODOs #######")
    print(f"1.) {bold('Depool')} hosts:")
    print(f"sudo cumin '{','.join(args.fqdns)}' 'depool; sleep 120'")
    print(
        f"2.) Add the new nodes to profile::installserver::preseed::preseed_per_hostname in {bold(args.puppet_dir / 'hieradata/role/common/apt_repo.yaml')}."
    )
    print(f"3.) Include the new nodes in the appropriate role in {bold(args.puppet_dir / 'manifests/site.pp')}.")
    print(f"4.) {bold('Verify')} and {bold('commit')} changes to homer and puppet repo, review, merge etc.")
    print(f"5.) {bold('Run puppet on apt')} (to generate netboot.cfg):")
    # The closing quote was missing, which made the printed command unusable
    # when copy-pasted.
    # NOTE(review): the heading says "apt" but the alias is "A:api" - confirm
    # the alias is the intended one.
    print("sudo cumin A:api 'run-puppet-agent -q'")
    print(f"6.) {bold('Run the reimage')} cookbook:")
    for node in [fqdn.split(".", 1)[0] for fqdn in args.fqdns]:
        print(f"sudo sre.hosts.reimage -t T351074 --os bullseye -p 7 {node}")
    print(f"7.) {bold('Run the following')} homer commands:")
    # Sorted so the output is the same on every run
    for cmd in sorted(homer_commands):
        print(cmd)
    print(f"8.) {bold('Uncordon')} the nodes:")
    for dc in ["eqiad", "codfw"]:
        if dc in fqdns_by_dc:
            print(f"kube-env admin {dc}")
            print(f"kubectl uncordon {' '.join(fqdns_by_dc[dc])}")


if __name__ == "__main__":
    main()