Hello all,
I'm trying to upgrade a working cluster from OpenShift Origin
3.9 to OKD 3.10, and the control plane update fails at one point
with a "host not found" error.
I've looked into the problem a bit and found this issue on
GitHub: https://github.com/openshift/openshift-ansible/issues/9935
where michaelgugino points out that "when upgrading from
3.9, your hostnames match the node names in 'oc get nodes'
otherwise, we won't be able to find the CSRs for your nodes."
My issue is precisely this: the node names are the machines'
IP addresses, not their hostnames. I noticed it upon
installation, but as the 3.9 cluster was functioning all right,
I let it be.
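For context, the mismatch is easy to see by comparing the
cluster's view of the nodes with each host's own FQDN (names
below are from my setup):

  # node names as registered in the cluster (these show the IPs):
  oc get nodes
  # actual FQDN on each host:
  hostname -f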
As far as I can tell, DNS resolution is set up properly, with
all machines able to resolve each other by FQDN; however, the
3.9 installer configured the node names with their respective
IP addresses, and I don't know how to fix this.
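For what it's worth, this is roughly how I checked resolution,
using one of my nodes as an example:

  # forward lookup by FQDN:
  getent hosts node1.oshift-pinfold.intra
  # reverse lookup by IP:
  getent hosts 192.168.150.22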
I should mention that the cluster is deployed inside an
OpenStack project, but the install config doesn't use the
OpenShift-OpenStack integration. However, when running
~/openshift-ansible/playbooks/byo/openshift_facts.yml I get
references to the underlying OpenStack (the way I see it, the
installer somehow "figures out" the underlying OpenStack and
treats it as a provider). I've pasted the output for one of
the nodes below.
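For completeness, I gathered the facts with something like the
following (the inventory path is from my setup; yours will
differ):

  ansible-playbook -i /etc/ansible/hosts \
    ~/openshift-ansible/playbooks/byo/openshift_facts.yml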
Has any of you come across this node name configuration
problem, and were you able to solve it?
Is there any procedure for changing the node names of a working
cluster? I should say that the masters are also (infrastructure)
nodes, so I'm guessing the procedure, if there is one, would
involve deprecating one master at a time, while the plain nodes
would go through a delete / change config / re-add cycle,
something like the sketch below.
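For a plain node, I imagine something along these lines; this is
only a guess on my part (I'm assuming nodeName in
/etc/origin/node/node-config.yaml is the right knob, and the
names are from my cluster), so please correct me:

  # 1. evacuate and remove the node under its old (IP) name:
  oc adm drain 192.168.150.22 --ignore-daemonsets --delete-local-data
  oc delete node 192.168.150.22

  # 2. on the host itself, point the node at its FQDN
  #    (assuming nodeName in node-config.yaml controls the
  #    registered name):
  sed -i 's/^nodeName:.*/nodeName: node1.oshift-pinfold.intra/' \
    /etc/origin/node/node-config.yaml
  systemctl restart origin-node

  # 3. approve any pending CSRs and check the node re-registers
  #    under the new name:
  oc get csr
  oc adm certificate approve <csr-name>
  oc get nodes

I've also seen an openshift_kubelet_name_override inventory
variable mentioned for newer openshift-ansible releases, but I
don't know whether it applies to an existing 3.9 cluster.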
Thank you!
Output from the openshift_facts playbook:
ok: [node1.oshift-pinfold.intra] => {
"result": {
"ansible_facts": {
"openshift": {
"common": {
"all_hostnames": [
"node1.oshift-pinfold.intra",
"192.168.150.22"
],
"config_base": "/etc/origin",
"deployment_subtype": "basic",
"deployment_type": "origin",
"dns_domain": "cluster.local",
"examples_content_version": "v3.9",
"generate_no_proxy_hosts": true,
"hostname": "192.168.150.22",
"internal_hostnames": [
"192.168.150.22"
],
"ip": "192.168.150.22",
"kube_svc_ip": "172.30.0.1",
"portal_net": "172.30.0.0/16
<http://172.30.0.0/16>",
"public_hostname":
"node1.oshift-pinfold.intra",
"public_ip": "192.168.150.22",
"short_version": "3.9",
"version": "3.9.0",
"version_gte_3_10": false,
"version_gte_3_6": true,
"version_gte_3_7": true,
"version_gte_3_8": true,
"version_gte_3_9": true
},
"current_config": {
"roles": [
"node"
]
},
"node": {
"bootstrapped": false,
"nodename": "192.168.150.22",
"sdn_mtu": "1408"
},
"provider": {
"metadata": {
"availability_zone": "nova",
"ec2_compat": {
"ami-id": "None",
"ami-launch-index": "0",
"ami-manifest-path": "FIXME",
"block-device-mapping": {
"ami": "vda",
"ebs0": "/dev/vda",
"ebs1": "/dev/vdb",
"root": "/dev/vda"
},
"hostname": "node1.novalocal",
"instance-action": "none",
"instance-id": "i-00000583",
"instance-type": "1cpu-2ram-20disk",
"local-hostname": "node1.novalocal",
"local-ipv4": "192.168.150.22",
"placement": {
"availability-zone": "nova"
},
"public-hostname": "node1.novalocal",
"public-ipv4": [],
"public-keys/": "0=xxxxxxxxxxx",
"reservation-id": "r-la13azpq",
"security-groups": [
"DefaultInternal",
"oshift-node"
]
},
"hostname": "node1.novalocal",
"keys": [
{
"data": "ssh-rsa
AAAA...........................................................
Generated-by-Nova",
"name": "xxxxxxxxxx",
"type": "ssh"
}
],
"launch_index": 0,
"name": "node1",
"project_id":
"2a2.........................",
"uuid":
"80bb................................."
},
"name": "openstack",
"network": {
"hostname": "192.168.150.22",
"interfaces": [],
"ip": "192.168.150.22",
"ipv6_enabled": false,
"public_hostname": [],
"public_ip": []
},
"zone": "nova"
}
}
},
"changed": false,
"failed": false
}
}