Here is what I have right now, after carefully checking everything. I don’t see what I am doing wrong, but maybe you can spot the issue?
kubectl logs:
[root@gsil-kube04 ~]# kubectl logs -f -n awx awx-operator-controller-manager-6ffc56f846-2pn8n
... <output> ...
--------------------------- Ansible Task StdOut -------------------------------
TASK [Check to make sure backup directory exists on PVC] ********************************
fatal: [localhost]: FAILED! => {"changed": true, "rc": 1, "return_code": 1, "stderr": "stat: missing operand\nTry 'stat --help' for more information.\n", "stderr_lines": ["stat: missing operand", "Try 'stat --help' for more information."], "stdout": "", "stdout_lines": []}
restore file:
[root@gsil-kube04 ~]# cat restore-awx.yaml
---
apiVersion: awx.ansible.com/v1beta1
kind: AWXRestore
metadata:
  name: restore-awx
  namespace: awx
spec:
  deployment_name: awx
  postgres_image: gsil-docker1.idm.gsil.org:5001/postgres
  postgres_image_version: '13'
  backup_pvc: awx-backup
  backup_directory: /backups/tower-openshift-backup-2024-09-19-175222
PV & PVC status:
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
awx-backup 2Gi RWX Delete Bound awx/awx-backup local-storage 6s
postgres-pv 2Gi RWX Delete Bound awx/postgres-13-awx-postgres-13-0 local-storage 84d
[root@gsil-kube04 ~]# kubectl get pvc -n awx
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
awx-backup Bound awx-backup 2Gi RWX local-storage 12s
postgres-13-awx-postgres-13-0 Bound postgres-pv 2Gi RWX local-storage 84d
directory structure on kube05:
[root@gsil-kube05 backup]# pwd
/var/lib/postgresql
[root@gsil-kube05 postgresql]# ls -lah
total 4.0K
drwx------. 4 root root 32 Sep 24 15:35 .
drwxr-xr-x. 69 root root 4.0K Jul 31 14:15 ..
drwxr-xr-x. 3 root root 54 Sep 23 14:57 backup
drwxr-xr-x. 3 root root 18 Jul 1 15:42 data
[root@gsil-kube05 postgresql]# cd backup/
[root@gsil-kube05 backup]# ls -lah
total 0
drwxr-xr-x. 3 root root 54 Sep 23 14:57 .
drwx------. 4 root root 32 Sep 24 15:35 ..
drwxr-xr-x. 2 root root 59 Sep 23 14:57 tower-openshift-backup-2024-09-19-175222
[root@gsil-kube05 backup]# cd tower-openshift-backup-2024-09-19-175222/
[root@gsil-kube05 tower-openshift-backup-2024-09-19-175222]# ls -lah
total 151M
drwxr-xr-x. 2 root root 59 Sep 23 14:57 .
drwxr-xr-x. 3 root root 54 Sep 23 14:57 ..
-rwxr-xr-x. 1 root root 3.0K Sep 23 14:57 awx_object
-rwxr-xr-x. 1 root root 51K Sep 23 14:57 secrets.yml
-rwxr-xr-x. 1 root root 150M Sep 23 14:57 tower.db
cluster storage configuration:
[root@gsil-kube04 ~]# cat storage.yaml
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  annotations:
    storageClass.kubernetes.io/is-default-class: "true"
  name: local-storage
  namespace: awx
provisioner: kubernetes.io/no-provisioner
volumeBindingMode: Immediate
#volumeBindingMode: WaitForFirstConsumer
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: postgres-pv
  namespace: awx
spec:
  capacity:
    storage: 2Gi
  volumeMode: Filesystem
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Delete
  storageClassName: local-storage
  local:
    path: /var/lib/postgresql/data
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - gsil-kube04.idm.gsil.org
                - gsil-kube05.idm.gsil.org
                - gsil-kube06.idm.gsil.org
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: postgres-13-awx-postgres-13-0
  namespace: awx
spec:
  storageClassName: local-storage
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 2Gi
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: awx-backup
  namespace: awx
spec:
  capacity:
    storage: 2Gi
  volumeMode: Filesystem
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Delete
  storageClassName: local-storage
  local:
    path: /var/lib/postgresql/backup
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - gsil-kube04.idm.gsil.org
                - gsil-kube05.idm.gsil.org
                - gsil-kube06.idm.gsil.org
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: awx-backup
  namespace: awx
spec:
  storageClassName: local-storage
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 2Gi
kubectl describe restore-awx-db-management:
[root@gsil-kube04 localadm]# kubectl describe po/restore-awx-db-management -n awx
Name: restore-awx-db-management
Namespace: awx
Priority: 0
Service Account: default
Node: gsil-kube05.idm.gsil.org/x.x.8.38
Start Time: Tue, 24 Sep 2024 15:48:15 +0000
Labels: app.kubernetes.io/component=awx
app.kubernetes.io/managed-by=awx-operator
app.kubernetes.io/operator-version=2.11.0
app.kubernetes.io/part-of=restore-awx
Annotations: <none>
Status: Terminating (lasts <invalid>)
Termination Grace Period: 30s
IP: x.x.1.214
IPs:
IP: x.x.1.214
Containers:
restore-awx-db-management:
Container ID: containerd://f705f7796e4517061ba1c5d34dcd73cf5fba53d9cd985776922f04a889d61c1d
Image: gsil-docker1.idm.gsil.org:5001/postgres:13
Image ID: gsil-docker1.idm.gsil.org:5001/postgres@sha256:5f4b5af578e8d63f371b724f7b83230125230793282cd2e08d221452dbb1fffe
Port: <none>
Host Port: <none>
Command:
sleep
infinity
State: Running
Started: Tue, 24 Sep 2024 15:48:15 +0000
Ready: True
Restart Count: 0
Limits:
cpu: 1
memory: 4Gi
Requests:
cpu: 25m
memory: 32Mi
Environment: <none>
Mounts:
/backups from restore-awx-backup (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-vgls5 (ro)
Conditions:
Type Status
Initialized True
Ready True
ContainersReady True
PodScheduled True
Volumes:
restore-awx-backup:
Type: PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
ClaimName: awx-backup
ReadOnly: false
kube-api-access-vgls5:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 11s default-scheduler Successfully assigned awx/restore-awx-db-management to gsil-kube05.idm.gsil.org
Normal Pulled 11s kubelet Container image "gsil-docker1.idm.gsil.org:5001/postgres:13" already present on machine
Normal Created 11s kubelet Created container restore-awx-db-management
Normal Started 11s kubelet Started container restore-awx-db-management
Normal Killing 1s kubelet Stopping container restore-awx-db-management
Helm configuration and deployment for AWX:
[root@gsil-kube04 ~]# cat awxvalues.yaml
AWX:
  # enable use of awx-deploy template
  enabled: true
  name: awx
  spec:
    replicas: 2
    service_type: NodePort
    nodeport_port: 30080
    admin_user: admin
    hostname: awx.idm.gsil.org
    image: gsil-docker1.idm.gsil.org:5001/quay.io/ansible/awx
    image_version: 23.7.0
    init_container_image: gsil-docker1.idm.gsil.org:5001/quay.io/ansible/awx-ee
    init_container_image_version: latest
    ee_images:
      - name: AWX EE
        image: gsil-docker1.idm.gsil.org:5001/quay.io/ansible/awx-ee:23.7.0
    ee_extra_env: |
      - name: RECEPTOR_KUBE_SUPPORT_RECONNECT
        value: enabled
    postgres_image: gsil-docker1.idm.gsil.org:5001/postgres
    postgres_image_version: "13"
    postgres_selector: |
      nodefor: psql
    control_plane_ee_image: gsil-docker1.idm.gsil.org:5001/quay.io/ansible/awx-ee:23.7.0
    redis_image: gsil-docker1.idm.gsil.org:5001/redis
    redis_image_version: "7"
    bundle_cacert_secret: awx-custom-certs
    ldap_cacert_secret: awx-custom-certs
    ldap_password_secret: awx-ldap-password
    extra_settings:
      - setting: AUTH_LDAP_SERVER_URI
        value: >-
          ... <secret_something_here> ...
customVolumes:
  postgres:
    enabled: true
    hostPath: /var/lib/postgresql
    size: 2Gi
    storageClassName: local-storage
  projects:
    enabled: true
    hostPath: /opt/projects/data
    size: 5Gi