Skip to content

Commit 27cd4e2

Browse files
committed
Replace manual escluster setup with script
1 parent 2d8838d commit 27cd4e2

File tree

1 file changed: +22 -290 lines changed

1 file changed

+22
-290
lines changed

.github/workflows/escluster.yml

Lines changed: 22 additions & 290 deletions
Original file line number | Diff line number | Diff line change
@@ -87,45 +87,43 @@ jobs:
8787
cd testing/v2/development
8888
docker compose -p ${{ env.UNIQUE_ID }} up -d pipeline
8989
90-
- name: Install Python requirements
91-
run: |
92-
cd testing/v2/development
93-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
94-
cd /home/lme-user/LME/testing/v2/installers/azure && \
95-
pip install -r requirements.txt
96-
"
97-
98-
- name: Build Azure Ubuntu instances with Windows VM
90+
- name: Set up cluster via setup_cluster.sh
9991
env:
10092
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
10193
AZURE_CLIENT_SECRET: ${{ secrets.AZURE_SECRET }}
10294
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT }}
10395
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
10496
run: |
10597
cd testing/v2/development
98+
INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
99+
100+
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
101+
printf '%s\n' \
102+
'export RESOURCE_GROUP=pipe-${{ env.UNIQUE_ID }}' \
103+
'export PUBLIC_IP=${{ env.IP_ADDRESS }}/32' \
104+
'export VM_SIZE=Standard_D8_v4' \
105+
'export LOCATION=${{ inputs.azure_region || '\''centralus'\'' }}' \
106+
'export AUTO_SHUTDOWN_TIME=23:00' \
107+
'export LME_USER=lme-user' \
108+
'export BRANCH=${{ env.BRANCH_NAME }}' \
109+
'export CLUSTER_SIZE=3' \
110+
> ${INSTALLERS_DIR}/exporter.txt && \
111+
echo '=== exporter.txt ===' && cat ${INSTALLERS_DIR}/exporter.txt
112+
"
113+
106114
docker compose -p ${{ env.UNIQUE_ID }} exec -T \
107115
-e AZURE_CLIENT_ID \
108116
-e AZURE_CLIENT_SECRET \
109117
-e AZURE_TENANT_ID \
110118
-e AZURE_SUBSCRIPTION_ID \
111119
pipeline bash -c "
112-
cd /home/lme-user/LME/testing/v2/installers && \
113-
python3 ./azure/build_azure_linux_network.py \
114-
-g pipe-${{ env.UNIQUE_ID }} \
115-
-s ${{ env.IP_ADDRESS }}/32 \
116-
-vs Standard_D8_v4 \
117-
-l ${{ inputs.azure_region || 'centralus' }} \
118-
-ast 23:00 \
119-
-c 3 \
120-
-w \
121-
-y
120+
cd /home/lme-user/LME/testing/v2/installers/cluster_installer && \
121+
./setup_cluster.sh
122122
"
123123
124124
- name: Extract cluster metadata
125125
run: |
126126
cd testing/v2/development
127-
echo "Waiting for VMs to boot..."
128-
sleep 120
129127
INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
130128
MACHINES_FILE="${INSTALLERS_DIR}/pipe-${{ env.UNIQUE_ID }}.machines.json"
131129
@@ -141,14 +139,10 @@ jobs:
141139
"jq -c '[.linux_vms[].private_ip]' ${MACHINES_FILE}" | tr -d '\r\n ')
142140
echo "ALL_PRIVATE_IPS_JSON=$ALL_PRIVATE_IPS_JSON" >> $GITHUB_ENV
143141
144-
WINDOWS_IP=$(docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
145-
az network public-ip show --resource-group pipe-${{ env.UNIQUE_ID }} --name ws1-public-ip --query ipAddress --output tsv 2>/dev/null || \
146-
az vm show --resource-group pipe-${{ env.UNIQUE_ID }} --name ws1 --show-details --query publicIps --output tsv 2>/dev/null || \
147-
az vm list-ip-addresses --resource-group pipe-${{ env.UNIQUE_ID }} --query '[?virtualMachine.name==\`ws1\`].virtualMachine.network.publicIpAddresses[0].ipAddress' --output tsv 2>/dev/null || \
148-
echo ''
149-
" | tr -d '\r\n ')
142+
WINDOWS_IP=$(docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c \
143+
"cat ${INSTALLERS_DIR}/pipe-${{ env.UNIQUE_ID }}.ws1.ip.txt 2>/dev/null || echo ''" | tr -d '\r\n ')
150144
if [ -z "$WINDOWS_IP" ]; then
151-
echo "Warning: Could not retrieve Windows VM public IP, using private IP instead"
145+
echo "Warning: Could not read ws1 IP file, using private IP fallback"
152146
WINDOWS_IP="10.1.0.4"
153147
fi
154148
echo "WINDOWS_IP=$WINDOWS_IP" >> $GITHUB_ENV
@@ -158,268 +152,6 @@ jobs:
158152
echo "All private IPs: $ALL_PRIVATE_IPS_JSON"
159153
echo "Windows IP: $WINDOWS_IP"
160154
161-
- name: Set up SSH access to all Linux VMs
162-
run: |
163-
cd testing/v2/development
164-
INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
165-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
166-
cd ${INSTALLERS_DIR} && \
167-
PASSWORD=\$(cat pipe-${{ env.UNIQUE_ID }}.password.txt) && \
168-
for IP in \$(jq -r '.linux_vms[].ip_address' pipe-${{ env.UNIQUE_ID }}.machines.json); do
169-
echo \"Waiting for SSH on \$IP...\"
170-
for i in \$(seq 1 30); do
171-
if sshpass -p \"\$PASSWORD\" ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 lme-user@\$IP 'echo ok' 2>/dev/null; then
172-
echo \"SSH ready on \$IP\"
173-
break
174-
fi
175-
echo -n '.'
176-
sleep 10
177-
done
178-
echo \"Copying SSH key to \$IP...\"
179-
./lib/copy_ssh_key.sh lme-user \$IP pipe-${{ env.UNIQUE_ID }}.password.txt
180-
done
181-
"
182-
183-
- name: Set up SSH trust from master to child nodes
184-
run: |
185-
cd testing/v2/development
186-
INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
187-
188-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
189-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
190-
'ssh-keygen -t rsa -b 4096 -N \"\" -f ~/.ssh/id_rsa -q <<< y 2>/dev/null || true'
191-
"
192-
193-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
194-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
195-
'sudo apt-get update -qq && sudo apt-get install -y -qq sshpass'
196-
"
197-
198-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
199-
cd ${INSTALLERS_DIR} && \
200-
PASSWORD=\$(cat pipe-${{ env.UNIQUE_ID }}.password.txt) && \
201-
for ip in \$(jq -r '.linux_vms[1:][].private_ip' pipe-${{ env.UNIQUE_ID }}.machines.json); do
202-
echo \"Copying master SSH key to \$ip...\"
203-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
204-
\"sshpass -p '\$PASSWORD' ssh-copy-id -o StrictHostKeyChecking=no lme-user@\$ip\"
205-
echo \"Testing SSH from master to \$ip...\"
206-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
207-
\"ssh -o StrictHostKeyChecking=no lme-user@\$ip hostname\"
208-
done
209-
"
210-
211-
- name: Clone repo and prepare master for cluster install
212-
run: |
213-
cd testing/v2/development
214-
215-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
216-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
217-
git clone https://github.com/cisagov/LME.git ~/LME &&
218-
cd ~/LME &&
219-
git fetch --all --tags &&
220-
if git show-ref --tags --verify --quiet \"refs/tags/${{ env.BRANCH_NAME }}\"; then
221-
git checkout ${{ env.BRANCH_NAME }}
222-
else
223-
git checkout -t origin/${{ env.BRANCH_NAME }} || git checkout ${{ env.BRANCH_NAME }}
224-
fi
225-
'
226-
"
227-
228-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
229-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
230-
cp ~/LME/config/example.env ~/LME/config/lme-environment.env &&
231-
sed -i \"s/IPVAR=.*/IPVAR=${{ env.MASTER_PRIVATE_IP }}/\" ~/LME/config/lme-environment.env
232-
'
233-
"
234-
235-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
236-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
237-
sudo apt-get update && sudo apt-get install -y ansible jq
238-
'
239-
"
240-
241-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
242-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
243-
cd ~/LME/ansible && ansible-galaxy collection install -r requirements.yml --timeout 30
244-
'
245-
" || echo "Galaxy install failed, continuing with existing collections..."
246-
247-
- name: Run site.yml in cluster mode on master
248-
run: |
249-
cd testing/v2/development
250-
INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
251-
252-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
253-
cd ${INSTALLERS_DIR} && \
254-
MASTER_PIP=\$(jq -r '.linux_vms[0].private_ip' pipe-${{ env.UNIQUE_ID }}.machines.json) && \
255-
ALL_IPS=\$(jq -c '[.linux_vms[].private_ip]' pipe-${{ env.UNIQUE_ID }}.machines.json) && \
256-
jq -n --argjson hosts \"\$ALL_IPS\" --arg master \"\$MASTER_PIP\" \
257-
'{lme_cluster_mode: true, es_cluster_seed_hosts: \$hosts, es_master_publish_host: \$master}' \
258-
> /tmp/cluster_vars.json && \
259-
echo 'Cluster extra vars:' && cat /tmp/cluster_vars.json && \
260-
scp /tmp/cluster_vars.json lme-user@${{ env.AZURE_IP }}:/tmp/cluster_vars.json
261-
"
262-
263-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
264-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
265-
cd ~/LME && ansible-playbook ansible/site.yml -e @/tmp/cluster_vars.json
266-
'
267-
"
268-
269-
- name: Create cluster inventory and deploy to child nodes
270-
run: |
271-
cd testing/v2/development
272-
INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
273-
274-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
275-
cd ${INSTALLERS_DIR} && \
276-
MACHINES_FILE=pipe-${{ env.UNIQUE_ID }}.machines.json && \
277-
MASTER_PIP=\$(jq -r '.linux_vms[0].private_ip' \$MACHINES_FILE) && \
278-
MASTER_PUB=\$(jq -r '.linux_vms[0].ip_address' \$MACHINES_FILE) && \
279-
TMPFILE=\$(mktemp) && \
280-
echo 'all:' > \$TMPFILE && \
281-
echo ' vars:' >> \$TMPFILE && \
282-
echo \" es_master_host: \${MASTER_PIP}\" >> \$TMPFILE && \
283-
echo ' es_cluster_seed_hosts:' >> \$TMPFILE && \
284-
for ip in \$(jq -r '.linux_vms[].private_ip' \$MACHINES_FILE); do
285-
echo \" - \${ip}\" >> \$TMPFILE
286-
done && \
287-
echo ' children:' >> \$TMPFILE && \
288-
echo ' elasticsearch:' >> \$TMPFILE && \
289-
echo ' hosts:' >> \$TMPFILE && \
290-
echo ' es1:' >> \$TMPFILE && \
291-
echo \" ansible_host: \${MASTER_PIP}\" >> \$TMPFILE && \
292-
echo ' ansible_connection: local' >> \$TMPFILE && \
293-
echo ' es_node_name: lme-elasticsearch' >> \$TMPFILE && \
294-
echo ' es_is_initial_master: true' >> \$TMPFILE && \
295-
echo \" es_publish_host: \${MASTER_PIP}\" >> \$TMPFILE && \
296-
i=2 && \
297-
for ip in \$(jq -r '.linux_vms[1:][].private_ip' \$MACHINES_FILE); do
298-
echo \" es\${i}:\" >> \$TMPFILE && \
299-
echo \" ansible_host: \${ip}\" >> \$TMPFILE && \
300-
echo ' ansible_user: lme-user' >> \$TMPFILE && \
301-
echo \" es_node_name: es\${i}\" >> \$TMPFILE && \
302-
echo \" es_publish_host: \${ip}\" >> \$TMPFILE
303-
i=\$((i + 1))
304-
done && \
305-
echo 'Generated cluster inventory:' && cat \$TMPFILE && \
306-
scp \$TMPFILE lme-user@\${MASTER_PUB}:~/LME/ansible/inventory/cluster.yml && \
307-
rm \$TMPFILE
308-
"
309-
310-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
311-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
312-
cd ~/LME && ansible-playbook -i ansible/inventory/cluster.yml ansible/elasticsearch.yml
313-
'
314-
"
315-
316-
- name: Verify cluster health
317-
run: |
318-
cd testing/v2/development
319-
echo "Waiting for cluster to stabilize..."
320-
sleep 120
321-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
322-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} 'sudo bash -c \"\
323-
source /opt/lme/scripts/extract_secrets.sh -q && \
324-
echo === Cluster Health === && \
325-
curl -sk -u \\\"elastic:\\\$elastic\\\" https://localhost:9200/_cluster/health?pretty && \
326-
echo === Cluster Nodes === && \
327-
curl -sk -u \\\"elastic:\\\$elastic\\\" https://localhost:9200/_cat/nodes?v\"'
328-
"
329-
330-
- name: Set up NFS server on master
331-
run: |
332-
cd testing/v2/development
333-
INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
334-
335-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
336-
cd ${INSTALLERS_DIR} && \
337-
MACHINES_FILE=pipe-${{ env.UNIQUE_ID }}.machines.json && \
338-
MASTER_PIP=\$(jq -r '.linux_vms[0].private_ip' \$MACHINES_FILE) && \
339-
NFS_EXPORTS='/srv/es-snapshots' && \
340-
for ip in \$(jq -r '.linux_vms[].private_ip' \$MACHINES_FILE); do
341-
NFS_EXPORTS=\"\${NFS_EXPORTS} \${ip}(rw,sync,no_subtree_check,no_root_squash)\"
342-
done && \
343-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
344-
\"sudo apt-get install -y nfs-kernel-server && \
345-
sudo mkdir -p /srv/es-snapshots && sudo chmod 777 /srv/es-snapshots && \
346-
echo '\${NFS_EXPORTS}' | sudo tee /etc/exports && \
347-
sudo exportfs -ra && sudo systemctl start nfs-kernel-server\"
348-
"
349-
350-
- name: Mount NFS on all nodes
351-
run: |
352-
cd testing/v2/development
353-
INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
354-
355-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
356-
cd ${INSTALLERS_DIR} && \
357-
MACHINES_FILE=pipe-${{ env.UNIQUE_ID }}.machines.json && \
358-
MASTER_PIP=\$(jq -r '.linux_vms[0].private_ip' \$MACHINES_FILE) && \
359-
for pub_ip in \$(jq -r '.linux_vms[].ip_address' \$MACHINES_FILE); do
360-
echo \"=== Mounting NFS on \${pub_ip} ===\" && \
361-
ssh -o StrictHostKeyChecking=no lme-user@\${pub_ip} \
362-
\"sudo apt-get install -y nfs-common && \
363-
sudo mkdir -p /mnt/es-snapshots && \
364-
sudo mount -t nfs \${MASTER_PIP}:/srv/es-snapshots /mnt/es-snapshots && \
365-
grep -q /mnt/es-snapshots /etc/fstab || echo '\${MASTER_PIP}:/srv/es-snapshots /mnt/es-snapshots nfs defaults 0 0' | sudo tee -a /etc/fstab\"
366-
done
367-
"
368-
369-
- name: Configure Elasticsearch NFS snapshot path on all nodes
370-
run: |
371-
cd testing/v2/development
372-
INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
373-
374-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
375-
cd ${INSTALLERS_DIR} && \
376-
MACHINES_FILE=pipe-${{ env.UNIQUE_ID }}.machines.json && \
377-
for pub_ip in \$(jq -r '.linux_vms[].ip_address' \$MACHINES_FILE); do
378-
echo \"=== Configuring ES on \${pub_ip} ===\" && \
379-
ssh -o StrictHostKeyChecking=no lme-user@\${pub_ip} '
380-
# path.repo now set by Ansible template (elasticsearch.yml.j2) when lme_cluster_mode=true
381-
# sudo grep -q /usr/share/elasticsearch/snapshots /opt/lme/config/elasticsearch.yml || \
382-
# sudo sed -i \"/\\/usr\\/share\\/elasticsearch\\/backups/a\\\\ - /usr/share/elasticsearch/snapshots\" /opt/lme/config/elasticsearch.yml
383-
sudo mkdir -p /etc/containers/systemd/lme-elasticsearch.container.d/
384-
echo \"[Container]
385-
Volume=/mnt/es-snapshots:/usr/share/elasticsearch/snapshots\" | sudo tee /etc/containers/systemd/lme-elasticsearch.container.d/nfs-mount.conf
386-
sudo systemctl daemon-reload && sudo systemctl restart lme-elasticsearch
387-
'
388-
done
389-
"
390-
391-
- name: Verify NFS and cluster health after restart
392-
run: |
393-
cd testing/v2/development
394-
INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
395-
echo "Waiting for Elasticsearch to restart on all nodes..."
396-
sleep 60
397-
398-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
399-
echo '=== Creating test file on master NFS export ===' && \
400-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
401-
'sudo touch /srv/es-snapshots/nfs_test_file' && \
402-
cd ${INSTALLERS_DIR} && \
403-
MACHINES_FILE=pipe-${{ env.UNIQUE_ID }}.machines.json && \
404-
for pub_ip in \$(jq -r '.linux_vms[1:][].ip_address' \$MACHINES_FILE); do
405-
echo \"=== Checking NFS on \${pub_ip} ===\" && \
406-
ssh -o StrictHostKeyChecking=no lme-user@\${pub_ip} \
407-
'df -h /mnt/es-snapshots && ls -la /mnt/es-snapshots/nfs_test_file'
408-
done && \
409-
echo '=== Cleaning up test file ===' && \
410-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
411-
'sudo rm /srv/es-snapshots/nfs_test_file'
412-
"
413-
414-
docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
415-
ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} 'sudo bash -c \"\
416-
source /opt/lme/scripts/extract_secrets.sh -q && \
417-
echo === Cluster Health After NFS === && \
418-
curl -sk -u \\\"elastic:\\\$elastic\\\" https://localhost:9200/_cluster/health?pretty && \
419-
echo === Cluster Nodes After NFS === && \
420-
curl -sk -u \\\"elastic:\\\$elastic\\\" https://localhost:9200/_cat/nodes?v\"'
421-
"
422-
423155
- name: Retrieve Elastic password
424156
env:
425157
AZURE_IP: ${{ env.AZURE_IP }}

0 commit comments

Comments
 (0)