@@ -87,45 +87,43 @@ jobs:
8787 cd testing/v2/development
8888 docker compose -p ${{ env.UNIQUE_ID }} up -d pipeline
8989
90- - name : Install Python requirements
91- run : |
92- cd testing/v2/development
93- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
94- cd /home/lme-user/LME/testing/v2/installers/azure && \
95- pip install -r requirements.txt
96- "
97-
98- - name : Build Azure Ubuntu instances with Windows VM
# Provisions the test cluster from inside the already-running `pipeline`
# compose service. Two commands run in that container:
#   1. write exporter.txt (one `export KEY=value` line per setting) into the
#      installers directory and echo it to the job log;
#   2. run cluster_installer/setup_cluster.sh with the Azure credentials
#      forwarded from this step's `env`.
# NOTE(review): setup_cluster.sh presumably sources exporter.txt for its
# settings - confirm against the script, which is not visible in this workflow.
- name: Set up cluster via setup_cluster.sh
  env:
    AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
    AZURE_CLIENT_SECRET: ${{ secrets.AZURE_SECRET }}
    AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT }}
    AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
  run: |
    cd testing/v2/development
    # Path inside the container; expanded by the runner's shell because the
    # bash -c payload below is double-quoted.
    INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"

    # Workflow expressions are substituted before the shell ever runs, and an
    # expression string literal is delimited by plain single quotes (a quote is
    # escaped by doubling it, never with a backslash). The region fallback must
    # therefore be written as a plain 'centralus' literal; shell-style quote
    # escaping inside the expression is a syntax error that invalidates the
    # whole workflow.
    docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
      printf '%s\n' \
        'export RESOURCE_GROUP=pipe-${{ env.UNIQUE_ID }}' \
        'export PUBLIC_IP=${{ env.IP_ADDRESS }}/32' \
        'export VM_SIZE=Standard_D8_v4' \
        'export LOCATION=${{ inputs.azure_region || 'centralus' }}' \
        'export AUTO_SHUTDOWN_TIME=23:00' \
        'export LME_USER=lme-user' \
        'export BRANCH=${{ env.BRANCH_NAME }}' \
        'export CLUSTER_SIZE=3' \
        > ${INSTALLERS_DIR}/exporter.txt && \
      echo '=== exporter.txt ===' && cat ${INSTALLERS_DIR}/exporter.txt
    "

    # Credentials are passed by variable name only (-e NAME), so their values
    # never appear on the command line.
    docker compose -p ${{ env.UNIQUE_ID }} exec -T \
      -e AZURE_CLIENT_ID \
      -e AZURE_CLIENT_SECRET \
      -e AZURE_TENANT_ID \
      -e AZURE_SUBSCRIPTION_ID \
      pipeline bash -c "
        cd /home/lme-user/LME/testing/v2/installers/cluster_installer && \
        ./setup_cluster.sh
      "
123123
124124 - name : Extract cluster metadata
125125 run : |
126126 cd testing/v2/development
127- echo "Waiting for VMs to boot..."
128- sleep 120
129127 INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
130128 MACHINES_FILE="${INSTALLERS_DIR}/pipe-${{ env.UNIQUE_ID }}.machines.json"
131129
@@ -141,14 +139,10 @@ jobs:
141139 "jq -c '[.linux_vms[].private_ip]' ${MACHINES_FILE}" | tr -d '\r\n ')
142140 echo "ALL_PRIVATE_IPS_JSON=$ALL_PRIVATE_IPS_JSON" >> $GITHUB_ENV
143141
144- WINDOWS_IP=$(docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
145- az network public-ip show --resource-group pipe-${{ env.UNIQUE_ID }} --name ws1-public-ip --query ipAddress --output tsv 2>/dev/null || \
146- az vm show --resource-group pipe-${{ env.UNIQUE_ID }} --name ws1 --show-details --query publicIps --output tsv 2>/dev/null || \
147- az vm list-ip-addresses --resource-group pipe-${{ env.UNIQUE_ID }} --query '[?virtualMachine.name==\`ws1\`].virtualMachine.network.publicIpAddresses[0].ipAddress' --output tsv 2>/dev/null || \
148- echo ''
149- " | tr -d '\r\n ')
142+ WINDOWS_IP=$(docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c \
143+ "cat ${INSTALLERS_DIR}/pipe-${{ env.UNIQUE_ID }}.ws1.ip.txt 2>/dev/null || echo ''" | tr -d '\r\n ')
150144 if [ -z "$WINDOWS_IP" ]; then
151- echo "Warning: Could not retrieve Windows VM public IP , using private IP instead "
145+ echo "Warning: Could not read ws1 IP file , using private IP fallback "
152146 WINDOWS_IP="10.1.0.4"
153147 fi
154148 echo "WINDOWS_IP=$WINDOWS_IP" >> $GITHUB_ENV
@@ -158,268 +152,6 @@ jobs:
158152 echo "All private IPs: $ALL_PRIVATE_IPS_JSON"
159153 echo "Windows IP: $WINDOWS_IP"
160154
161- - name : Set up SSH access to all Linux VMs
162- run : |
163- cd testing/v2/development
164- INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
165- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
166- cd ${INSTALLERS_DIR} && \
167- PASSWORD=\$(cat pipe-${{ env.UNIQUE_ID }}.password.txt) && \
168- for IP in \$(jq -r '.linux_vms[].ip_address' pipe-${{ env.UNIQUE_ID }}.machines.json); do
169- echo \"Waiting for SSH on \$IP...\"
170- for i in \$(seq 1 30); do
171- if sshpass -p \"\$PASSWORD\" ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 lme-user@\$IP 'echo ok' 2>/dev/null; then
172- echo \"SSH ready on \$IP\"
173- break
174- fi
175- echo -n '.'
176- sleep 10
177- done
178- echo \"Copying SSH key to \$IP...\"
179- ./lib/copy_ssh_key.sh lme-user \$IP pipe-${{ env.UNIQUE_ID }}.password.txt
180- done
181- "
182-
183- - name : Set up SSH trust from master to child nodes
184- run : |
185- cd testing/v2/development
186- INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
187-
188- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
189- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
190- 'ssh-keygen -t rsa -b 4096 -N \"\" -f ~/.ssh/id_rsa -q <<< y 2>/dev/null || true'
191- "
192-
193- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
194- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
195- 'sudo apt-get update -qq && sudo apt-get install -y -qq sshpass'
196- "
197-
198- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
199- cd ${INSTALLERS_DIR} && \
200- PASSWORD=\$(cat pipe-${{ env.UNIQUE_ID }}.password.txt) && \
201- for ip in \$(jq -r '.linux_vms[1:][].private_ip' pipe-${{ env.UNIQUE_ID }}.machines.json); do
202- echo \"Copying master SSH key to \$ip...\"
203- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
204- \"sshpass -p '\$PASSWORD' ssh-copy-id -o StrictHostKeyChecking=no lme-user@\$ip\"
205- echo \"Testing SSH from master to \$ip...\"
206- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
207- \"ssh -o StrictHostKeyChecking=no lme-user@\$ip hostname\"
208- done
209- "
210-
211- - name : Clone repo and prepare master for cluster install
212- run : |
213- cd testing/v2/development
214-
215- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
216- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
217- git clone https://github.com/cisagov/LME.git ~/LME &&
218- cd ~/LME &&
219- git fetch --all --tags &&
220- if git show-ref --tags --verify --quiet \"refs/tags/${{ env.BRANCH_NAME }}\"; then
221- git checkout ${{ env.BRANCH_NAME }}
222- else
223- git checkout -t origin/${{ env.BRANCH_NAME }} || git checkout ${{ env.BRANCH_NAME }}
224- fi
225- '
226- "
227-
228- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
229- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
230- cp ~/LME/config/example.env ~/LME/config/lme-environment.env &&
231- sed -i \"s/IPVAR=.*/IPVAR=${{ env.MASTER_PRIVATE_IP }}/\" ~/LME/config/lme-environment.env
232- '
233- "
234-
235- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
236- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
237- sudo apt-get update && sudo apt-get install -y ansible jq
238- '
239- "
240-
241- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
242- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
243- cd ~/LME/ansible && ansible-galaxy collection install -r requirements.yml --timeout 30
244- '
245- " || echo "Galaxy install failed, continuing with existing collections..."
246-
247- - name : Run site.yml in cluster mode on master
248- run : |
249- cd testing/v2/development
250- INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
251-
252- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
253- cd ${INSTALLERS_DIR} && \
254- MASTER_PIP=\$(jq -r '.linux_vms[0].private_ip' pipe-${{ env.UNIQUE_ID }}.machines.json) && \
255- ALL_IPS=\$(jq -c '[.linux_vms[].private_ip]' pipe-${{ env.UNIQUE_ID }}.machines.json) && \
256- jq -n --argjson hosts \"\$ALL_IPS\" --arg master \"\$MASTER_PIP\" \
257- '{lme_cluster_mode: true, es_cluster_seed_hosts: \$hosts, es_master_publish_host: \$master}' \
258- > /tmp/cluster_vars.json && \
259- echo 'Cluster extra vars:' && cat /tmp/cluster_vars.json && \
260- scp /tmp/cluster_vars.json lme-user@${{ env.AZURE_IP }}:/tmp/cluster_vars.json
261- "
262-
263- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
264- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
265- cd ~/LME && ansible-playbook ansible/site.yml -e @/tmp/cluster_vars.json
266- '
267- "
268-
269- - name : Create cluster inventory and deploy to child nodes
270- run : |
271- cd testing/v2/development
272- INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
273-
274- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
275- cd ${INSTALLERS_DIR} && \
276- MACHINES_FILE=pipe-${{ env.UNIQUE_ID }}.machines.json && \
277- MASTER_PIP=\$(jq -r '.linux_vms[0].private_ip' \$MACHINES_FILE) && \
278- MASTER_PUB=\$(jq -r '.linux_vms[0].ip_address' \$MACHINES_FILE) && \
279- TMPFILE=\$(mktemp) && \
280- echo 'all:' > \$TMPFILE && \
281- echo ' vars:' >> \$TMPFILE && \
282- echo \" es_master_host: \${MASTER_PIP}\" >> \$TMPFILE && \
283- echo ' es_cluster_seed_hosts:' >> \$TMPFILE && \
284- for ip in \$(jq -r '.linux_vms[].private_ip' \$MACHINES_FILE); do
285- echo \" - \${ip}\" >> \$TMPFILE
286- done && \
287- echo ' children:' >> \$TMPFILE && \
288- echo ' elasticsearch:' >> \$TMPFILE && \
289- echo ' hosts:' >> \$TMPFILE && \
290- echo ' es1:' >> \$TMPFILE && \
291- echo \" ansible_host: \${MASTER_PIP}\" >> \$TMPFILE && \
292- echo ' ansible_connection: local' >> \$TMPFILE && \
293- echo ' es_node_name: lme-elasticsearch' >> \$TMPFILE && \
294- echo ' es_is_initial_master: true' >> \$TMPFILE && \
295- echo \" es_publish_host: \${MASTER_PIP}\" >> \$TMPFILE && \
296- i=2 && \
297- for ip in \$(jq -r '.linux_vms[1:][].private_ip' \$MACHINES_FILE); do
298- echo \" es\${i}:\" >> \$TMPFILE && \
299- echo \" ansible_host: \${ip}\" >> \$TMPFILE && \
300- echo ' ansible_user: lme-user' >> \$TMPFILE && \
301- echo \" es_node_name: es\${i}\" >> \$TMPFILE && \
302- echo \" es_publish_host: \${ip}\" >> \$TMPFILE
303- i=\$((i + 1))
304- done && \
305- echo 'Generated cluster inventory:' && cat \$TMPFILE && \
306- scp \$TMPFILE lme-user@\${MASTER_PUB}:~/LME/ansible/inventory/cluster.yml && \
307- rm \$TMPFILE
308- "
309-
310- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
311- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} '
312- cd ~/LME && ansible-playbook -i ansible/inventory/cluster.yml ansible/elasticsearch.yml
313- '
314- "
315-
316- - name : Verify cluster health
317- run : |
318- cd testing/v2/development
319- echo "Waiting for cluster to stabilize..."
320- sleep 120
321- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
322- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} 'sudo bash -c \"\
323- source /opt/lme/scripts/extract_secrets.sh -q && \
324- echo === Cluster Health === && \
325- curl -sk -u \\\"elastic:\\\$elastic\\\" https://localhost:9200/_cluster/health?pretty && \
326- echo === Cluster Nodes === && \
327- curl -sk -u \\\"elastic:\\\$elastic\\\" https://localhost:9200/_cat/nodes?v\"'
328- "
329-
330- - name : Set up NFS server on master
331- run : |
332- cd testing/v2/development
333- INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
334-
335- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
336- cd ${INSTALLERS_DIR} && \
337- MACHINES_FILE=pipe-${{ env.UNIQUE_ID }}.machines.json && \
338- MASTER_PIP=\$(jq -r '.linux_vms[0].private_ip' \$MACHINES_FILE) && \
339- NFS_EXPORTS='/srv/es-snapshots' && \
340- for ip in \$(jq -r '.linux_vms[].private_ip' \$MACHINES_FILE); do
341- NFS_EXPORTS=\"\${NFS_EXPORTS} \${ip}(rw,sync,no_subtree_check,no_root_squash)\"
342- done && \
343- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
344- \"sudo apt-get install -y nfs-kernel-server && \
345- sudo mkdir -p /srv/es-snapshots && sudo chmod 777 /srv/es-snapshots && \
346- echo '\${NFS_EXPORTS}' | sudo tee /etc/exports && \
347- sudo exportfs -ra && sudo systemctl start nfs-kernel-server\"
348- "
349-
350- - name : Mount NFS on all nodes
351- run : |
352- cd testing/v2/development
353- INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
354-
355- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
356- cd ${INSTALLERS_DIR} && \
357- MACHINES_FILE=pipe-${{ env.UNIQUE_ID }}.machines.json && \
358- MASTER_PIP=\$(jq -r '.linux_vms[0].private_ip' \$MACHINES_FILE) && \
359- for pub_ip in \$(jq -r '.linux_vms[].ip_address' \$MACHINES_FILE); do
360- echo \"=== Mounting NFS on \${pub_ip} ===\" && \
361- ssh -o StrictHostKeyChecking=no lme-user@\${pub_ip} \
362- \"sudo apt-get install -y nfs-common && \
363- sudo mkdir -p /mnt/es-snapshots && \
364- sudo mount -t nfs \${MASTER_PIP}:/srv/es-snapshots /mnt/es-snapshots && \
365- grep -q /mnt/es-snapshots /etc/fstab || echo '\${MASTER_PIP}:/srv/es-snapshots /mnt/es-snapshots nfs defaults 0 0' | sudo tee -a /etc/fstab\"
366- done
367- "
368-
369- - name : Configure Elasticsearch NFS snapshot path on all nodes
370- run : |
371- cd testing/v2/development
372- INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
373-
374- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
375- cd ${INSTALLERS_DIR} && \
376- MACHINES_FILE=pipe-${{ env.UNIQUE_ID }}.machines.json && \
377- for pub_ip in \$(jq -r '.linux_vms[].ip_address' \$MACHINES_FILE); do
378- echo \"=== Configuring ES on \${pub_ip} ===\" && \
379- ssh -o StrictHostKeyChecking=no lme-user@\${pub_ip} '
380- # path.repo now set by Ansible template (elasticsearch.yml.j2) when lme_cluster_mode=true
381- # sudo grep -q /usr/share/elasticsearch/snapshots /opt/lme/config/elasticsearch.yml || \
382- # sudo sed -i \"/\\/usr\\/share\\/elasticsearch\\/backups/a\\\\ - /usr/share/elasticsearch/snapshots\" /opt/lme/config/elasticsearch.yml
383- sudo mkdir -p /etc/containers/systemd/lme-elasticsearch.container.d/
384- echo \"[Container]
385- Volume=/mnt/es-snapshots:/usr/share/elasticsearch/snapshots\" | sudo tee /etc/containers/systemd/lme-elasticsearch.container.d/nfs-mount.conf
386- sudo systemctl daemon-reload && sudo systemctl restart lme-elasticsearch
387- '
388- done
389- "
390-
391- - name : Verify NFS and cluster health after restart
392- run : |
393- cd testing/v2/development
394- INSTALLERS_DIR="/home/lme-user/LME/testing/v2/installers"
395- echo "Waiting for Elasticsearch to restart on all nodes..."
396- sleep 60
397-
398- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
399- echo '=== Creating test file on master NFS export ===' && \
400- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
401- 'sudo touch /srv/es-snapshots/nfs_test_file' && \
402- cd ${INSTALLERS_DIR} && \
403- MACHINES_FILE=pipe-${{ env.UNIQUE_ID }}.machines.json && \
404- for pub_ip in \$(jq -r '.linux_vms[1:][].ip_address' \$MACHINES_FILE); do
405- echo \"=== Checking NFS on \${pub_ip} ===\" && \
406- ssh -o StrictHostKeyChecking=no lme-user@\${pub_ip} \
407- 'df -h /mnt/es-snapshots && ls -la /mnt/es-snapshots/nfs_test_file'
408- done && \
409- echo '=== Cleaning up test file ===' && \
410- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \
411- 'sudo rm /srv/es-snapshots/nfs_test_file'
412- "
413-
414- docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
415- ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} 'sudo bash -c \"\
416- source /opt/lme/scripts/extract_secrets.sh -q && \
417- echo === Cluster Health After NFS === && \
418- curl -sk -u \\\"elastic:\\\$elastic\\\" https://localhost:9200/_cluster/health?pretty && \
419- echo === Cluster Nodes After NFS === && \
420- curl -sk -u \\\"elastic:\\\$elastic\\\" https://localhost:9200/_cat/nodes?v\"'
421- "
422-
423155 - name : Retrieve Elastic password
424156 env :
425157 AZURE_IP : ${{ env.AZURE_IP }}
0 commit comments