Fix fact gathering with --limit

Prior to this change, when the --limit argument is used, each host in the limit gathers facts for every other host. This is clearly unnecessary, and can result in up to (N-1)^2 fact gathers. This change gathers facts for each host only once. Hosts that are not in the limit are divided among the hosts that are in the limit, which gather their facts via delegation. This change also factors out the fact gathering logic into a separate playbook that is included where necessary. Change-Id: I923df5af41a7f1b7b0142d0da185a9a0979be543 (cherry picked from commit 56b4352f9e)
2018-09-17 16:01:52 +01:00 · 2018-09-17 16:01:52 +01:00 · 1ae90d4b55
parent 7b4b526480
commit 1ae90d4b55
3 changed files with 46 additions and 62 deletions
--- a/ansible/gather-facts.yml
+++ b/ansible/gather-facts.yml
@ -0,0 +1,44 @@
+---
+# NOTE(awiddersheim): Gather facts for all hosts as a first step, since
+# several plays in the including playbooks require them when building their
+# configurations. 'gather_facts' is 'false' because the explicit 'setup'
+# task below gathers the facts; this avoids Ansible gathering them twice.
+- name: Gather facts for all hosts
+  hosts: all
+  serial: '{{ serial|default("0") }}'
+  gather_facts: false
+  tasks:
+    - name: Gather facts
+      setup:
+
+    - name: Group hosts to determine when using --limit
+      # Yields group all_using_limit_True if the batch differs in size from 'all'.
+      group_by:
+        key: "all_using_limit_{{ (ansible_play_batch | length) != (groups['all'] | length) }}"
+  tags: always
+
+# NOTE(pbourke): This play covers deploying a subset of hosts using --limit.
+# With a limit, the first play gathers facts only for the limited hosts, so
+# facts such as the IP addresses of rabbitmq nodes would be undefined when,
+# for example, adding a single compute node.
+# NOTE(mgoddard): Divide all hosts to be queried between the hosts selected via
+# the limit, so that facts for each host are gathered only once.
+- name: Gather facts for all hosts (if using --limit)
+  hosts: all_using_limit_True
+  serial: '{{ serial|default("0") }}'
+  gather_facts: false
+  vars:
+    batch_index: "{{ ansible_play_batch.index(inventory_hostname) }}"
+    batch_size: "{{ ansible_play_batch | length }}"
+    # Use a python list slice to divide the group up: the host at batch_index
+    # takes every batch_size-th host in 'all'. Syntax: [<start>:<end>:<step>]
+    delegate_hosts: "{{ groups['all'][batch_index | int::batch_size | int] }}"
+  tasks:
+    - name: Gather facts
+      setup:
+      delegate_facts: True
+      delegate_to: "{{ item }}"
+      with_items: "{{ delegate_hosts }}"
+      # Skip hosts in this batch: the first play already gathered their facts.
+      when: item not in ansible_play_batch
+  tags: always
--- a/ansible/kolla-host.yml
+++ b/ansible/kolla-host.yml
@ -1,35 +1,5 @@
 ---
-# NOTE(awiddersheim): Gather facts for all hosts as a
-# first step since several plays below require them when
-# building their configurations. The below 'gather_facts'
-# set to 'false' is a bit confusing but this is to avoid
-# Ansible gathering facts twice.
-- name: Gather facts for all hosts
-  hosts: all
-  serial: '{{ serial|default("0") }}'
-  gather_facts: false
-  tasks:
-    - setup:
-  tags: always
-
-# NOTE(pbourke): This case covers deploying subsets of hosts using --limit. The
-# limit arg will cause the first play to gather facts only about that node,
-# meaning facts such as IP addresses for rabbitmq nodes etc. will be undefined
-# in the case of adding a single compute node.
-# We don't want to add the delegate parameters to the above play as it will
-# result in ((num_nodes-1)^2) number of SSHs when running for all nodes
-# which can be very inefficient.
-- name: Gather facts for all hosts (if using --limit)
-  hosts: all
-  serial: '{{ serial|default("0") }}'
-  gather_facts: false
-  tasks:
-    - setup:
-      delegate_facts: True
-      delegate_to: "{{ item }}"
-      with_items: "{{ groups['all'] }}"
-      when:
-        - (ansible_play_batch | length) != (groups['all'] | length)
+- include: gather-facts.yml

 - name: Apply role baremetal
  hosts: baremetal
--- a/ansible/site.yml
+++ b/ansible/site.yml
@ -1,35 +1,5 @@
 ---
-# NOTE(awiddersheim): Gather facts for all hosts as a
-# first step since several plays below require them when
-# building their configurations. The below 'gather_facts'
-# set to 'false' is a bit confusing but this is to avoid
-# Ansible gathering facts twice.
-- name: Gather facts for all hosts
-  hosts: all
-  serial: '{{ serial|default("0") }}'
-  gather_facts: false
-  tasks:
-    - setup:
-  tags: always
-
-# NOTE(pbourke): This case covers deploying subsets of hosts using --limit. The
-# limit arg will cause the first play to gather facts only about that node,
-# meaning facts such as IP addresses for rabbitmq nodes etc. will be undefined
-# in the case of adding a single compute node.
-# We don't want to add the delegate parameters to the above play as it will
-# result in ((num_nodes-1)^2) number of SSHs when running for all nodes
-# which can be very inefficient.
-- name: Gather facts for all hosts (if using --limit)
-  hosts: all
-  serial: '{{ serial|default("0") }}'
-  gather_facts: false
-  tasks:
-    - setup:
-      delegate_facts: True
-      delegate_to: "{{ item }}"
-      with_items: "{{ groups['all'] }}"
-      when:
-        - (ansible_play_batch | length) != (groups['all'] | length)
+- include: gather-facts.yml

 - name: Detect openstack_release variable
  hosts: all