Update documentation for recent failure domain options in Ceph

Change-Id: Id6707566753b834da598b62f630f4caafe8ee234
Matthew Heler 2018-12-13 11:11:46 -06:00
parent 6589af54db
commit b18dd351d2
1 changed file with 148 additions and 403 deletions


@@ -51,8 +51,9 @@ Ceph Environment
================
The ceph commands and scripts described in this write-up are executed as
Linux user root on one of the ceph monitors deployed as kubernetes
pods. The root user has the credential to execute all the ceph commands.
Linux user root on one of the orchestration nodes and one of the ceph monitors
deployed as kubernetes pods. The root user has the credential to execute
all the ceph commands.
On a kubernetes cluster, a separate namespace named **ceph** is configured
for the ceph cluster. Include the **ceph** namespace in **kubectl** when
@@ -386,11 +387,11 @@ Verify the Ceph cluster has a CRUSH rule with rack as the failure domain.
::
# ceph osd crush rule ls
replicated_rack
# ceph osd crush rule dump replicated_rack
rack_replicated_rule
# ceph osd crush rule dump rack_replicated_rule
{
"rule_id": 2,
"rule_name": "replicated_rack",
"rule_name": "rack_replicated_rule",
"ruleset": 2,
"type": 1,
"min_size": 1,
@@ -416,10 +417,10 @@ Create a ceph pool with its CRUSH rule set to the rack's rule.
::
# ceph osd pool create rbd 2048 2048 replicated replicated_rack
# ceph osd pool create rbd 2048 2048 replicated rack_replicated_rule
pool 'rbd' created
# ceph osd pool get rbd crush_rule
crush_rule: replicated_rack
crush_rule: rack_replicated_rule
# ceph osd pool get rbd size
size: 3
# ceph osd pool get rbd pg_num
@@ -570,6 +571,7 @@ To see weight and affinity of each OSD.
23 hdd 1.09000 osd.23 up 1.00000 1.00000
crushtool CLI
-------------
@@ -658,418 +660,161 @@ The **utils-checkPGs.py** script can read the same data from memory and construct
the failure domains with OSDs. Verify the OSDs in each PG against the
constructed failure domains.
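
As a quick usage illustration (assuming the script has been staged at
**/tmp/utils-checkPGs.py**, as in the examples later in this guide), a single
pool can be checked from one of the monitor pods; the output below is what is
expected when no PG violates its failure domain:

::

  # /tmp/utils-checkPGs.py rbd
  Checking PGs in pool rbd ... Passed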
You can edit the **/tmp/cm.rack.ascii** file to modify the CRUSH Map. Compile
the modified ascii file into a binary file that contains the new CRUSH Map. To
load the new CRUSH Map into the running ceph cluster, execute the following
commands on one of the monitor nodes:
Configure the Failure Domain in CRUSH Map
=========================================
The ceph-osd, ceph-client and cinder charts accept configuration parameters to set the failure domain for CRUSH.
The available options are **failure_domain**, **failure_domain_by_hostname**, **failure_domain_name** and **crush_rule**.
::
# vi /tmp/cm.rack.ascii
# crushtool -c /tmp/cm.rack.ascii -o /tmp/cm.bin.new
# ceph osd setcrushmap -i /tmp/cm.bin.new
# watch ceph status
ceph-osd specific overrides
failure_domain: Set the CRUSH bucket type for your OSD to reside in. (DEFAULT: "host")
failure_domain_by_hostname: Specify the portion of the hostname to use for your failure domain bucket name. (DEFAULT: "false")
failure_domain_name: Manually set the failure domain bucket name. This configuration option should only be used with host-based overrides. (DEFAULT: "false")
::
ceph-client and cinder specific overrides
crush_rule: Set the CRUSH rule for a pool. (DEFAULT: "replicated_rule")
An example lab environment had the following parameters set in the ceph yaml override file to apply a rack-level failure domain within CRUSH.
::
  endpoints:
    identity:
      namespace: openstack
    object_store:
      namespace: ceph
    ceph_mon:
      namespace: ceph
  network:
    public: 10.0.0.0/24
    cluster: 10.0.0.0/24
  deployment:
    storage_secrets: true
    ceph: true
    rbd_provisioner: true
    cephfs_provisioner: true
    client_secrets: false
    rgw_keystone_user_and_endpoints: false
  bootstrap:
    enabled: true
  conf:
    ceph:
      global:
        fsid: 6c12a986-148d-45a7-9120-0cf0522ca5e0
    rgw_ks:
      enabled: true
    pool:
      default:
        crush_rule: rack_replicated_rule
      crush:
        tunables: null
      target:
        # NOTE(portdirect): 5 nodes, with one osd per node
        osd: 18
        pg_per_osd: 100
    storage:
      osd:
        - data:
            type: block-logical
            location: /dev/vdb
          journal:
            type: block-logical
            location: /dev/vde1
        - data:
            type: block-logical
            location: /dev/vdc
          journal:
            type: block-logical
            location: /dev/vde2
        - data:
            type: block-logical
            location: /dev/vdd
          journal:
            type: block-logical
            location: /dev/vde3
  overrides:
    ceph_osd:
      hosts:
        - name: osh-1
          conf:
            storage:
              failure_domain: "rack"
              failure_domain_name: "rack1"
        - name: osh-2
          conf:
            storage:
              failure_domain: "rack"
              failure_domain_name: "rack1"
        - name: osh-3
          conf:
            storage:
              failure_domain: "rack"
              failure_domain_name: "rack2"
        - name: osh-4
          conf:
            storage:
              failure_domain: "rack"
              failure_domain_name: "rack2"
        - name: osh-5
          conf:
            storage:
              failure_domain: "rack"
              failure_domain_name: "rack3"
        - name: osh-6
          conf:
            storage:
              failure_domain: "rack"
              failure_domain_name: "rack3"
.. NOTE::
   You must know the CRUSH Map syntax well to be able to manually edit the ascii file.
Buckets
-------
Suppose you have a pre-existing Ceph cluster that does not have rack
buckets and you want to restructure the CRUSH hierarchy with rack
buckets into a topology similar to the one presented earlier in
this guide.
Note that the cinder chart needs an override to ensure the cinder pools in Ceph use the correct **crush_rule**.
::
root@host3:/# ceph osd crush tree
ID CLASS WEIGHT TYPE NAME
-1 78.47974 root default
-2 13.07996 host host1
0 hdd 1.09000 osd.0
1 hdd 1.09000 osd.1
2 hdd 1.09000 osd.2
3 hdd 1.09000 osd.3
4 hdd 1.09000 osd.4
5 hdd 1.09000 osd.5
6 hdd 1.09000 osd.6
7 hdd 1.09000 osd.7
8 hdd 1.09000 osd.8
9 hdd 1.09000 osd.9
10 hdd 1.09000 osd.10
11 hdd 1.09000 osd.11
-5 13.07996 host host2
12 hdd 1.09000 osd.12
13 hdd 1.09000 osd.13
14 hdd 1.09000 osd.14
15 hdd 1.09000 osd.15
16 hdd 1.09000 osd.16
17 hdd 1.09000 osd.17
18 hdd 1.09000 osd.18
19 hdd 1.09000 osd.19
20 hdd 1.09000 osd.20
21 hdd 1.09000 osd.21
22 hdd 1.09000 osd.22
23 hdd 1.09000 osd.23
-13 13.07996 host host3
60 hdd 1.09000 osd.60
61 hdd 1.09000 osd.61
62 hdd 1.09000 osd.62
63 hdd 1.09000 osd.63
64 hdd 1.09000 osd.64
65 hdd 1.09000 osd.65
66 hdd 1.09000 osd.66
67 hdd 1.09000 osd.67
68 hdd 1.09000 osd.68
69 hdd 1.09000 osd.69
70 hdd 1.09000 osd.70
71 hdd 1.09000 osd.71
-9 13.07996 host host4
36 hdd 1.09000 osd.36
37 hdd 1.09000 osd.37
38 hdd 1.09000 osd.38
39 hdd 1.09000 osd.39
40 hdd 1.09000 osd.40
41 hdd 1.09000 osd.41
42 hdd 1.09000 osd.42
43 hdd 1.09000 osd.43
44 hdd 1.09000 osd.44
45 hdd 1.09000 osd.45
46 hdd 1.09000 osd.46
47 hdd 1.09000 osd.47
-11 13.07996 host host5
48 hdd 1.09000 osd.48
49 hdd 1.09000 osd.49
50 hdd 1.09000 osd.50
51 hdd 1.09000 osd.51
52 hdd 1.09000 osd.52
53 hdd 1.09000 osd.53
54 hdd 1.09000 osd.54
55 hdd 1.09000 osd.55
56 hdd 1.09000 osd.56
57 hdd 1.09000 osd.57
58 hdd 1.09000 osd.58
59 hdd 1.09000 osd.59
-7 13.07996 host host6
24 hdd 1.09000 osd.24
25 hdd 1.09000 osd.25
26 hdd 1.09000 osd.26
27 hdd 1.09000 osd.27
28 hdd 1.09000 osd.28
29 hdd 1.09000 osd.29
30 hdd 1.09000 osd.30
31 hdd 1.09000 osd.31
32 hdd 1.09000 osd.32
33 hdd 1.09000 osd.33
34 hdd 1.09000 osd.34
35 hdd 1.09000 osd.35
root@host3:/#
  pod:
    replicas:
      api: 2
      volume: 1
      scheduler: 1
      backup: 1
  conf:
    cinder:
      DEFAULT:
        backup_driver: cinder.backup.drivers.swift
    ceph:
      pools:
        backup:
          replicated: 3
          crush_rule: rack_replicated_rule
          chunk_size: 8
        volume:
          replicated: 3
          crush_rule: rack_replicated_rule
          chunk_size: 8
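
After cinder is deployed with this override, the rule assigned to each of its
pools can be verified in the same way as for the rbd pool earlier; substitute
the actual pool names reported by the cluster:

::

  # ceph osd pool ls
  # ceph osd pool get <pool-name> crush_rule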
To include rack buckets in the CRUSH Map, follow these steps. First, add
the required rack buckets with user-defined names.
The charts can be updated with these overrides before or after deployment. For a post-deployment change, the following steps apply to a gate-based openstack-helm deployment.
::
root@host5:/# ceph osd crush add-bucket rack1 rack
added bucket rack1 type rack to crush map
root@host5:/# ceph osd crush add-bucket rack2 rack
added bucket rack2 type rack to crush map
root@host5:/# ceph osd crush add-bucket rack3 rack
added bucket rack3 type rack to crush map
root@host5:/# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-17 0 rack rack3
-16 0 rack rack2
-15 0 rack rack1
-1 78.47974 root default
. . .
cd /opt/openstack-helm
helm upgrade --install ceph-osd ../openstack-helm-infra/ceph-osd --namespace=ceph --values=/tmp/ceph.yaml
kubectl delete jobs/ceph-rbd-pool -n ceph
helm upgrade --install ceph-client ../openstack-helm-infra/ceph-client --namespace=ceph --values=/tmp/ceph.yaml
helm delete cinder --purge
helm upgrade --install cinder ./cinder --namespace=openstack --values=/tmp/cinder.yaml
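
After the upgrades, confirm that the OSD and client pods in the **ceph**
namespace have restarted and are running before proceeding (a basic check;
exact pod names depend on the deployment):

::

  # kubectl get pods -n ceph | grep -e ceph-osd -e ceph-client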
Move the hosts to the respective rack buckets.
.. NOTE::
::
   There will be a brief interruption of I/O and a data movement of placement groups in Ceph while these changes are
   applied. The data movement can take anywhere from several minutes to several days to complete.
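
The progress of the resulting recovery and backfill can be followed from one
of the monitor pods, for example:

::

  # watch ceph status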
root@host5:/# ceph osd crush move host1 rack=rack1
moved item id -2 name 'host1' to location {rack=rack1} in crush map
root@host5:/# ceph osd crush move host2 rack=rack1
moved item id -5 name 'host2' to location {rack=rack1} in crush map
Move the newly created rack rack1 to the root bucket. Verify the new
hierarchy with the ceph CLI.
::
root@host5:/# ceph osd crush move rack1 root=default
moved item id -15 name 'rack1' to location {root=default} in crush map
root@host5:/# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-17 0 rack rack3
-16 0 rack rack2
-1 78.47974 root default
-15 26.15991 rack rack1
-2 13.07996 host host1
0 hdd 1.09000 osd.0 up 1.00000 1.00000
1 hdd 1.09000 osd.1 up 1.00000 1.00000
2 hdd 1.09000 osd.2 up 1.00000 1.00000
3 hdd 1.09000 osd.3 up 1.00000 1.00000
4 hdd 1.09000 osd.4 up 1.00000 1.00000
5 hdd 1.09000 osd.5 up 1.00000 1.00000
6 hdd 1.09000 osd.6 up 1.00000 1.00000
7 hdd 1.09000 osd.7 up 1.00000 1.00000
8 hdd 1.09000 osd.8 up 1.00000 1.00000
9 hdd 1.09000 osd.9 up 1.00000 1.00000
10 hdd 1.09000 osd.10 up 1.00000 1.00000
11 hdd 1.09000 osd.11 up 1.00000 1.00000
-5 13.07996 host host2
12 hdd 1.09000 osd.12 up 1.00000 1.00000
13 hdd 1.09000 osd.13 up 1.00000 1.00000
14 hdd 1.09000 osd.14 up 1.00000 1.00000
15 hdd 1.09000 osd.15 up 1.00000 1.00000
16 hdd 1.09000 osd.16 up 1.00000 1.00000
17 hdd 1.09000 osd.17 up 1.00000 1.00000
18 hdd 1.09000 osd.18 up 1.00000 1.00000
19 hdd 1.09000 osd.19 up 1.00000 1.00000
20 hdd 1.09000 osd.20 up 1.00000 1.00000
21 hdd 1.09000 osd.21 up 1.00000 1.00000
22 hdd 1.09000 osd.22 up 1.00000 1.00000
23 hdd 1.09000 osd.23 up 1.00000 1.00000
. . .
Repeat the same for rack2.
::
root@host5:/# ceph osd crush move host3 rack=rack2
moved item id -13 name 'host3' to location {rack=rack2} in crush map
root@host5:/# ceph osd crush move host4 rack=rack2
moved item id -9 name 'host4' to location {rack=rack2} in crush map
root@host5:/# ceph osd crush move rack2 root=default
moved item id -16 name 'rack2' to location {root=default} in crush map
Repeat the same for rack3.
::
root@host5:/# ceph osd crush move host5 rack=rack3
moved item id -11 name 'host5' to location {rack=rack3} in crush map
root@host5:/# ceph osd crush move host6 rack=rack3
moved item id -7 name 'host6' to location {rack=rack3} in crush map
root@host5:/# ceph osd crush move rack3 root=default
moved item id -17 name 'rack3' to location {root=default} in crush map
Extract the CRUSH Map from the in-memory copy and verify.
::
root@host5:/# ceph osd getcrushmap -o /tmp/cm.bin.racks.6
100
root@host5:/# crushtool -d /tmp/cm.bin.racks.6 -o /tmp/cm.ascii.racks.6
root@host5:/# cat /tmp/cm.ascii.racks.6
. . .
# buckets
host host1 {
id -2 # do not change unnecessarily
id -3 class hdd # do not change unnecessarily
# weight 13.080
alg straw2
hash 0 # rjenkins1
item osd.0 weight 1.090
item osd.1 weight 1.090
item osd.2 weight 1.090
item osd.3 weight 1.090
item osd.4 weight 1.090
item osd.5 weight 1.090
item osd.6 weight 1.090
item osd.7 weight 1.090
item osd.8 weight 1.090
item osd.9 weight 1.090
item osd.10 weight 1.090
item osd.11 weight 1.090
}
host host2 {
id -5 # do not change unnecessarily
id -6 class hdd # do not change unnecessarily
# weight 13.080
alg straw2
hash 0 # rjenkins1
item osd.12 weight 1.090
item osd.13 weight 1.090
item osd.14 weight 1.090
item osd.15 weight 1.090
item osd.16 weight 1.090
item osd.18 weight 1.090
item osd.19 weight 1.090
item osd.17 weight 1.090
item osd.20 weight 1.090
item osd.21 weight 1.090
item osd.22 weight 1.090
item osd.23 weight 1.090
}
rack rack1 {
id -15 # do not change unnecessarily
id -20 class hdd # do not change unnecessarily
# weight 26.160
alg straw2
hash 0 # rjenkins1
item host1 weight 13.080
item host2 weight 13.080
}
host host3 {
id -13 # do not change unnecessarily
id -14 class hdd # do not change unnecessarily
# weight 13.080
alg straw2
hash 0 # rjenkins1
item osd.53 weight 1.090
item osd.54 weight 1.090
item osd.58 weight 1.090
item osd.59 weight 1.090
item osd.64 weight 1.090
item osd.65 weight 1.090
item osd.66 weight 1.090
item osd.67 weight 1.090
item osd.69 weight 1.090
item osd.68 weight 1.090
item osd.71 weight 1.090
item osd.70 weight 1.090
}
host host4 {
id -9 # do not change unnecessarily
id -10 class hdd # do not change unnecessarily
# weight 13.080
alg straw2
hash 0 # rjenkins1
item osd.36 weight 1.090
item osd.37 weight 1.090
item osd.38 weight 1.090
item osd.39 weight 1.090
item osd.40 weight 1.090
item osd.41 weight 1.090
item osd.42 weight 1.090
item osd.44 weight 1.090
item osd.45 weight 1.090
item osd.46 weight 1.090
item osd.47 weight 1.090
item osd.43 weight 1.090
}
rack rack2 {
id -16 # do not change unnecessarily
id -19 class hdd # do not change unnecessarily
# weight 26.160
alg straw2
hash 0 # rjenkins1
item host3 weight 13.080
item host4 weight 13.080
}
host host5 {
id -11 # do not change unnecessarily
id -12 class hdd # do not change unnecessarily
# weight 13.080
alg straw2
hash 0 # rjenkins1
item osd.49 weight 1.090
item osd.48 weight 1.090
item osd.50 weight 1.090
item osd.51 weight 1.090
item osd.52 weight 1.090
item osd.55 weight 1.090
item osd.56 weight 1.090
item osd.57 weight 1.090
item osd.60 weight 1.090
item osd.61 weight 1.090
item osd.62 weight 1.090
item osd.63 weight 1.090
}
host host6 {
id -7 # do not change unnecessarily
id -8 class hdd # do not change unnecessarily
# weight 13.080
alg straw2
hash 0 # rjenkins1
item osd.24 weight 1.090
item osd.25 weight 1.090
item osd.26 weight 1.090
item osd.27 weight 1.090
item osd.28 weight 1.090
item osd.29 weight 1.090
item osd.30 weight 1.090
item osd.31 weight 1.090
item osd.32 weight 1.090
item osd.33 weight 1.090
item osd.34 weight 1.090
item osd.35 weight 1.090
}
rack rack3 {
id -17 # do not change unnecessarily
id -18 class hdd # do not change unnecessarily
# weight 26.160
alg straw2
hash 0 # rjenkins1
item host5 weight 13.080
item host6 weight 13.080
}
root default {
id -1 # do not change unnecessarily
id -4 class hdd # do not change unnecessarily
# weight 78.480
alg straw2
hash 0 # rjenkins1
item rack1 weight 26.160
item rack2 weight 26.160
item rack3 weight 26.160
}
# rules
rule replicated_rule {
id 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
rule same_host {
id 1
type replicated
min_size 1
max_size 10
step take default
step choose firstn 0 type osd
step emit
}
rule replicated_rack {
id 2
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type rack
step emit
}
# end crush map
root@host5:/#
Create a CRUSH Rule with rack as the failure domain.
::
root@host5:/# ceph osd crush rule create-replicated replicated_rack default rack
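
Before any pool uses the new rule, it can be listed and dumped to confirm that
its chooseleaf step selects the rack bucket type:

::

  root@host5:/# ceph osd crush rule ls
  root@host5:/# ceph osd crush rule dump replicated_rack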
Create a ceph pool that uses the new CRUSH Rule.
::
root@host5:/# ceph osd pool create cmTestPool 2048 2048 replicated replicated_rack
pool 'cmTestPool' created
root@host5:/# /tmp/utils-checkPGs.py cmTestPool
Checking PGs in pool cmTestPool ... Passed
utils-checkPGs.py Script
========================
The utils-checkPGs.py Script
============================
The purpose of the **utils-checkPGs.py** script is to check whether a PG has OSDs
allocated from the same failure domain. The violating PGs with their