summaryrefslogtreecommitdiff
path: root/deployment_scripts/sriov.sh
blob: 8a62fdd194bce9acdf46a86a7399aaa5a578a919 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
#!/bin/bash -x
# Copyright 2016 Mellanox Technologies, Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Directory part of the path this script was invoked with; sibling helper
# files are loaded relative to it. Assigned before being marked readonly so
# that a failing dirname is not masked by readonly's own exit status.
SCRIPT_DIR=$(dirname "$0")
readonly SCRIPT_DIR
# Shared helpers/settings, quoted so a path containing spaces still resolves.
# NOTE(review): logger_print, sysctl_conf, MIN_VFS, MAX_VFS and the deployment
# variables (SRIOV, ISER, CX, ...) are not defined in this file and are
# presumably provided by 'common' - confirm.
source "$SCRIPT_DIR/common"
# Execution mode taken from the first CLI argument ('configure' or 'validate').
readonly SCRIPT_MODE=$1
# Number of VFs validate_sriov falls back to when the requested amount of
# virtual functions is not found.
readonly FALLBACK_NUM_VFS=8
# Pattern searched for in the SRIOV field of the mlxconfig query output.
readonly SRIOV_ENABLED_FLAG=1
# net.ipv4.route.gc_timeout values: the first is applied when VFs are probed,
# the second otherwise.
readonly VF_MAC_CACHING_TIMEOUT=1
readonly VF_MAC_CACHING_TIMEOUT_DEF=300
# Kernel command-line parameter added to the grub kernel line.
readonly NEW_KERNEL_PARAM="intel_iommu=on"
# Grub configuration file edited per distribution.
readonly GRUB_FILE_CENTOS="/boot/grub/grub.conf"
readonly GRUB_FILE_UBUNTU="/boot/grub/grub.cfg"

# Print the port type code used for the mlx4_core port_type_array option.
# Globals: NETWORK_TYPE (read)
# Outputs: "2" when NETWORK_TYPE is 'ethernet', "1" for any other value
#          (including an empty or unset NETWORK_TYPE).
function get_port_type() {
  local port_type=1
  # Quoted [[ ]] comparison: an empty NETWORK_TYPE is simply "not ethernet"
  # instead of a test syntax error on stderr.
  if [[ "$NETWORK_TYPE" == 'ethernet' ]]; then
    port_type=2
  fi
  echo "$port_type"
}

# Print the number of VFs to probe.
# Outputs: the result of calculate_total_vfs when get_port_type reports 2,
#          otherwise "0". If calculate_total_vfs prints nothing (it failed),
#          an empty line is printed.
function get_num_probe_vfs () {
  local probe_vfs=0
  # [[ ]] keeps an empty get_port_type result from becoming a syntax error.
  if [[ "$(get_port_type)" -eq 2 ]]; then
    probe_vfs=$(calculate_total_vfs)
  fi
  echo "$probe_vfs"
}

# Print the total number of virtual functions to configure on this node.
# Globals: USER_NUM_OF_VFS, MIN_VFS, MAX_VFS, SRIOV, ISER, ROLE, ROLES (read)
# Outputs: the (even) number of VFs on stdout; nothing on stdout on error
# Returns: 0 on success, 1 when USER_NUM_OF_VFS is not an integer within
#          [MIN_VFS, MAX_VFS]
function calculate_total_vfs () {
  local num_of_vfs=0

  # Validate that the requested amount is a non-negative integer in range.
  # The regex check is required: a plain "[ x -ne x ]" is false both for a
  # valid integer and for garbage, so it can never reject anything.
  if ! [[ "${USER_NUM_OF_VFS}" =~ ^[0-9]+$ ]] ||
      [ "${USER_NUM_OF_VFS}" -gt "${MAX_VFS}" ] ||
      [ "${USER_NUM_OF_VFS}" -lt "${MIN_VFS}" ]; then
    # Callers capture stdout and treat an empty result as failure, so make
    # sure the log helper cannot pollute it.
    logger_print error "Illegal number of VFs ${USER_NUM_OF_VFS}, value
                        should be an integer between ${MIN_VFS},${MAX_VFS}" >&2
    return 1
  fi

  # Set Compute VFs storage network
  if [[ "$SRIOV" == true ]]; then
    # VFs are assigned when ROLES is empty and ROLE is not a controller,
    # or when either ROLE or ROLES names a compute node.
    if [[ ( -z "$ROLES" && "$ROLE" != *controller* ) ||
          "$ROLE" == compute || "$ROLES" == *compute* ]]; then
      # Force base 10 so a value such as "08" is not parsed as octal.
      num_of_vfs=$((10#${USER_NUM_OF_VFS}))
    fi
  fi

  # Set Ethernet RDMA storage network: iSER over ethernet needs at least
  # one VF even when none were assigned above.
  if [[ "$ISER" == true ]] && [[ "$(get_port_type)" -eq 2 ]] \
     && (( num_of_vfs == 0 )); then
    num_of_vfs=1
  fi

  # Enforce even num of vfs by rounding odd values up.
  if (( num_of_vfs % 2 == 1 )); then
    num_of_vfs=$(( num_of_vfs + 1 ))
  fi
  echo "${num_of_vfs}"
}

# Reduce mac caching time since VF is used for iSER with non permanent MAC.
# Sets net.ipv4.route.gc_timeout through sysctl_conf: VF_MAC_CACHING_TIMEOUT
# when at least one VF is probed, VF_MAC_CACHING_TIMEOUT_DEF otherwise
# (also when the probe count could not be determined).
# Returns: the exit status of sysctl_conf.
function reduce_mac_caching_timeout () {
  local probes
  local timeout=$VF_MAC_CACHING_TIMEOUT_DEF

  probes=$(get_num_probe_vfs)
  # [[ ]] evaluates an empty value as 0 instead of raising a test error.
  if [[ "$probes" -ge 1 ]]; then
    timeout=$VF_MAC_CACHING_TIMEOUT
  fi
  sysctl_conf set 'net.ipv4.route.gc_timeout' "$timeout"
}

# Tell whether VXLAN offloading was requested.
# Globals: VXLAN_OFFLOADING (read)
# Returns: 0 when VXLAN_OFFLOADING is exactly 'true', 1 otherwise
#          (including when it is empty or unset).
function is_vxlan_offloading_required () {
  [[ "$VXLAN_OFFLOADING" == true ]]
}

# Write the mlx4_core module options file.
# Arguments: $1 - total number of virtual functions
# Globals:   DEBUG (read); PROBE_VFS, PORT_TYPE, TOTAL_VFS, MLX4_CORE_FILE,
#            MLX4_CORE_STR, num_vfs, probe_vf (written)
# Outputs:   overwrites /etc/modprobe.d/mlx4_core.conf with one options line
function set_modprobe_file () {
  PROBE_VFS=$(get_num_probe_vfs)
  MLX4_CORE_FILE="/etc/modprobe.d/mlx4_core.conf"
  PORT_TYPE=$(get_port_type)
  TOTAL_VFS=$1

  # The embedded newline/indentation is harmless: the final echo is left
  # unquoted on purpose so word splitting collapses it to single spaces.
  MLX4_CORE_STR="options mlx4_core
                 enable_64b_cqe_eqe=0"
  [[ $DEBUG == "true" ]] && MLX4_CORE_STR+=" debug_level=1"
  MLX4_CORE_STR+=" port_type_array=${PORT_TYPE},${PORT_TYPE}"

  if [[ $TOTAL_VFS -gt 0 ]]; then
    # num_vfs and probe_vf always carry the same value: a single number for
    # port type 1, "<n>,0,0" for anything else.
    if [ $PORT_TYPE -eq 1 ]; then
      num_vfs="${TOTAL_VFS}"
    else
      num_vfs="${TOTAL_VFS},0,0"
    fi
    probe_vf="${num_vfs}"

    MLX4_CORE_STR+=" num_vfs=$num_vfs"
    # probe_vf is only emitted when there are VFs to probe.
    if [[ $PROBE_VFS -gt 0 ]]; then
      MLX4_CORE_STR+=" probe_vf=$probe_vf"
    fi
  fi

  MLX4_CORE_STR+=" log_num_mgm_entry_size=-1"
  echo ${MLX4_CORE_STR} > ${MLX4_CORE_FILE}

}

# Append NEW_KERNEL_PARAM to the first non-commented kernel line of the grub
# file of the current distribution, then adjust the MAC caching timeout.
# Globals: DISTRO, GRUB_FILE_CENTOS, GRUB_FILE_UBUNTU, NEW_KERNEL_PARAM (read)
# Returns: 1 when the distribution is unsupported or no kernel line is found
#          (nothing is modified in that case); otherwise the exit status of
#          reduce_mac_caching_timeout.
function set_kernel_params () {
  local grub_file kernel_pattern line_num

  case "$DISTRO" in
    redhat)
      grub_file=${GRUB_FILE_CENTOS}
      kernel_pattern='kernel\s+/vmlinuz'
      ;;
    ubuntu)
      grub_file=${GRUB_FILE_UBUNTU}
      kernel_pattern='linux\s+/vmlinuz'
      ;;
    *)
      # Without a grub file the greps below would block reading stdin.
      echo "Unsupported distribution '${DISTRO}', can't locate grub file" >&2
      return 1
      ;;
  esac

  # Line number of the first kernel line that carries no '#'. Taking the
  # number straight from the search avoids re-matching the line's own text
  # as a regular expression (kernel lines may contain '[', '.', '*', ...).
  line_num=$(grep -nE -- "${kernel_pattern}" "${grub_file}" \
    | grep -v '#' | head -1 | cut -f1 -d:)
  if [[ -z "${line_num}" ]]; then
    echo "Couldn't find kernel line in grub file" >&2
    return 1
  fi

  # Skip the edit when the parameter already appears anywhere in the file.
  if ! grep -qF -- "${NEW_KERNEL_PARAM}" "${grub_file}"; then
    # Append in place to that single line; the rest of the line, including
    # its leading whitespace, is left untouched.
    # NOTE: NEW_KERNEL_PARAM must not contain '/' or '&' (sed replacement).
    sed -i "${line_num}s/\$/ ${NEW_KERNEL_PARAM}/" "${grub_file}"
  fi
  reduce_mac_caching_timeout
}

# Print the mst pciconf device paths that `mst status -v` lists for the card
# family in $CX. The family is matched without dashes (ConnectX-3 ->
# ConnectX3).
function _list_mst_pciconf_devices () {
  mst status -v | grep "${CX//-/}" | grep pciconf | awk '{print $2}'
}

# Configure the number of virtual functions in the adapter firmware with
# mlxconfig.
# Arguments: $1 - required total number of VFs
# Globals:   CX, SRIOV_ENABLED_FLAG (read)
# Behavior:
#   ConnectX-3: for every device, sets SRIOV_EN=1 and NUM_OF_VFS=$1 when
#               SR-IOV is reported disabled or the FW value differs from $1.
#   ConnectX-4: for every device, sets NUM_OF_VFS=$1 only when $1 is greater
#               than the value currently stored in FW.
#   Any other CX value: nothing is done.
# Returns: always 0; mlxconfig failures are logged, not propagated.
function burn_vfs_in_fw () {
  local total_vfs=$1
  local dev fw_config current_num_of_vfs sriov_enabled

  case "$CX" in
    'ConnectX-3')
      # required for mlxconfig to discover mlnx devices
      service openibd start &>/dev/null
      service mst start &>/dev/null
      for dev in $(_list_mst_pciconf_devices); do
        logger_print debug "device=$dev"
        # Query once and parse both fields from the same snapshot.
        fw_config=$(mlxconfig -d "$dev" q)
        sriov_enabled=0
        grep SRIOV <<<"$fw_config" | awk '{print $2}' \
          | grep -q "$SRIOV_ENABLED_FLAG" || sriov_enabled=1
        current_num_of_vfs=$(grep NUM_OF_VFS <<<"$fw_config" | awk '{print $2}')

        if [[ "$sriov_enabled" -eq 0 ]]; then
          logger_print debug "Detected SR-IOV is already enabled"
        else
          logger_print debug "Detected SR-IOV is disabled"
        fi

        # Burn when SR-IOV is off OR the VF count differs. Checking the
        # count alone would leave SR-IOV disabled whenever NUM_OF_VFS
        # already happens to match.
        if [[ "$sriov_enabled" -ne 0 ||
              "$total_vfs" != "$current_num_of_vfs" ]]; then
          logger_print debug "Current allowed number of VFs is ${current_num_of_vfs}, required number is ${total_vfs}"
          logger_print debug "Trying mlxconfig -y -d ${dev} s SRIOV_EN=1 NUM_OF_VFS=${total_vfs}"
          # Redirection order is intentional as written: stdout is
          # discarded while stderr stays on the script's original stdout.
          if ! mlxconfig -y -d "$dev" s SRIOV_EN=1 NUM_OF_VFS="$total_vfs" 2>&1 >/dev/null; then
            logger_print error "Failed changing number of VFs in FW for HCA ${dev}"
          fi
        else
          logger_print debug "Current number of VFs is correctly set to ${current_num_of_vfs} in FW."
        fi
      done
      service mst stop &>/dev/null
      ;;
    'ConnectX-4')
      # required for mlxconfig to discover mlnx devices
      service openibd start &>/dev/null
      service mst start &>/dev/null
      for dev in $(_list_mst_pciconf_devices); do
        current_fw_vfs=""
        current_fw_vfs=$(mlxconfig -d "$dev" q | grep NUM_OF_VFS | awk '{print $2}')
        # Only ever raise the FW value; skip devices whose current value
        # could not be read as a number.
        if [[ "$current_fw_vfs" =~ ^[0-9]+$ ]] &&
           [ "$total_vfs" -gt "$current_fw_vfs" ]; then
          logger_print debug "device=$dev"
          logger_print debug "Trying mlxconfig -d ${dev} -y set NUM_OF_VFS=${total_vfs}"
          if ! mlxconfig -d "$dev" -y set NUM_OF_VFS="$total_vfs"; then
            logger_print error "Failed changing number of VFs in FW for HCA ${dev}"
          fi
        fi
      done
      ;;
  esac
  return 0
}

# Tell whether virtual functions have to be configured on this node.
# Globals: SRIOV, ISER (read)
# Returns: 0 when SRIOV is 'true', or when ISER is 'true' and get_port_type
#          reports 2; 1 otherwise (including unset variables).
function is_sriov_required () {
  # Brace group instead of a subshell; quoted [[ ]] so empty variables
  # compare as "not true" rather than raising a test error.
  [[ "$SRIOV" == true ]] ||
    { [[ "$ISER" == true ]] && [[ "$(get_port_type)" -eq 2 ]]; }
}

# Configure SR-IOV for the detected card: kernel parameters, firmware VF
# count, and the card specific VF setup.
# Globals: CX (read)
# Returns: 0 when SR-IOV is not required or every step succeeded; 1 when
#          set_kernel_params or the card specific step failed.
# Exits:   the whole script with status 1 when the number of VFs can not be
#          calculated.
function configure_sriov () {
  local total_vfs
  local rc=0

  if ! is_sriov_required; then
    logger_print info "Skipping SR-IOV configuration"
    return 0
  fi

  # Calculate the total amount of virtual functions, based on user selection
  total_vfs=$(calculate_total_vfs)
  if [[ -z "${total_vfs}" ]]; then
    exit 1
  fi
  logger_print info "Configuring ${total_vfs} virtual functions
                       (only even number is currently supported)"

  # Firmware is only touched once the kernel parameters are in place. The
  # firmware step's own status is not propagated: it logs its failures and
  # validate_sriov can still fall back to FALLBACK_NUM_VFS afterwards.
  if set_kernel_params; then
    burn_vfs_in_fw "$total_vfs"
  else
    rc=1
  fi

  # Remember a failure explicitly; a trailing "return $?" would only report
  # the status of the last card check, not of the step that actually ran.
  case "$CX" in
    'ConnectX-3')
      if set_modprobe_file "$total_vfs"; then
        logger_print info "Detected: ConnectX-3 card"
      else
        rc=1
      fi
      ;;
    'ConnectX-4')
      if set_sriov "$total_vfs"; then
        logger_print info "Detected: ConnectX-4 card"
      else
        rc=1
      fi
      ;;
  esac

  return "$rc"
}

# Verify that the kernel was booted with NEW_KERNEL_PARAM and that the
# expected number of Mellanox virtual functions exists; when only the VF
# count is wrong, retry once with FALLBACK_NUM_VFS.
# Globals: CX, NEW_KERNEL_PARAM, FALLBACK_NUM_VFS (read)
# Returns: 0 when SR-IOV is not required, the VF count matches, or the
#          fallback succeeded; 1 when the kernel parameter is missing or the
#          fallback did not yield FALLBACK_NUM_VFS virtual functions.
# Exits:   the whole script with status 1 when the number of VFs can not be
#          calculated.
function validate_sriov () {
  local current_num_vfs total_vfs

  if ! is_sriov_required; then
    logger_print info "Skipping SR-IOV validation, no virtual functions required"
    return 0
  fi
  logger_print info "Validating SR-IOV is enabled, and the required
                     amount of virtual functions exist"

  # get number of VFs currently visible on the PCI bus
  current_num_vfs=$(lspci | grep -i mellanox | grep -ic virtual)
  total_vfs=$(calculate_total_vfs)
  if [[ -z "${total_vfs}" ]]; then
    exit 1
  fi

  # check if kernel was loaded with the new parameter (quietly, the kernel
  # command line itself is of no interest on stdout)
  if ! grep -q -- "${NEW_KERNEL_PARAM}" /proc/cmdline; then
    logger_print error "Kernel did not come up with the kernel parameter: ${NEW_KERNEL_PARAM},
                        SR-IOV configuration failed"
    return 1
  fi

  if [ "$current_num_vfs" -eq "$total_vfs" ]; then
    logger_print info "Successfully verified SR-IOV is enabled with ${current_num_vfs} VFs"
    return 0
  fi

  # fallback only if kernel param exists and amount of vfs is not as expected
  logger_print error "Failed , trying to fallback to ${FALLBACK_NUM_VFS}"
  case "$CX" in
    'ConnectX-3')
      set_modprobe_file "$FALLBACK_NUM_VFS"
      service openibd restart &> /dev/null
      ;;
    'ConnectX-4')
      set_sriov "$FALLBACK_NUM_VFS"
      ;;
  esac

  current_num_vfs=$(lspci | grep -i mellanox | grep -ic virtual)
  if [ "$current_num_vfs" -eq "$FALLBACK_NUM_VFS" ]; then
    logger_print info "Fallback to ${FALLBACK_NUM_VFS} succeeded"
    return 0
  fi
  logger_print error "Failed to configure SR-IOV"
  return 1
}

# Create the requested number of VFs on the PHYSICAL_PORT device through its
# mlx5_num_vfs sysfs entry and install an if-up.d hook that re-creates them.
# Arguments: $1 - total number of virtual functions
# Globals:   PHYSICAL_PORT, REBOOT_REQUIRED, SCRIPT_MODE (read)
# Behavior:  nothing is done when the number of Mellanox virtual functions
#            listed by lspci already equals $1. Otherwise the VFs are created
#            immediately unless a reboot is pending in 'configure' mode, and
#            the persistency hook is (re)written in every case.
# Exits:     the whole script with status 1 when PHYSICAL_PORT is empty or
#            the sysfs write fails.
function set_sriov () {
  local total_vfs=$1
  local device_up=$PHYSICAL_PORT
  local sysfs_num_vfs="/sys/class/net/${device_up}/device/mlx5_num_vfs"
  local persistent_ifup_script=/etc/network/if-up.d/persistent_mlnx_params

  if [[ -z "$device_up" ]]; then
    logger_print error "Failed to find mlx5 up ports in ibdev2netdev."
    exit 1
  fi

  if [ "$(lspci | grep -i mellanox | grep -i virtual | wc -l)" -ne "$total_vfs" ]; then

    # Quoted comparison: an empty/unset REBOOT_REQUIRED means "no reboot
    # pending", so the VFs are applied right away.
    if [[ "$REBOOT_REQUIRED" != true ]] || [[ "$SCRIPT_MODE" == "validate" ]]; then
      # The count is reset to 0 before the new value is written.
      # NOTE(review): presumably required by the driver before changing a
      # non-zero VF count - confirm.
      echo 0 > "$sysfs_num_vfs"
      if ! echo "${total_vfs}" > "$sysfs_num_vfs"; then
        logger_print error "Failed to write $total_vfs > /sys/class/net/${device_up}/device/mlx5_num_vfs"
        exit 1
      fi
      logger_print debug "Configured total vfs ${total_vfs} on ${device_up}"

      # Give MACs to created VFs
      python ./configure_mellanox_vfs.py "${total_vfs}"
    fi

    # Make number of VFs and the MAC of the iser VF persistent: the hook
    # re-creates the VFs when none are listed and re-assigns MACs when
    # "vf 0" shows an all-zero MAC.
    cat > "$persistent_ifup_script" <<EOF
#!/bin/bash
# VFs Persistency
if ! lspci | grep -i mellanox | grep -i virtual; then
echo 0 > /sys/class/net/${device_up}/device/mlx5_num_vfs
echo ${total_vfs} > /sys/class/net/${device_up}/device/mlx5_num_vfs
fi
# MACs Persistency
if ip link show | grep "vf 0"| grep "MAC 00:00:00:00:00:00"; then
python /etc/fuel/plugins/mellanox-plugin-*/configure_mellanox_vfs.py ${total_vfs}
fi
if [ -f /etc/init.d/tgt ]; then /etc/init.d/tgt force-reload; else exit 0; fi
EOF
    chmod +x "$persistent_ifup_script"

    if [[ "$REBOOT_REQUIRED" == true && "$SCRIPT_MODE" == "configure" ]]; then
      logger_print debug "Configured total vfs ${total_vfs} on ${device_up} will apply \
                            on next reboot as reboot is required"
    fi

    if [[ "$SCRIPT_MODE" == "validate" ]]; then
      logger_print debug "Configured total vfs ${total_vfs} on ${device_up}"
    fi
  fi
}
#################

# Run the requested mode and remember its result; an unknown mode aborts
# immediately.
case "$SCRIPT_MODE" in
  'configure')
    configure_sriov
    ;;
  'validate')
    validate_sriov
    ;;
  *)
    logger_print error "Unsupported execution mode ${SCRIPT_MODE}"
    exit 1
    ;;
esac
mode_status=$?

# Setting interfaces up: every interface name reported by `ifconfig -a`
# except 'lo' (names are taken from the first column, trailing ':' removed).
for interface in $(ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' \
  | sed 's/://'); do
  ifconfig "$interface" up
done
ifup_status=$?

# Report a configure/validate failure first; exiting with the loop status
# alone would hide it behind the result of the last `ifconfig ... up`.
if [ "$mode_status" -ne 0 ]; then
  exit "$mode_status"
fi
exit "$ifup_status"