#!/usr/bin/env ruby # Copyright 2013 Mirantis, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. begin require 'rubygems' rescue LoadError end require 'facter' require 'json' require 'httpclient' require 'logger' require 'optparse' require 'yaml' require 'ipaddr' require 'rethtool' require 'digest' require 'timeout' require 'uri' require 'optparse' # TODO(vsharshov): replace below lines by this string after excluding Ruby 1.8 require 'pathname' require 'rexml/document' require 'socket' include REXML unless Process.euid == 0 puts "You must be root" exit 1 end ENV['PATH'] = "/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin" AGENT_CONFIG = "/etc/nailgun-agent/config.yaml" # look at https://github.com/torvalds/linux/blob/master/Documentation/devices.txt # KVM virtio volumes has code 252 in CentOS, but 253 in Ubuntu # Please also update the device codes here # https://github.com/stackforge/fuel-astute/blob/master/mcagents/erase_node.rb#L81 # NVMe has code 259 STORAGE_CODES = [3, 8, 9, 65, 66, 67, 68, 69, 70, 71, 104, 105, 106, 107, 108, 109, 110, 111, 202, 251, 252, 253, 259] REMOVABLE_VENDORS = [ "Adaptec", "IBM", "ServeRA", ] # PCI vendor IDs for Adaptec REMOVABLE_PCI_VENDORS = [ "0x1044", "0x9004", "0x9005", ] # Set default data structure for SR-IOV DEFAULT_SRIOV = { "sriov_totalvfs" => 0, "available" => false, "pci_id" => "" } def digest(body) if body.is_a? Hash digest body.map { |k,v| [digest(k),digest(v)].join("=>") }.sort elsif body.is_a? Array body.map{ |v| digest v }.join('|') else [body.class.to_s, body.to_s].join(":") end end def createsig(body) Digest::SHA1.hexdigest( digest body ) end class McollectiveConfig def initialize(logger) @logger = logger @configfile = '/etc/mcollective/server.cfg' end def get_config_by_key(find_key) found_key = nil found_value = nil # This code is from mcollective's sources File.open(@configfile, "r").each do |line| # strip blank spaces, tabs etc off the end of all lines line.gsub!(/\s*$/, "") unless line =~ /^#|^$/ if line =~ /(.+?)\s*=\s*(.+)/ key = $1 val = $2 if key == find_key found_key = key found_value = val end end end end found_value if found_key end def replace_identity(new_id) # check if id complies reqs raise 'Identities can only match /\w\.\-/' unless new_id.to_s.match(/^[\w\.\-]+$/) value_from_config = get_config_by_key('identity') if value_from_config == new_id.to_s @logger.info "MCollective is up to date with identity = #{new_id}" else config = File.open(@configfile, "rb").read if value_from_config # Key found, but it has other value @logger.info "Replacing identity in mcollective server.cfg to new value = '#{new_id}'" config.gsub!(/^identity[ =].*$/, "identity = #{new_id}") File.open(@configfile, "w") { |f| f.write(config) } else # if key was not found config += "\nidentity = #{new_id}\n" @logger.info "Identity in mcollective server.cfg has not been found. Setting to '#{new_id}'" File.open(@configfile, "w") { |f| f.write(config) } end puts `service mcollective restart` end end end class Offloading def initialize(name, sub) @name, @sub = name, sub end def to_json(options = {}) {'name' => @name, 'state' => nil, 'sub' => @sub}.to_json() end def to_s "#{@name}: #{@sub}" end end class NodeAgent API_DEFAULT_ADDRESS = "localhost" API_DEFAULT_PORT = "8443" API_LEGACY_PORT = "8000" def initialize(logger, dry_run) @logger = logger @settings = get_settings() unless dry_run @api_ip = URI(@settings['url']).host || API_DEFAULT_ADDRESS scheme, api_port = get_scheme_and_port @api_url = "#{scheme}://#{@api_ip}:#{api_port}/api" @logger.info("API URL is #{@api_url}") end @facter = facter_system_info @network = _network @numa_topology = get_numa_topology @mpath_devices, @skip_devices = multipath_devices end def get_scheme_and_port scheme, api_port = nil begin res = htclient.get("https://#{@api_ip}:#{API_DEFAULT_PORT}/") scheme, api_port = "https", API_DEFAULT_PORT rescue Errno::ECONNREFUSED @logger.warn("Connection Refused catched when trying connect to HTTPS port. Use plain HTTP") scheme, api_port = "http", API_LEGACY_PORT end return scheme, api_port end # transform string into Dictionary # For example, line: "initrd=/images/bootstrap/initramfs.img ksdevice=bootif lang=" # will be transformed into: {"mco_user"=>"mcollective", "initrd"=>"/images/bootstrap/initramfs.img", "lang"=>nil} def string_to_hash(string) hash = Hash.new string.split(' ').each do |pair| key,value = pair.split(/=/, 2) hash[key] = value end hash end def get_settings agent_settings = YAML.load_file(AGENT_CONFIG) rescue {} cmdline_settings = string_to_hash(File.read("/proc/cmdline")) rescue {} agent_settings.merge(cmdline_settings) end def facter_system_info Facter.loadfacts Facter.to_hash end def put headers = {"Content-Type" => "application/json"} @logger.debug("Trying to put host info into #{@api_url}") res = htclient.put("#{@api_url}/nodes/agent/", _data.to_json, headers) @logger.debug("Response: status: #{res.status} body: #{res.body}") if res.status < 200 or res.status >= 400 @logger.error("HTTP PUT failed: #{res.inspect}") end res end def post headers = {"Content-Type" => "application/json"} @logger.debug("Trying to create host using #{@api_url}") res = htclient.post("#{@api_url}/nodes/", _data.to_json, headers) @logger.debug("Response: status: #{res.status} body: #{res.body}") res end def htclient client = HTTPClient.new client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE client.ssl_config.ssl_version = :TLSv1 client.connect_timeout = 10 client.send_timeout = 10 client.receive_timeout = 10 # (mihgen): Nailgun may hang for a while, but 10sec should be enough for him to respond client end def _get_iface_info(ifname) info = {} info[:name] = ifname info[:addresses] = {} if ifname =~ /^(\D+)(\d+.*)/ # enp0s11, enp0, eth0 info[:type] = $1 # enp, enp, eth info[:number] = $2 # 0s11, 0, 0 end data = `ip a s dev #{ifname}` #2: enp0s3: mtu 1500 qdisc pfifo_fast master br-fw-admin state UP group default qlen 1000 # link/ether 64:de:13:ab:f4:1d brd ff:ff:ff:ff:ff:ff # inet6 fe80::66de:13ff:feab:f41d/64 scope link # valid_lft forever preferred_lft forever data.each_line do |line| case line.strip when /(\d+): #{ifname}: <([^>]*)> mtu (\d+) (.+) state (\w+)/ info[:flags] = $2.split(',') info[:mtu] = $3 info[:state] = $5.downcase when /link\/(\w+) ([\da-f\:]+) brd ([\da-f\:]+)/ info[:addresses][$2.upcase] = { :family => "lladdr" } if $2 != "00:00:00:00:00:00" info[:encapsulation] = case $1 when /loopback/i then 'Loopback' when /IPIP Tunnel/ then 'IPIP' when /Point-to-Point Protocol/ then 'PPP' when /IPv6-in-IPv4/ then '6to4' when /ether/ then'Ethernet' else nil end when /inet (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(\/(\d{1,2}))( brd (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))? scope (\w+)?/ info[:addresses][$1] = { :family => "inet", "prefixlen" => $3 ||32 } info[:addresses][$1][:scope] = ($6.eql?("host") ? "Node" : $6.capitalize) info[:addresses][$1][:netmask] = IPAddr.new("255.255.255.255").mask(($3 ||32).to_i).to_s info[:addresses][$1][:broadcast] = $5 when /inet6 ([a-f0-9\:]+)\/(\d+) scope (\w+)/ info[:addresses][$1] = { :family => "inet6", "prefixlen" => $2, "scope" => ($3.eql?("host") ? "Node" : $3.capitalize) } end end data = `ip -d link show dev #{ifname}` #2: enp0s3: mtu 1500 qdisc pfifo_fast master br-fw-admin state UP mode DEFAULT group default qlen 1000 # link/ether 64:de:13:ab:f4:1d brd ff:ff:ff:ff:ff:ff promiscuity 1 # bridge_slave state forwarding priority 32 cost 4 hairpin off guard off root_block off fastleave off learning on flood on addrgenmode eui64 data.each_line do |line| next if line =~ /^\d+/ if line =~ /state (\w+)/ info[:state] = $1.downcase end if line =~ /vlan id (\d+)/ vid = $1 info[:state][:vlan] = {} info[:state][:vlan][:id] = vid end end info end def _get_all_interfaces_info res = {} res[:interfaces] = {} Facter::Util::IP.get_interfaces().each do |ifname| res[:interfaces][ifname] = _get_iface_info(ifname) end %w[inet inet6].each do |family| #default via 10.109.3.1 dev br-ex #10.109.0.0/24 dev br-fw-admin proto kernel scope link src 10.109.0.4 #10.109.1.0/24 dev br-mgmt proto kernel scope link src 10.109.1.3 #10.109.2.0/24 dev br-storage proto kernel scope link src 10.109.2.3 #10.109.3.0/24 dev br-ex proto kernel scope link src 10.109.3.3 #240.0.0.0/30 dev hapr-host proto kernel scope link src 240.0.0.1 #240.0.0.4/30 dev vr-host-base proto kernel scope link src 240.0.0.5 `ip -f #{family} route show`.each_line do |line| if line =~ /^([^\s]+)\s(.*)$/ rdest = $1 rend = $2 next if not rend =~ /\bdev\s+([^\s]+)\b/ rint = $1 next if not res[:interfaces].has_key?(rint) rent = {:destination => rdest, :family => family} %w[via scope metric proto src].each do |k| rent[k] = $1 if rend =~ /\b#{k}\s+([^\s]+)\b/ end next if rent[:src] and not res[:interfaces][rint].has_key?(rent[:src]) res[:interfaces][rint][:routes] = [] if not res[:interfaces][rint][:routes] res[:interfaces][rint][:routes] << rent end end end res end def _network iface = nil gw = nil route = `ip r list 0/0`.strip # 'default via 10.21.5.1 dev eth0' if route =~ /^default via ?([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) dev ([a-zA-Z0-9_-]+)/ gw = $1 iface = $2 end result = _get_all_interfaces_info result[:default_gateway] = gw result[:default_interface] = iface result[:mac] = @facter["macaddress_#{iface.gsub('-', '_')}"].upcase result end def _get_detailed_cpuinfo real = {} info = {} info[:total] = 0 curr_proc = nil File.open('/proc/cpuinfo').each do |l| case l.strip when /processor\s+:\s(.+)/ info[:total] += 1 curr_proc = $1 info[curr_proc] = {} when /^cpu MHz\s+:\s(.+)/ info[curr_proc][:mhz] = $1 when /^physical id\s+:\s(.+)/ info[curr_proc][:physical_id] = $1 real[$1] = true when /^flags\s+:\s(.+)/ info[curr_proc][:flags] = $1.split when /^address sizes\s+:\s(\d+) bits (\w+), (\d+) bits (\w+)/ info[curr_proc][:address_sizes] = {} info[curr_proc][:address_sizes][$2.to_sym] = $1 info[curr_proc][:address_sizes][$4.to_sym] = $3 when /(.+)\s+:\s(.+)/ value = $2 key = $1.strip.downcase.gsub(/ /, '_') info[curr_proc][key.to_sym] = value end end info[:real] = real.keys.size info end def _get_blkdev_info info = {} if File.directory?('/sys/block/') begin Timeout::timeout(10) do Dir['/sys/block/*'].each do |blkdir| blkdev = File.basename(blkdir) info[blkdev] = Hash.new Dir.glob("/sys/block/#{blkdev}/{size,removable}").each do |g| File.open(g) { |f| info[blkdev][File.basename(g).to_sym] = f.read_nonblock(1024).strip } end Dir.glob("/sys/block/#{blkdev}/device/{model,rev,state,timeout,vendor}").each do |g| File.open(g) { |f| info[blkdev][File.basename(g).to_sym] = f.read_nonblock(1024).strip } end end # of blkdir end # of timeout rescue => e @logger.error("Error '#{e.message}' in gathering disks metadata: #{e.backtrace}") end end # File.directory info end def _get_dmi_info info = {} Dir['/sys/class/dmi/id/*'].each do |key| if File.file?(key) case File.basename(key) when /product_uuid/ File.open(key) {|f| info[:uuid] = f.read_nonblock(1024).strip} when /sys_vendor/ File.open(key) {|f| info[:sys_vendor] = info[:manufacturer] = f.read_nonblock(1024).strip} else File.open(key) {|f| info[File.basename(key).to_sym] = f.read_nonblock(1024).strip} end end end info end def _get_dmidecode_system_info info = {} info[:system] = {} # dmidecode -t system ## dmidecode 3.0 #Getting SMBIOS data from sysfs. #SMBIOS 2.8 present. # #Handle 0x0100, DMI type 1, 27 bytes #System Information # Manufacturer: QEMU # Product Name: Standard PC (i440FX + PIIX, 1996) # Version: pc-i440fx-2.4 # Serial Number: Not Specified # UUID: 0C2041CE-94E4-453D-95DD-1682D5D8E487 # Wake-up Type: Power Switch # SKU Number: Not Specified # Family: Not Specified # #Handle 0x2000, DMI type 32, 11 bytes #System Boot Information # Status: No errors detected `dmidecode -t system`.each_line do |l| case l.strip when /^SMBIOS (\S+)/ info[:dmidecode_version] = $1.strip when /(.+):(.+)/ k = $1 v = $2 info[:system][k.downcase.gsub(/([ -])/, '_').to_sym] = v.strip end end info end # transform input array into array of the objects # Example: # [{ # "state":null, # "sub":[ # { # "state":null, # "sub":[], # "name":"tx-checksum-ipv6" # }, # ........... # ], # "name":"tx-checksumming" # }, # { # "state":null, # "sub":[], # "name":"generic-segmentation-offload" # }, # ............. # ] def _parse_offloading(offloading_arr) return [] if offloading_arr.empty? inner = [] current = offloading_arr.shift() while offloading_arr.any? && offloading_arr.first().start_with?("\t") do inner << offloading_arr.shift()[1..-1] end res = _parse_offloading(offloading_arr) res << Offloading.new(current, _parse_offloading(inner)) end # Gets information about SR-IOV for specified pci slot # using 'lspci' utility. Example of output to parse: # ... # Capabilities: [160 v1] Single Root I/O Virtualization (SR-IOV) # IOVCap: Migration-, Interrupt Message Number: 000 # IOVCtl: Enable- Migration- Interrupt- MSE- ARIHierarchy- # IOVSta: Migration- # Initial VFs: 8, Total VFs: 8, Number of VFs: 0, Function Dependency Link: 01 # VF offset: 128, stride: 4, Device ID: 10ed # Supported Page Size: 00000553, System Page Size: 00000001 # Region 0: Memory at 0000000090040000 (64-bit, prefetchable) # Region 3: Memory at 0000000090060000 (64-bit, prefetchable) # VF Migration: offset: 00000000, BIR: 0 # ... def sriov_info(int, int_bus_info) sriov = DEFAULT_SRIOV.dup lspci = _get_lspci_info(int_bus_info) if lspci.match(/.*Capabilities:.*SR-IOV.*/) sriov["sriov_totalvfs"] = lspci.scan(/\s+Total\s+VFs:\s+(\d+)/).last.first.to_i unless sriov["sriov_totalvfs"] == 0 sriov["available"] = true sriov["sriov_totalvfs"] -= 1 end vf_vendor = File.read("/sys/class/net/#{int}/device/vendor").chomp.gsub(/^0x/, '') vf_device = lspci.scan(/VF\s+.*\s+Device\s+ID:\s+([A-Fa-f0-9]+)/).last.first sriov["pci_id"] = "#{vf_vendor}:#{vf_device}" end sriov rescue DEFAULT_SRIOV end def nic_pci_id(int) vendor = File.read("/sys/class/net/#{int}/device/vendor").chomp.gsub(/^0x/, '') device = File.read("/sys/class/net/#{int}/device/device").chomp.gsub(/^0x/, '') "#{vendor}:#{device}" rescue "" end def nic_numa_node(int_bus_info) numa_node = @numa_topology[:numa_nodes].select { |node| node[:pcidevs].include?(int_bus_info) } numa_node.first[:id].to_i rescue nil end def _is_in_bond(iface_name) File.exist? "/sys/class/net/#{iface_name}/master" rescue False end def _is_in_bridge(iface_name) File.exist? "/sys/class/net/#{iface_name}/brport" rescue False end def _get_iface_bridge_name(iface_name) File.basename(File.readlink("/sys/class/net/#{iface_name}/brport/bridge")) end def _get_iface_bond_name(iface_name) File.basename(File.readlink("/sys/class/net/#{iface_name}/master")) end def _get_interface_mac(iface_name, swaddr) # Get original mac excluding case with empty EEPROM data mac = "00:00:00:00:00:00" # It is a virtual device, lets read address file in sysfs if File.exist? "/sys/devices/virtual/net/#{iface_name}" File.open("/sys/devices/virtual/net/#{iface_name}/address") do |file| mac = file.readlines[0].chomp.downcase end return mac end # It is not a virtual device, lets ask ethtool first perm_addr = `ethtool -P #{iface_name}` begin re = eval '/(?<=Permanent address: )(?!00(:00){5}).+/' rescue SyntaxError re = perm_addr.match(/(00(:00){5})+/).nil? ? /[0-9a-f]+(:[0-9a-f]+){5}$/ : nil end mac = perm_addr.match(re)[0] rescue swaddr mac.downcase end def _get_parent_interface(iface_name) if _is_in_bond(iface_name) bond_name = _get_iface_bond_name(iface_name) if _is_in_bridge(bond_name) return _get_iface_bridge_name(bond_name) else return bond_name end elsif _is_in_bridge(iface_name) return _get_iface_bridge_name(iface_name) else iface_name end end def _detailed detailed_meta = { :system => _system_info, :interfaces => [], :cpu => { :total => (@facter['processorcount'].to_i rescue nil), :real => (@facter['physicalprocessorcount'].to_i rescue nil), :spec => [], }, :disks => [], :memory => (_dmi_memory or _facter_memory), :pci_devices => _get_pci_dev_list, :numa_topology => @numa_topology, } admin_mac = (_master_ip_and_mac[:mac] or @network[:mac]) rescue nil begin (@network[:interfaces] or {} rescue {}).each do |int, intinfo| #next if not intinfo.has_key?(:name) #int = intinfo[:name] # Send info about physical interfaces only next if int =~ /.*@.*/ next if intinfo[:encapsulation] !~ /^Ethernet.*/ # Avoid virtual devices like loopback, tunnels, bonding, vlans ... # TODO(vsharshov): replace below lines by this string after excluding Ruby 1.8 # next if File.realpath("/sys/class/net/#{int}") =~ /virtual/ next if Pathname.new("/sys/class/net/#{int}").realpath.to_s =~ /virtual/ # Avoid wireless next if File.exist?("/sys/class/net/#{int}/phy80211") || File.exist?("/sys/class/net/#{int}/wireless") # Skip virtual functions next if File.exists?("/sys/class/net/#{int}/device/physfn") int_meta = {:name => int} int_meta[:interface_properties] = {} int_meta[:state] = intinfo[:state] (intinfo[:addresses] or {} rescue {}).each do |addr, addrinfo| if (addrinfo[:family] rescue nil) =~ /lladdr/ # Get original mac excluding case with empty EEPROM data perm_addr = `ethtool -P #{int}` int_meta[:mac] = _get_interface_mac(int_meta[:name], addr) int_meta[:pxe] = _get_interface_mac(_get_parent_interface(int_meta[:name]), addr) == admin_mac.downcase begin int_info = Rethtool::InterfaceSettings.new(int) int_meta[:driver] = int_info.driver int_meta[:bus_info] = int_info.bus_info int_meta[:max_speed] = int_info.best_mode.speed if int_info.current_mode.speed == :unknown int_meta[:current_speed] = nil else int_meta[:current_speed] = int_info.current_mode.speed end rescue int_meta[:current_speed] = nil end unless int_meta[:driver] # Rethtool::InterfaceSettings calls two ioctls: with # ETHTOOL_CMD_GSET and ETHTOOL_CMD_GDRVINFO commands. # But for virtio adapters the first is not implemented, # but the second is. So try to get driver info at least # in this fallback chain. int_meta[:driver], int_meta[:bus_info] = _get_interface_driver_info(int) end elsif (addrinfo[:family] rescue nil) =~ /^inet$/ int_meta[:ip] = addr int_meta[:netmask] = addrinfo[:netmask] if addrinfo[:netmask] end end begin # this stuff will put all non-fixed offloading mode into array # collect names of non-fixed offloading modes # Example of ethtool -k ethX output: # tx-checksumming: on # tx-checksum-ipv4: on # tx-checksum-ip-generic: off [fixed] # tx-checksum-ipv6: on # tx-checksum-fcoe-crc: off [fixed] # tx-checksum-sctp: on # scatter-gather: on # tx-scatter-gather: on # tx-scatter-gather-fraglist: off [fixed] # generic-segmentation-offload: on offloading_data = `ethtool -k #{int}`.split("\n").reject { |offloading| offloading.include?("Features for") || offloading.include?("fixed") }.map { |offloading| offloading.split(':')[0] } # transform raw data into array of objects int_meta[:offloading_modes] = _parse_offloading(offloading_data) rescue # in case if we have no `ethtool` package installed we should # return empty array to support nailgun's rest api call int_meta[:offloading_modes] = [] end # Getting SR-IOV info int_meta[:interface_properties][:sriov] = sriov_info(int, int_meta[:bus_info]) # Get PCI-ID int_meta[:interface_properties][:pci_id] = nic_pci_id(int) # Get numa node int_meta[:interface_properties][:numa_node] = nic_numa_node(int_meta[:bus_info]) detailed_meta[:interfaces] << int_meta end rescue Exception => e @logger.error("Error '#{e.message}' in gathering interfaces metadata: #{e.backtrace}") end begin (_get_detailed_cpuinfo or {} rescue {}).each do |cpu, cpuinfo| if cpu =~ /^[\d]+/ and cpuinfo frequency = cpuinfo[:mhz].to_i rescue nil begin # ohai returns current frequency, try to get max if possible max_frequency = `cat /sys/devices/system/cpu/cpu#{cpu}/cpufreq/cpuinfo_max_freq 2>/dev/null`.to_i / 1000 frequency = max_frequency if max_frequency > 0 rescue end detailed_meta[:cpu][:spec] << { :frequency => frequency, :model => (cpuinfo[:model_name].gsub(/ +/, " ") rescue nil) } end end rescue Exception => e @logger.error("Error '#{e.message}' in gathering cpu metadata: #{e.backtrace}") end begin Timeout::timeout(30) do @logger.debug("Trying to find block devices") # ohai reports the disk size according to /sys/block/#{bname} # which is always measured in 512 bytes blocks, no matter what # the physical (minimal unit which can be atomically written) # or logical (minimal # unit which can be addressed) block sizes are, see # http://lxr.free-electrons.com/source/include/linux/types.h?v=4.4#L124 # http://lxr.free-electrons.com/source/drivers/scsi/sd.c?v=4.4#L2340 block_size = 512 (_get_blkdev_info or {} rescue {}).each do |bname, binfo| @logger.debug("Found block device: #{bname}") @logger.debug("Block device info: #{binfo.inspect}") dname = bname.gsub(/!/, '/') next if @skip_devices.include?(dname) if physical_data_storage_devices.map{|d| d[:name]}.include?(bname) && binfo @logger.debug("Block device seems to be physical data storage: #{bname}") block = physical_data_storage_devices.select{|d| d[:name] == bname}[0] if block[:removable] =~ /^1$/ && ! REMOVABLE_VENDORS.include?(binfo[:vendor]) pci_vendor_id = _get_pci_vendor_id(bname) @logger.debug("Block device #{bname} is removable. PCI vendor ID: #{pci_vendor_id}") unless REMOVABLE_PCI_VENDORS.include?(pci_vendor_id) next end @logger.debug("Block device #{bname} is accepted by PCI vendor ID") end detailed_meta[:disks] << { :name => dname, :model => binfo[:model], :size => (binfo[:size].to_i * block_size), :disk => block[:disk], :extra => block[:extra], :removable => block[:removable], :paths => nil, :opt_io => get_opt_io.fetch(dname, block_size) } elsif @mpath_devices.has_key?(dname) device = @mpath_devices[dname] detailed_meta[:disks] << { :name => 'mapper/' + device["DM_NAME"], :model => binfo[:model], :size => (binfo[:size].to_i * block_size), :disk => dname, :extra => _disk_id_by_name(dname), :removable => 0, :paths => device["DM_BLKDEVS_USED"].map{|name| _disk_path_by_name(name)}.join(', ') } end end @logger.debug("Detailed meta disks: #{detailed_meta[:disks].inspect}") end rescue Exception => e @logger.error("Error '#{e.message}' in gathering disks metadata: #{e.backtrace}") end detailed_meta end def _get_interface_driver_info(int) cmd_driver = Rethtool::EthtoolCmdDriver.new cmd_driver.cmd = Rethtool::ETHTOOL_CMD_GDRVINFO data = Rethtool.ioctl(int, cmd_driver) [data.driver.pack('c*').delete("\000"), data.bus_info.pack('c*').delete("\000")] rescue => e @logger.error("Error '#{e.message}' while obtaining #{int} driver info.") [nil, nil] end def multipath_devices dmsetup = `/sbin/dmsetup info -c --nameprefixes --noheadings -o blkdevname,subsystem,blkdevs_used,name,uuid` # Example output: # DM_BLKDEVNAME='dm-0':DM_SUBSYSTEM='mpath':DM_BLKDEVS_USED='sdb,sda':DM_NAME='31234567890abcdef':DM_UUID='mpath-31234567890abcdef' # DM_BLKDEVNAME='dm-1':DM_SUBSYSTEM='mpath':DM_BLKDEVS_USED='sdc,sdd':DM_NAME='92344567890abcdef':DM_UUID='mpath-92344567890abcdef' mpath_devices = {} mapping = [] unless dmsetup.include?("No devices found") dmsetup.lines.each do |line| device = {} line.split(/:/).each do |key_value| k, v = key_value.split('=') device[k] = v.strip().gsub(/'/, '') end next unless device["DM_SUBSYSTEM"] == 'mpath' device["DM_BLKDEVS_USED"] = device["DM_BLKDEVS_USED"].split(',') device["DM_BLKDEVS_USED"].each do | name | mapping << name end mpath_devices[device["DM_BLKDEVNAME"]] = device end end [mpath_devices, mapping] rescue => e @logger.error("Error '#{e.message}' while scanning for multipath devices.") [{}, []] end def _get_pci_vendor_id(devname) Timeout::timeout(30) do udevadm_walk = {} devpath = nil # expected output of `udevadm info --attribute-walk --name=#{devname}`: # # Udevadm info starts with the device specified by the devpath and then # walks up the chain of parent devices. It prints for every device # found, all possible attributes in the udev rules key format. # A rule to match, can be composed by the attributes of the device # and the attributes from one single parent device. # # looking at device '/devices/pci0000:00/0000:00:1e.0/0000:0d:02.0/8:0:0:1/block/sdc': # KERNEL=="sdc" # SUBSYSTEM=="block" # DRIVER=="" # ATTR{ro}=="0" # ATTR{size}=="30881792" # ATTR{removable}=="1" # # looking at parent device '/devices/pci0000:00/0000:00:1e.0/0000:0d:02.0': # Disk adapter plugged into PCIe slot, we need it's PCI vendor ID # KERNELS=="0000:0d:02.0" # SUBSYSTEMS=="pci" # DRIVERS=="" # ATTRS{device}=="0x9030" # ATTRS{vendor}=="0x10b5" # # looking at parent device '/devices/pci0000:00/0000:00:1e.0': # PCIe slot reported as a PCI bridge device, it's PCI vendor ID is NOT what we need # KERNELS=="0000:00:1e.0" # SUBSYSTEMS=="pci" # DRIVERS=="" # ATTRS{device}=="0x244e" # ATTRS{vendor}=="0x8086" # # looking at parent device '/devices/pci0000:00': # KERNELS=="pci0000:00" # SUBSYSTEMS=="" # DRIVERS=="" `udevadm info --attribute-walk --name=#{devname}`.split("\n").each do |line| line.strip! next unless line.start_with?('looking', 'KERNEL', 'SUBSYSTEM', 'DRIVER', 'ATTR') if line.start_with?('looking') devpath = line.split("'")[1] udevadm_walk[devpath] = {} else key, value = line.split("==").each { |a| a.strip! } udevadm_walk[devpath][key] = value.gsub(/(^")|("$)/, '') end end # We need a vendor ID of a disk adapter rather than vendor ID of the PCIe slot where it's plugged into. # Therefore we should pick the device with SUBSYSTEMS==pci having the longest devpath. # For the example given above, vendor ID should be found as '0x10b5'. # Next ID of '0x8086' belongs to PCIe slot to which PCIe RAID disk adapter is inserted. devpath = Hash[udevadm_walk.select { |k, v| v['SUBSYSTEMS'] == 'pci' }].keys.max udevadm_walk[devpath]['ATTRS{vendor}'] end rescue => e @logger.error("Error '#{e.message}' in obtaining PCI vendor ID: #{e.backtrace}") end def _disk_id_by_name(name) dn = "/dev/disk/by-id" basepath = Dir["#{dn}/**?"].select{|f| /\/#{name}$/.match(File.readlink(f))} basepath.map{|p| p.split("/")[2..-1].join("/")} end def _disk_path_by_name(name) dn = "/dev/disk/by-path" basepath = Dir["#{dn}/**?"].find{|f| /\/#{name}$/.match(File.readlink(f))} basepath.split("/")[2..-1].join("/") if basepath end # Sample mdadm --detail /dev/md127 output: # /dev/md127: # Version : 1.2 # Creation Time : Thu Oct 29 16:12:00 2015 # Raid Level : raid1 # Array Size : 1048000 (1023.61 MiB 1073.15 MB) # Used Dev Size : 1048000 (1023.61 MiB 1073.15 MB) # Raid Devices : 2 # Total Devices : 2 # Persistence : Superblock is persistent # # Update Time : Sun Nov 1 00:57:31 2015 # State : clean # Active Devices : 2 # Working Devices : 2 # Failed Devices : 0 # Spare Devices : 0 # # Name : agordeev:123 (local to host agordeev) # UUID : 7aa70afc:742a9fa6:45f9f5a1:25a2585f # Events : 20 # # Number Major Minor RaidDevice State # 0 252 2 0 active sync /dev/dm-2 # 1 252 3 1 active sync /dev/dm-3 # def _parse_md(data) md = {} begin description, _, components = data.split(/Number\s+Major\s+Minor\s+RaidDevice\s+(State\s+)?/m) line_patterns = ['Version', 'Raid Level', 'Raid Devices', 'Active Devices', 'Spare Devices', 'Failed Devices', 'State', 'UUID', 'Container'] for line in (description.split("\n")[1..-1] rescue []) line.strip! next if line == "" line_patterns.each { |pattern| md[pattern] = line.split(" : ").last if line.start_with?(pattern) } end md['devices'] = [] for line in (components.split("\n") rescue []) line.strip! next if line == "" md['devices'] << line.split().last end rescue Exception => e @logger.error("Error '#{e.message}' in parsing MD: #{e.backtrace}") end md end def _find_fake_raid_mds() mds = [] devices = [] begin Dir["/sys/block/*"].each do |block_device_dir| basename_dir = File.basename(block_device_dir) devname = basename_dir.gsub(/!/, '/') next unless devname.start_with?('md') md_data = _parse_md(`mdadm --detail /dev/#{devname}`) next if md_data['Raid Level'] == 'container' if md_data.has_key?("Container") devices.concat((md_data['devices'] or [])) mds << devname end end rescue Exception => e @logger.error("Error '#{e.message}' in finding fake raid MDs: #{e.backtrace}") end return mds, devices end def physical_data_storage_devices @blocks ||= [] return @blocks unless @blocks.empty? @logger.debug("Trying to get list of physical devices") raise "Path /sys/block does not exist" unless File.exists?("/sys/block") mds, devices = _find_fake_raid_mds() @logger.debug("Found fake RAIDs: #{mds}") @logger.debug("Found components of fake RAIDs: #{devices}") Dir["/sys/block/*"].each do |block_device_dir| basename_dir = File.basename(block_device_dir) # Entries in /sys/block for cciss look like cciss!c0d1 while # the entries in /dev look like /dev/cciss/c0d1. udevadm uses # the entry in /dev so we need to replace the ! to get a valid # device name. devname = basename_dir.gsub(/!/, '/') # Skipping MD if it's a container. Also skipping underlying # devices from which that container is composed. next if devices.include?("/dev/#{devname}") next if devname.start_with?('md') and not mds.include?(devname) @logger.debug("Getting udev properties for device: #{devname}") properties = `udevadm info --query=property --export --name=#{devname}`.split("\n").inject({}) do |result, raw_propety| key, value = raw_propety.split(/\=/) result.update(key.strip => value.strip.chomp("'").reverse.chomp("'").reverse) end @logger.debug("Device #{devname} udev properties: #{properties.inspect}") @logger.debug("Trying to find out if device #{devname} is removable or not") if File.exists?("/sys/block/#{basename_dir}/removable") removable = File.open("/sys/block/#{basename_dir}/removable"){ |f| f.read_nonblock(1024).strip } end @logger.debug("Device #{devname} removable parameter: #{removable.inspect}") if STORAGE_CODES.include?(properties['MAJOR'].to_i) @logger.debug("Device #{devname} seems to be appropriate") # Exclude LVM volumes (in CentOS - 253, in Ubuntu - 252) using additional check # Exclude any storage device connected through USB by the default next if properties['DEVPATH'].include?('virtual/block/dm') || (properties['ID_BUS'] == 'usb' && !@settings.has_key?("report_usb_block_devices")) @blocks << { :name => basename_dir, :disk => _disk_path_by_name(devname) || devname, :extra => _disk_id_by_name(devname) || [], :removable => removable, } end end @logger.debug("Final list of physical devices is: #{@blocks.inspect}") @blocks end def get_opt_io return @opt_io_res if defined?(@opt_io_res) @opt_io_res = {} # example output: # sda 4096 0 # sdb 512 2048 output = `lsblk --nodeps --bytes --noheadings --output NAME,MIN-IO,OPT-IO` output.split("\n").each do |line| name, min_io, opt_io = line.split() @opt_io_res[name] = opt_io.to_i != 0 ? opt_io.to_i : min_io.to_i end @opt_io_res rescue => e @logger.error("Error '#{e.message}' in obtaining optimal io size: #{e.backtrace}") @opt_io_res ||= {} end def _is_virtualbox @facter['productname'] == "VirtualBox" end def _is_virtual @facter[:is_virtual] end # JFYI: if /QEMU/ doesn't matched in /proc/cpuinfo # ohai[:virtualization] will return empty hash on kvm systems # So, this code have exactly same behavior. # But in my opinion here should be returned real value. def _manufacturer if _is_virtualbox @facter['productname'] elsif (@facter['manufacturer'].upcase != 'QEMU' && @facter['is_virtual']) @facter['virtual'] else @facter['manufacturer'] end end def _product_name unless _is_virtual @facter['productname'] end end def _serial @facter['serialnumber'] end # Returns unique identifier of machine # * for kvm virtual node will contain virsh UUID # * for physical HW that would be unique chassis id (from BIOS settings) # * for other hypervizors - not tested def uuid node_uuid = @facter['uuid'] node_uuid && node_uuid.strip end def _system_info { :manufacturer => _manufacturer, :serial => _serial, :uuid => uuid, :runtime_uuid => @settings['runtime_uuid'], :product => _product_name, :family => (_get_dmidecode_system_info[:system][:family].strip rescue nil), :version => _get_dmi_info[:chassis_version], :fqdn => (@facter['fqdn'].strip rescue @facter['hostname'].strip rescue nil), }.delete_if { |key, value| value.nil? or value.empty? or value == "Not Specified" } end def _size(size, unit) case unit when /^kb$/i size * 1024 when /^mb$/i size * 1048576 when /^gb$/i size * 1073741824 end end def _dmi_memory dmi = `/usr/sbin/dmidecode` info = {:devices => [], :total => 0, :maximum_capacity => 0, :slots => 0} return nil if $?.to_i != 0 dmi.split(/\n\n/).each do |group| if /^Physical Memory Array$/.match(group) if /^\s*Maximum Capacity:\s+(\d+)\s+(mb|gb|kb)/i.match(group) info[:maximum_capacity] += _size($1.to_i, $2) end if /^\s*Number Of Devices:\s+(\d+)/i.match(group) info[:slots] += $1.to_i end elsif /^Memory Device$/.match(group) device_info = {} if /^\s*Size:\s+(\d+)\s+(mb|gb|kb)/i.match(group) size = _size($1.to_i, $2) device_info[:size] = size info[:total] += size else next end if /^\s*Speed:\s+(\d+)\s+MHz/i.match(group) device_info[:frequency] = $1.to_i end if /^\s*Type:\s+(.*?)$/i.match(group) device_info[:type] = $1 end #if /^\s*Locator:\s+(.*?)$/i.match(group) # device_info[:locator] = $1 #end info[:devices].push(device_info) end end if info[:total] == 0 nil else info end end def _facter_memory info = {} size = @facter['memorysize'].gsub(/(kb|mb|gb)$/i, "").to_i rescue (return nil) info[:total] = _size(size, $1) info end def _get_ip_mac_pair_for(local_addr) @network[:interfaces].each do |int, intinfo| next unless intinfo.has_key?(:addresses) intinfo[:addresses].each do |k, v| # Here we need to check family because IPAddr.new with bad # data works very slow on some environments # https://bugs.launchpad.net/fuel/+bug/1284571 if v[:family] == 'inet' && !(IPAddr.new(k) rescue nil).nil? net = IPAddr.new("#{k}/#{v[:netmask]}") if net.include? local_addr mac = intinfo[:addresses].find { |_, info| info[:family] == 'lladdr' }[0] return {:ip => k, :mac => mac} end end end end {} end def _master_ip_and_mac_for_multirack rv = {} if File.exist?('/etc/astute.yaml') conf = YAML::load_file('/etc/astute.yaml') return {} unless conf.is_a?(Hash) e_point_name = conf.fetch('network_scheme', {}).fetch('roles', {}).fetch('admin/pxe', nil) e_point_ips = conf.fetch('network_scheme', {}).fetch('endpoints', {}).fetch(e_point_name, {}).fetch('IP', []) e_point_ips.each do |admin_ip| rv = _get_ip_mac_pair_for(admin_ip) break unless rv.empty? end end return rv end def _master_ip_and_mac rv = _get_ip_mac_pair_for(@api_ip) return (rv.empty? ? _master_ip_and_mac_for_multirack : rv) end def _data res = { :mac => (@network[:mac] rescue nil), :ip => (@facter['ipaddress'] rescue nil), :os_platform => (@facter['operatingsystem'].downcase rescue nil) } begin detailed_data = _detailed master_data=_master_ip_and_mac res.merge!({ :ip => (( master_data[:ip] or @facter['ipaddress']) rescue nil), :mac => (( master_data[:mac] or @network[:mac]) rescue nil), :manufacturer => _manufacturer, :platform_name => _product_name, :meta => detailed_data }) rescue Exception => e @logger.error("Error '#{e.message}' in metadata calculation: #{e.backtrace}") end res[:status] = @node_state if @node_state res[:is_agent] = true res[:agent_checksum] = createsig(res) res end def _get_pci_dev_list return {} if `cat /etc/nailgun_systemtype`.chomp != 'bootstrap' lshw_timeout = @settings['lshw_timeout'] || 60 Timeout::timeout(lshw_timeout) do lshw_path = `which lshw`.chomp if $?.success? data = `#{lshw_path} -json -sanitize` return JSON.parse(data) if $?.success? @logger.warn("Can't get data from lshw. Reason: lshw exited with status #{$?.exitstatus}") else @logger.warn("Can't find lshw. Reason: 'which lshw' returned exit status #{$?.exitstatus}") end end {} rescue => e @logger.warn("Can't get data from lshw. Reason: #{e.message}") {} end def get_numa_topology # Output EXAMPLE: # # # # # # # # # # ... # # # # # # # # # doc = Document.new `lstopo --no-caches --of xml` topology = {:numa_nodes => [], :supported_hugepages => supported_hugepages, :distances => [["1.0"]]} doc.elements.each('//distances/') do |dist| topology[:distances] = dist.elements.collect{|v| v.attributes['value']} .each_slice(dist.attributes['nbobjs'].to_i).to_a end numa_node = "//object[@type='NUMANode']" element = doc.elements["//object[@type='NUMANode']"] ? numa_node : "//object[@type='Machine']" doc.elements.each(element) do |numa| struct = {:id=> nil, :cpus => [], :memory => nil, :pcidevs => []} struct[:id] = numa.attributes['os_index'].to_i struct[:memory] = numa.attributes['local_memory'].to_i numa.elements.each("#{numa.xpath}//[@type='PU']") do |pu| struct[:cpus] << pu.attributes['os_index'].to_i end numa.elements.each("#{numa.xpath}//[@type='PCIDev']") do |pcidev| struct[:pcidevs] << pcidev.attributes['pci_busid'] end topology[:numa_nodes] << struct end topology rescue => e @logger.error "Something went wrong with parsing lstopo: #{e.backtrace}" nil end def supported_hugepages return [2048, 1048576] if _get_detailed_cpuinfo['0'][:flags].include?('pdpe1gb') return [2048] if _get_detailed_cpuinfo['0'][:flags].include?('pse') [] end def _get_lspci_info(device) lspci_path = `which lspci`.chomp if $?.success? data = `#{lspci_path} -vvv -s #{device}` if $?.success? return data else @logger.warn("Can't get data from lspci. Reason: lspci exited with status #{$?.exitstatus}") "" end else @logger.warn("Can't find lspci. Reason: 'which lspci' returned exit status #{$?.exitstatus}") "" end rescue => e @logger.warn("Can't get data from lspci for #{device} slot. Reason: #{e.message}") "" end def update_state @node_state = nil if File.exist?("/etc/nailgun_systemtype") fl = File.open("/etc/nailgun_systemtype", "r") system_type = fl.readline.rstrip @node_state = "discover" if system_type == "bootstrap" end end def print s = _data.to_json @logger.info("Data collected by nailgun-agent:") @logger.info(s) end end def write_data_to_file(logger, filename, data) if File.exist?(filename) File.open(filename, 'r') do |fo| text = fo.read end else text = '' end if text != data begin File.open(filename, 'w') do |fo| fo.write(data) end logger.info("Wrote data to file '#{filename}'. Data: #{data}") rescue Exception => e logger.warning("Can't write data to file '#{filename}'. Reason: #{e.message}") end else logger.info("File '#{filename}' is up to date.") end end def provisioned? Socket.gethostname != 'bootstrap' end dry_run = false OptionParser.new do |opts| opts.banner = "Usage: nailgun-agent [options]" opts.on("-d", "--dry-run", "Only print collected information, don't send it anywhere.") do |_d| dry_run = true end end.parse! logger = Logger.new(STDOUT) if File.exist?('/etc/nailgun_uid') logger.level = Logger::INFO else logger.level = Logger::DEBUG end # random sleep is here to prevent target nodes # from reporting to master node all at once unless dry_run sleep_time = rand(30) logger.debug("Sleep for #{sleep_time} seconds before sending request") sleep(sleep_time) end if File.exist?('/etc/nailgun-agent/nodiscover') logger.info("Discover prevented by /etc/nailgun-agent/nodiscover presence.") exit 1 end agent = NodeAgent.new(logger, dry_run) agent.update_state if dry_run agent.print exit 0 end begin unless File.exist?('/etc/nailgun_uid') resp = agent.post # We must not log 409 as error, after node is provisioned there will be no # /etc/nailgun_uid, it will be created after put request if [409, 403].include? resp.status resp = agent.put end else resp = agent.put # Handle case when node was removed, but nailgun_uid exist if resp.status == 400 resp = agent.post end end unless [201, 200].include? resp.status logger.error resp.body exit 1 end new_id = JSON.parse(resp.body)['id'] mc_config = McollectiveConfig.new(logger) unless provisioned? mc_config.replace_identity(new_id) end write_data_to_file(logger, '/etc/nailgun_uid', new_id.to_s) rescue => ex # NOTE(mihgen): There is no need to retry - cron will do it for us logger.error "#{ex.message}\n#{ex.backtrace}" end