PT LAG and ECMP resolution require JSON-RPC ID number as input for later cancellation

Change-Id: I3d0139f88df1407ea1f5c1678055be2bb852038b
This commit is contained in:
Vitaliy Kharechko 2016-09-22 14:55:21 +03:00
parent 363c4003e3
commit a72b75082e
4 changed files with 177 additions and 3 deletions

View File

@ -15,9 +15,12 @@
import json
from collections import OrderedDict
from agentconnection import AgentConnection
from broadviewconfig import BroadViewLibConfig
import fcntl
class AgentAPI(object):
__serial = 1
__serial = None
__cfg = BroadViewLibConfig()
def __init__(self):
self.__httpMethod = "POST"
self.__feature = None
@ -55,6 +58,88 @@ class AgentAPI(object):
if self.__payload:
ret = self.__payload["method"]
return ret
def getLastUsedSerial(self):
    '''
    Return the JSON RPC request ID most recently handed out by
    getNextSerial(), or None if this process has not allocated one yet.

    Some requests to the agent, like PT's get lag resolution with a
    non-zero collection interval, require knowledge of the JSON RPC
    serial number because it is used as a key/index for later
    cancelling the collection. While this violates layering, this is
    no big deal if only one client (or client instance) ever connects
    to an agent in the data center, and the client never generates
    (within reason) a request with a duplicate ID that maps to an ID
    used by the agent as an index.

    When multiple clients, or even clients that are restarted, enter
    the picture, requiring clients to generate IDs that are unique
    across applications and hosts is obviously more difficult. The
    difficulties are communication among clients within the
    datacenter to ensure that no client uses the same IDs as any
    other, and requiring each client to persist the last used ID in
    case of crash or restart.

    The strategy we take here to deal with this is to use a file to
    store the last used ID, and protect that file with an exclusive
    fcntl file lock (see getNextSerial). If a client wants to make a
    request, the lock is acquired, the last used ID is read from the
    file, the ID is incremented by 1 and the file rewritten, followed
    by release of the lock. Sharing across nodes in the datacenter can
    be achieved by all hosts using a lock file that is located in a
    shared filesystem location, such as NFS.

    Obviously all apps on a host or within the datacenter that access
    a given agent must use the same ID file. If there are multiple
    agents in the datacenter (which is going to be the case, as no
    interesting datacenter is going to have one switch), there must be
    one (and only one) ID file per switch/agent.

    We can eliminate this need for request ID management once the
    agent adopts a method where it allocates and returns handles for
    objects and tasks that it is managing, to be used by clients to
    refer to those same objects and tasks in subsequent requests.

    This function exposes the last used ID so that the client can use
    it in a subsequent (cancellation) request.
    '''
    return AgentAPI.__serial
def getIDFileLocation(self):
    '''
    Return the path of the shared request ID file, as reported by the
    class-level library configuration object.
    '''
    config = AgentAPI.__cfg
    return config.getRequestIDFile()
def getNextSerial(self):
    '''
    Get the next request ID, updating the shared ID file. The name
    of the ID file comes from a configuration file named
    /etc/broadviewlib.conf via a setting named json_rpc_id_path. Here
    is an example conf file that sets the ID file path to /tmp/foo.txt:

    [misc]
    json_rpc_id_path = /tmp/foo.txt

    The read-increment-write cycle runs under an exclusive fcntl lock
    on the ID file. The new value is flushed to the file before the
    lock is released, so the next lock holder never reads a stale ID.

    Returns the ID to use for the current request (1 for a new or
    blank ID file). The value stored in the file is the returned ID
    plus one.

    Raises IOError/OSError if the ID file cannot be opened or locked,
    and ValueError if it holds something other than an integer.
    '''
    filename = self.getIDFileLocation()
    open(filename, "a").close()  # create if needed
    with open(filename, "r+") as f:
        fd = f.fileno()
        fcntl.lockf(fd, fcntl.LOCK_EX)
        try:
            text = f.read().strip()
            # new (or blank) file, nothing to parse, so initialize
            serial = int(text) if text else 1
            f.seek(0)
            f.write("{}".format(serial + 1))
            # drop any leftover bytes from longer, older content
            f.truncate()
            # the write is buffered; it must reach the file while we
            # still hold the lock or another client can reuse this ID
            f.flush()
        finally:
            # always release, even if parsing or writing failed
            fcntl.lockf(fd, fcntl.LOCK_UN)
    AgentAPI.__serial = serial  # XXX not thread safe
    return serial
def _send(self, o, timeout):
self.__payload = {}
@ -62,8 +147,7 @@ class AgentAPI(object):
self.__payload["asic-id"] = o["asic-id"]
self.__payload["method"] = o["method"]
self.__payload["params"] = o["params"]
self.__payload["id"] = AgentAPI.__serial
AgentAPI.__serial = AgentAPI.__serial + 1
self.__payload["id"] = self.getNextSerial()
conn = AgentConnection(self.__host, self.__port, self.__feature, timeout)
r = conn.makeRequest(self)
conn.close()

View File

@ -16,6 +16,27 @@ import ConfigParser
import json
import unittest
class BroadViewLibConfig():
    '''
    Accessor for library settings stored in /etc/broadviewlib.conf.

    The parsed configuration is cached in the class attribute cfg, so
    the file is read when the first instance is created and the result
    is shared by every later instance.
    '''

    # shared parser instance; None until a read has succeeded
    cfg = None

    def __init__(self):
        '''Load the configuration file unless it is already cached.'''
        if BroadViewLibConfig.cfg is None:
            parser = ConfigParser.ConfigParser()
            try:
                # a file that cannot be opened is skipped by read();
                # only malformed content raises
                parser.read("/etc/broadviewlib.conf")
            except ConfigParser.Error:
                # leave the cache empty so defaults are used
                parser = None
            BroadViewLibConfig.cfg = parser

    def getRequestIDFile(self):
        '''
        Return the path of the JSON RPC request ID file.

        The value comes from the json_rpc_id_path option in the [misc]
        section. "/tmp/bvserial.txt" is returned when no configuration
        was loaded or the option cannot be read.
        '''
        default = "/tmp/bvserial.txt"
        cfg = BroadViewLibConfig.cfg
        if cfg is None:
            return default
        try:
            return cfg.get("misc", "json_rpc_id_path")
        except ConfigParser.Error:
            # missing section/option or bad interpolation
            return default
class BroadViewBSTSwitches():
cfg = None
bst_switches = None

View File

@ -343,6 +343,8 @@ class GetPacketTraceLAGResolution(AgentAPI):
rep = None
if status == 200:
self.__json = json["report"]
print self.__json
self.__json[0]["cancellation-id"] = self.getLastUsedSerial()
rep = PTParser()
rep.process(json)
else:
@ -402,6 +404,7 @@ class GetPacketTraceECMPResolution(AgentAPI):
rep = None
if status == 200:
self.__json = json["report"]
self.__json[0]["cancellation-id"] = self.getLastUsedSerial()
rep = PTParser()
rep.process(json)
else:
@ -461,6 +464,7 @@ class GetPacketTraceProfile(AgentAPI):
rep = None
if status == 200:
self.__json = json["report"]
self.__json[0]["cancellation-id"] = self.getLastUsedSerial()
rep = PTParser()
rep.process(json)
else:

View File

@ -236,10 +236,24 @@ class PTCommand():
def handleCancelProfile(self, args):
usage = False
reqid = None
usage, asic, host, port = self.getASICHostPort(args)
if not usage:
for arg in args:
if "id:" in arg:
v = arg.split(":")
if len(v) == 2:
reqid = int(v[1])
else:
print "invalid id: bad argument count"
usage = True
if reqid == None:
print "missing id argument"
usage = True
if not usage:
x = CancelPacketTraceProfile(host, port)
x.setASIC(asic)
x.setId(reqid)
status = x.send(self._timeout)
if status != 200:
print "failure: {}".format(status)
@ -249,13 +263,31 @@ class PTCommand():
def helpCancelProfile(self, name):
    '''
    Print the help text for the cancel-profile command.

    name -- command name, printed as the first line of the help.
    '''
    print(name)
    print("")
    print("args:")
    print("")
    # the id argument is listed once, together with where to find it
    print(" id:id (see cancellation-id output from get-profile command)")
def handleCancelLAGResolution(self, args):
usage = False
reqid = None
usage, asic, host, port = self.getASICHostPort(args)
if not usage:
for arg in args:
if "id:" in arg:
v = arg.split(":")
if len(v) == 2:
reqid = int(v[1])
else:
print "invalid id: bad argument count"
usage = True
if reqid == None:
print "missing id argument"
if not usage:
x = CancelPacketTraceLAGResolution(host, port)
x.setASIC(asic)
x.setId(reqid)
status = x.send(self._timeout)
if status != 200:
print "failure: {}".format(status)
@ -265,13 +297,30 @@ class PTCommand():
def helpCancelLAGResolution(self, name):
    '''
    Print the help text for the cancel-lag-resolution command.

    name -- command name, printed as the first line of the help.
    '''
    print(name)
    print("")
    print("args:")
    print("")
    print(" id:id (see cancellation-id output from get-lag-resolution command)")
def handleCancelECMPResolution(self, args):
usage = False
reqid = None
usage, asic, host, port = self.getASICHostPort(args)
if not usage:
for arg in args:
if "id:" in arg:
v = arg.split(":")
if len(v) == 2:
reqid = int(v[1])
else:
print "invalid id: bad argument count"
usage = True
if reqid == None:
print "missing id argument"
if not usage:
x = CancelPacketTraceECMPResolution(host, port)
x.setASIC(asic)
x.setId(reqid)
status = x.send(self._timeout)
if status != 200:
print "failure: {}".format(status)
@ -281,6 +330,10 @@ class PTCommand():
def helpCancelECMPResolution(self, name):
    '''
    Print the help text for the cancel-ecmp-resolution command.

    name -- command name, printed as the first line of the help.
    '''
    print(name)
    print("")
    print("args:")
    print("")
    print(" id:id (see cancellation-id output from get-ecmp-resolution command)")
def handleCancelSendDropPacket(self, args):
usage = False
@ -450,6 +503,10 @@ class PTCommand():
print " port-list:port[,port][,port]...[,port]"
print " collection-interval: interval"
print " drop-packet: [0|1]"
print
print "notes:"
print
print " use the cancellation-id output as id arg for cancel-lag-resolution command to cancel when collection-interval non-zero"
def handleGetECMPResolution(self, args):
usage = False
@ -510,6 +567,10 @@ class PTCommand():
print " port-list:port[,port][,port]...[,port]"
print " collection-interval: interval"
print " drop-packet: [0|1]"
print
print "notes:"
print
print " use the cancellation-id output as id arg for cancel-ecmp-resolution command to cancel when collection-interval non-zero"
def handleGetProfile(self, args):
usage = False
@ -575,6 +636,10 @@ class PTCommand():
print " port-list:port[,port][,port]...[,port]"
print " collection-interval: interval"
print " drop-packet: [0|1]"
print
print "notes:"
print
print " use the cancellation-id output as id arg for cancel-profile command to cancel when collection-interval non-zero"
def handleGetDropReason(self, args):
usage = False