diff --git a/broadview_lib/config/agentapi.py b/broadview_lib/config/agentapi.py
index 128c834..c58caa5 100644
--- a/broadview_lib/config/agentapi.py
+++ b/broadview_lib/config/agentapi.py
@@ -15,9 +15,12 @@
 import json
 from collections import OrderedDict
 from agentconnection import AgentConnection
+from broadviewconfig import BroadViewLibConfig
+import fcntl
 
 class AgentAPI(object):
-    __serial = 1
+    __serial = None
+    __cfg = BroadViewLibConfig()
     def __init__(self):
         self.__httpMethod = "POST"
         self.__feature = None
@@ -55,6 +58,102 @@ class AgentAPI(object):
         if self.__payload:
             ret = self.__payload["method"]
         return ret
+
+    def getLastUsedSerial(self):
+        '''
+        Some requests to the agent, like PT's get lag resolution with a
+        non-zero collection interval, require knowledge of the JSON RPC
+        serial number because it is used as a key/index for later
+        cancelling the collection. While this violates layering, this is
+        no big deal if only one client (or client instance) ever connects
+        to an agent in the data center, and the client never generates
+        (within reason) a request with a duplicate ID that maps to an ID
+        used by the agent as an index.
+
+        When multiple clients, or even clients that are restarted enter
+        the picture, requiring clients to generate IDs that are
+        unique across applications and hosts is obviously more difficult.
+        Such difficulties are communication among clients within
+        the datacenter to ensure that no client uses the same IDs as any
+        other, and requiring each client to persist the last used ID in
+        case of crash or restart.
+
+        The strategy we take here to deal with this is to use a file to
+        store the last used ID, and protect that file with a SYSV file
+        lock. If a client wants to make a request, the lock is acquired,
+        the last used ID is read from the file, the ID is incremented by
+        1 and the file rewritten, followed by release of the lock. Sharing
+        across nodes in the datacenter can be achieved by all hosts using
+        a lock file that is located in a shared filesystem location, such
+        as NFS.
+
+        Obviously all apps on a host or within the datacenter that access
+        a given agent must use the same ID file. If there are multiple agents
+        in the datacenter (which is going to be the case, as no interesting
+        datacenter is going to have one switch), there must be one (and only
+        one) id file per switch/agent.
+
+        We can eliminate this need for request ID management once the
+        agent adopts a method where it allocates and returns handles for
+        objects and tasks that it is managing to be used by clients to
+        refer to those same objects and tasks in subsequent requests.
+
+        This function exposes the last used ID so that the client can use
+        it in a subsequent (cancellation) request. It returns None until
+        getNextSerial() has been called in this process.
+
+        '''
+
+        return AgentAPI.__serial
+
+    def getIDFileLocation(self):
+        '''
+        Return the path of the shared request ID file, as reported by
+        the library configuration (BroadViewLibConfig).
+        '''
+
+        return AgentAPI.__cfg.getRequestIDFile()
+
+    def getNextSerial(self):
+
+        '''
+        Get the next request ID, updating the shared ID file. The name
+        of the ID file comes from a configuration file named
+        /etc/broadviewlib.conf via a setting named json_rpc_id_path. Here
+        is an example conf file that sets the ID file path to /tmp/foo.txt:
+
+        [misc]
+
+        json_rpc_id_path = /tmp/foo.txt
+
+        An empty (or whitespace-only) ID file is treated as new and the
+        sequence starts at 1. ValueError is raised if the file holds
+        anything other than an integer.
+
+        '''
+
+        filename = self.getIDFileLocation()
+
+        open(filename, "a").close() # create if needed
+        with open(filename, "r+") as f:
+            fd = f.fileno()
+            fcntl.lockf(fd, fcntl.LOCK_EX)
+            try:
+                content = f.read().strip()
+                if len(content) == 0:
+                    serial = 1 # new file, nothing stored, so initialize
+                else:
+                    serial = int(content)
+                f.seek(0)
+                f.write("{}".format(serial + 1))
+                f.truncate() # drop stale bytes after the new value
+                # push the buffered write out while the lock is still held,
+                # otherwise another process could read the old value
+                f.flush()
+            finally:
+                fcntl.lockf(fd, fcntl.LOCK_UN)
+        AgentAPI.__serial = serial # XXX not thread safe
+        return serial
 
     def _send(self, o, timeout):
         self.__payload = {}
@@ -62,8 +161,7 @@ class AgentAPI(object):
         self.__payload["asic-id"] = o["asic-id"]
         self.__payload["method"] = o["method"]
         self.__payload["params"] = o["params"]
-        self.__payload["id"] = AgentAPI.__serial
-        AgentAPI.__serial = AgentAPI.__serial + 1
+        self.__payload["id"] = self.getNextSerial()
         conn = AgentConnection(self.__host, self.__port, self.__feature, timeout)
         r = conn.makeRequest(self)
         conn.close()
diff --git a/broadview_lib/config/broadviewconfig.py b/broadview_lib/config/broadviewconfig.py
index f90fd66..a5e9873 100644
--- a/broadview_lib/config/broadviewconfig.py
+++ b/broadview_lib/config/broadviewconfig.py
@@ -16,6 +16,36 @@ import ConfigParser
 import json
 import unittest
 
+class BroadViewLibConfig():
+    '''
+    Library-wide settings read from /etc/broadviewlib.conf. The parsed
+    file is kept on the class and shared by all instances.
+    '''
+    cfg = None
+
+    def __init__(self):
+        if not BroadViewLibConfig.cfg:
+            try:
+                BroadViewLibConfig.cfg = ConfigParser.ConfigParser()
+                BroadViewLibConfig.cfg.read("/etc/broadviewlib.conf")
+            except Exception:
+                # best effort: a malformed conf file just means defaults apply
+                BroadViewLibConfig.cfg = None
+
+    def getRequestIDFile(self):
+        '''
+        Return the json_rpc_id_path setting from the [misc] section, or
+        /tmp/bvserial.txt when the conf file or the setting is missing.
+        '''
+        ret = "/tmp/bvserial.txt"
+        try:
+            ret = BroadViewLibConfig.cfg.get("misc", "json_rpc_id_path")
+        except (AttributeError, ConfigParser.Error):
+            # AttributeError: cfg is None; Error: no such section/option
+            pass
+
+        return ret
+
 class BroadViewBSTSwitches():
     cfg = None
     bst_switches = None
diff --git a/broadview_lib/config/pt.py b/broadview_lib/config/pt.py
index 52b2502..dccb4cd 100644
--- a/broadview_lib/config/pt.py
+++ b/broadview_lib/config/pt.py
@@ -343,6 +343,7 @@ class GetPacketTraceLAGResolution(AgentAPI):
         rep = None
         if status == 200:
             self.__json = json["report"]
+            self.__json[0]["cancellation-id"] = self.getLastUsedSerial()
             rep = PTParser()
             rep.process(json)
         else:
@@ -402,6 +403,7 @@ class GetPacketTraceECMPResolution(AgentAPI):
         rep = None
         if status == 200:
             self.__json = json["report"]
+            self.__json[0]["cancellation-id"] = self.getLastUsedSerial()
             rep = PTParser()
             rep.process(json)
         else:
@@ -461,6 +463,7 @@ class GetPacketTraceProfile(AgentAPI):
         rep = None
         if status == 200:
             self.__json = json["report"]
+            self.__json[0]["cancellation-id"] = self.getLastUsedSerial()
             rep = PTParser()
             rep.process(json)
         else:
diff --git a/broadview_lib/tools/bv-ptctl.py b/broadview_lib/tools/bv-ptctl.py
index b39b979..aef4f4d 100644
--- a/broadview_lib/tools/bv-ptctl.py
+++ b/broadview_lib/tools/bv-ptctl.py
@@ -236,10 +236,30 @@ class PTCommand():
 
     def handleCancelProfile(self, args):
         usage = False
+        reqid = None
         usage, asic, host, port = self.getASICHostPort(args)
+        if not usage:
+            for arg in args:
+                # match only the id argument itself; a substring test would
+                # also accept any other key:value argument containing "id:"
+                if arg.startswith("id:"):
+                    v = arg.split(":")
+                    if len(v) == 2:
+                        try:
+                            reqid = int(v[1])
+                        except ValueError:
+                            print "invalid id: not an integer"
+                            usage = True
+                    else:
+                        print "invalid id: bad argument count"
+                        usage = True
+            if reqid is None:
+                print "missing id argument"
+                usage = True
         if not usage:
             x = CancelPacketTraceProfile(host, port)
             x.setASIC(asic)
+            x.setId(reqid)
             status = x.send(self._timeout)
             if status != 200:
                 print "failure: {}".format(status)
@@ -249,13 +269,37 @@ class PTCommand():
 
     def helpCancelProfile(self, name):
         print name
+        print
+        print "args:"
+        print
+        print " id:id (see cancellation-id output from get-profile command)"
 
     def handleCancelLAGResolution(self, args):
         usage = False
+        reqid = None
         usage, asic, host, port = self.getASICHostPort(args)
+        if not usage:
+            for arg in args:
+                # match only the id argument itself; a substring test would
+                # also accept any other key:value argument containing "id:"
+                if arg.startswith("id:"):
+                    v = arg.split(":")
+                    if len(v) == 2:
+                        try:
+                            reqid = int(v[1])
+                        except ValueError:
+                            print "invalid id: not an integer"
+                            usage = True
+                    else:
+                        print "invalid id: bad argument count"
+                        usage = True
+            if reqid is None:
+                print "missing id argument"
+                usage = True
         if not usage:
             x = CancelPacketTraceLAGResolution(host, port)
             x.setASIC(asic)
+            x.setId(reqid)
             status = x.send(self._timeout)
             if status != 200:
                 print "failure: {}".format(status)
@@ -265,13 +309,37 @@ class PTCommand():
 
     def helpCancelLAGResolution(self, name):
         print name
+        print
+        print "args:"
+        print
+        print " id:id (see cancellation-id output from get-lag-resolution command)"
 
     def handleCancelECMPResolution(self, args):
         usage = False
+        reqid = None
         usage, asic, host, port = self.getASICHostPort(args)
+        if not usage:
+            for arg in args:
+                # match only the id argument itself; a substring test would
+                # also accept any other key:value argument containing "id:"
+                if arg.startswith("id:"):
+                    v = arg.split(":")
+                    if len(v) == 2:
+                        try:
+                            reqid = int(v[1])
+                        except ValueError:
+                            print "invalid id: not an integer"
+                            usage = True
+                    else:
+                        print "invalid id: bad argument count"
+                        usage = True
+            if reqid is None:
+                print "missing id argument"
+                usage = True
         if not usage:
             x = CancelPacketTraceECMPResolution(host, port)
             x.setASIC(asic)
+            x.setId(reqid)
             status = x.send(self._timeout)
             if status != 200:
                 print "failure: {}".format(status)
@@ -281,6 +349,10 @@ class PTCommand():
 
     def helpCancelECMPResolution(self, name):
         print name
+        print
+        print "args:"
+        print
+        print " id:id (see cancellation-id output from get-ecmp-resolution command)"
 
     def handleCancelSendDropPacket(self, args):
         usage = False
@@ -450,6 +522,10 @@ class PTCommand():
         print " port-list:port[,port][,port]...[,port]"
         print " collection-interval: interval"
         print " drop-packet: [0|1]"
+        print
+        print "notes:"
+        print
+        print " use the cancellation-id output as id arg for cancel-lag-resolution command to cancel when collection-interval non-zero"
 
     def handleGetECMPResolution(self, args):
         usage = False
@@ -510,6 +586,10 @@ class PTCommand():
         print " port-list:port[,port][,port]...[,port]"
         print " collection-interval: interval"
         print " drop-packet: [0|1]"
+        print
+        print "notes:"
+        print
+        print " use the cancellation-id output as id arg for cancel-ecmp-resolution command to cancel when collection-interval non-zero"
 
     def handleGetProfile(self, args):
         usage = False
@@ -575,6 +655,10 @@ class PTCommand():
         print " port-list:port[,port][,port]...[,port]"
         print " collection-interval: interval"
         print " drop-packet: [0|1]"
+        print
+        print "notes:"
+        print
+        print " use the cancellation-id output as id arg for cancel-profile command to cancel when collection-interval non-zero"
 
     def handleGetDropReason(self, args):
         usage = False