CORD-229 - updated switch provisioning to be more consistent with retries
Change-Id: I1123da0ea22c97f8bf9b002580104f0ea2ee7f79
diff --git a/switchq/switchq.go b/switchq/switchq.go
index ca10f8f..b586d75 100644
--- a/switchq/switchq.go
+++ b/switchq/switchq.go
@@ -34,20 +34,19 @@
}
}
-func (c *Config) provision(rec AddressRec) error {
- log.Printf("[debug] Verifing that device '%s (%s)' isn't already in a provisioning state",
- rec.Name, rec.MAC)
+func (c *Config) getProvisionedState(rec AddressRec) (int, string, error) {
+ log.Printf("[debug] Fetching provisioned state of device '%s' (%s, %s)",
+ rec.Name, rec.IP, rec.MAC)
resp, err := http.Get(c.ProvisionURL + rec.MAC)
- log.Printf("%s%s", c.ProvisionURL, rec.MAC)
if err != nil {
- log.Printf("[error] Error while retrieving provisioning state for device '%s (%s)' : %s",
- rec.Name, rec.MAC, err)
- return err
+ log.Printf("[error] Error while retrieving provisioning state for device '%s (%s, %s)' : %s",
+ rec.Name, rec.IP, rec.MAC, err)
+ return -1, "", err
}
if resp.StatusCode != 404 && int(resp.StatusCode/100) != 2 {
- log.Printf("[error] Error while retrieving provisioning state for device '%s (%s)' : %s",
- rec.Name, rec.MAC, resp.Status)
- return fmt.Errorf(resp.Status)
+ log.Printf("[error] Error while retrieving provisioning state for device '%s (%s, %s)' : %s",
+ rec.Name, rec.IP, rec.MAC, resp.Status)
+ return -1, "", fmt.Errorf(resp.Status)
}
defer resp.Body.Close()
if resp.StatusCode != 404 {
@@ -55,27 +54,32 @@
var raw interface{}
err = decoder.Decode(&raw)
if err != nil {
- log.Printf("[error] Unable to unmarshal status response from provisioning service for device '%s (%s)' : %s",
- rec.Name, rec.MAC, err)
- return err
+ log.Printf("[error] Unmarshal provisioning service response for device '%s (%s, %s)' : %s",
+ rec.Name, rec.IP, rec.MAC, err)
+ return -1, "", err
}
status := raw.(map[string]interface{})
switch int(status["status"].(float64)) {
case 0, 1: // "PENDING", "RUNNING"
- log.Printf("[info] Device '%s (%s)' is already scheduled to be provisioned",
- rec.Name, rec.MAC)
- return nil
+ return int(status["status"].(float64)), "", nil
case 2: // "COMPLETE"
- // noop
+ return 2, "", nil
case 3: // "FAILED"
- c.storage.ClearProvisioned(rec.MAC)
+ return 3, status["message"].(string), nil
default:
err = fmt.Errorf("unknown provisioning status : %d", status["status"])
log.Printf("[error] received unknown provisioning status for device '%s (%s)' : %s",
rec.Name, rec.MAC, err)
- return err
+ return -1, "", err
}
}
+
+ // If we end up here that means that no record was found in the provisioning, so return
+ // a status of -1, w/o an error
+ return -1, "", nil
+}
+
+func (c *Config) provision(rec AddressRec) error {
log.Printf("[info] POSTing to '%s' for provisioning of '%s (%s)'", c.ProvisionURL, rec.Name, rec.MAC)
data := map[string]string{
"id": rec.MAC,
@@ -95,7 +99,7 @@
hc := http.Client{}
var b []byte
- b, err = json.Marshal(data)
+ b, err := json.Marshal(data)
if err != nil {
log.Printf("[error] Unable to marshal provisioning data : %s", err)
return err
@@ -107,7 +111,7 @@
}
req.Header.Add("Content-Type", "application/json")
- resp, err = hc.Do(req)
+ resp, err := hc.Do(req)
if err != nil {
log.Printf("[error] Unable to POST request to provisioner : %s", err)
return err
@@ -119,8 +123,6 @@
return err
}
- now := time.Now()
- c.storage.MarkProvisioned(rec.MAC, &now)
return nil
}
@@ -142,8 +144,49 @@
return err
}
+ if last == nil {
+ log.Printf("[debug] no TTL for device '%s' (%s, %s)",
+ rec.Name, rec.IP, rec.MAC)
+ } else {
+ log.Printf("[debug] TTL for device '%s' (%s, %s) is %v",
+ rec.Name, rec.IP, rec.MAC, *last)
+ }
+
+ // Verify if the provision status of the node is complete, if in an error state then TTL means
+ // nothing
+ state, message, err := c.getProvisionedState(rec)
+ switch state {
+ case 0, 1: // Pending or Running
+ log.Printf("[debug] device '%s' (%s, %s) is being provisioned",
+ rec.Name, rec.IP, rec.MAC)
+ return nil
+ case 2: // Complete
+ log.Printf("[debug] device '%s' (%s, %s) has completed provisioning",
+ rec.Name, rec.IP, rec.MAC)
+ // If no last record then set the TTL
+ if last == nil {
+ now := time.Now()
+ last = &now
+ c.storage.MarkProvisioned(rec.MAC, last)
+ log.Printf("[debug] Storing TTL for device '%s' (%s, %s) as %v",
+ rec.Name, rec.IP, rec.MAC, now)
+ return nil
+ }
+ case 3: // Failed
+ log.Printf("[debug] device '%s' (%s, %s) failed last provisioning with message '%s', reattempt",
+ rec.Name, rec.IP, rec.MAC, message)
+ c.storage.ClearProvisioned(rec.MAC)
+ last = nil
+ default: // No record
+ }
+
// If TTL is 0 then we will only provision a switch once.
if last == nil || (c.ttl > 0 && time.Since(*last) > c.ttl) {
+ if last != nil {
+ c.storage.ClearProvisioned(rec.MAC)
+ log.Printf("[debug] device '%s' (%s, %s) TTL expired, reprovisioning",
+ rec.Name, rec.IP, rec.MAC)
+ }
c.provision(rec)
} else if c.ttl == 0 {
log.Printf("[debug] device '%s' (%s, %s) has completed its one time provisioning, with a TTL set to %s",