blob: 696f58f7665082f4eb9c87825f0f075388f1aacb [file] [log] [blame]
David K. Bainbridgeb5415042016-05-13 17:06:10 -07001package main
2
3import (
David K. Bainbridgeefa951d2016-05-26 10:54:25 -07004 "bytes"
5 "encoding/json"
David K. Bainbridgeb5415042016-05-13 17:06:10 -07006 "fmt"
7 "log"
David K. Bainbridgeefa951d2016-05-26 10:54:25 -07008 "net/http"
David K. Bainbridgeb5415042016-05-13 17:06:10 -07009 "net/url"
David K. Bainbridgeefa951d2016-05-26 10:54:25 -070010 "os/exec"
David K. Bainbridgeb5415042016-05-13 17:06:10 -070011 "regexp"
12 "strconv"
13 "strings"
David K. Bainbridgeefa951d2016-05-26 10:54:25 -070014 "time"
David K. Bainbridgeb5415042016-05-13 17:06:10 -070015
16 maas "github.com/juju/gomaasapi"
17)
18
19// Action how to get from there to here
20type Action func(*maas.MAASObject, MaasNode, ProcessingOptions) error
21
22// Transition the map from where i want to be from where i might be
23type Transition struct {
24 Target string
25 Current string
26 Using Action
27}
28
29// ProcessingOptions used to determine on what hosts to operate
30type ProcessingOptions struct {
31 Filter struct {
32 Zones struct {
33 Include []string
34 Exclude []string
35 }
36 Hosts struct {
37 Include []string
38 Exclude []string
39 }
40 }
41 Mappings map[string]interface{}
42 Verbose bool
43 Preview bool
44 AlwaysRename bool
David K. Bainbridgeefa951d2016-05-26 10:54:25 -070045 ProvTracker Tracker
46 ProvisionURL string
47 ProvisionTTL time.Duration
David K. Bainbridgeb5415042016-05-13 17:06:10 -070048}
49
50// Transitions the actual map
51//
52// Currently this is a hand compiled / optimized "next step" table. This should
53// really be generated from the state machine chart input. Once this has been
54// accomplished you should be able to determine the action to take given your
55// target state and your current state.
David K. Bainbridgeefa951d2016-05-26 10:54:25 -070056var Transitions = map[string]map[string][]Action{
David K. Bainbridgeb5415042016-05-13 17:06:10 -070057 "Deployed": {
David K. Bainbridgeefa951d2016-05-26 10:54:25 -070058 "New": []Action{Reset, Commission},
59 "Deployed": []Action{Provision, Done},
60 "Ready": []Action{Reset, Aquire},
61 "Allocated": []Action{Reset, Deploy},
62 "Retired": []Action{Reset, AdminState},
63 "Reserved": []Action{Reset, AdminState},
64 "Releasing": []Action{Reset, Wait},
65 "DiskErasing": []Action{Reset, Wait},
66 "Deploying": []Action{Reset, Wait},
67 "Commissioning": []Action{Reset, Wait},
68 "Missing": []Action{Reset, Fail},
69 "FailedReleasing": []Action{Reset, Fail},
70 "FailedDiskErasing": []Action{Reset, Fail},
71 "FailedDeployment": []Action{Reset, Fail},
72 "Broken": []Action{Reset, Fail},
73 "FailedCommissioning": []Action{Reset, Fail},
David K. Bainbridgeb5415042016-05-13 17:06:10 -070074 },
75}
76
const (
	// defaultStateMachine lists the node life cycle as "(From)->(To)" edges,
	// one edge per line. It would be nice to drive the transition table from
	// a graph language such as this one.
	//
	// NOTE(review): this graph names a state "FailedEraseDisk" while the
	// Transitions table uses "FailedDiskErasing" - confirm which is intended.
	defaultStateMachine string = `
	(New)->(Commissioning)
	(Commissioning)->(FailedCommissioning)
	(FailedCommissioning)->(New)
	(Commissioning)->(Ready)
	(Ready)->(Deploying)
	(Ready)->(Allocated)
	(Allocated)->(Deploying)
	(Deploying)->(Deployed)
	(Deploying)->(FailedDeployment)
	(FailedDeployment)->(Broken)
	(Deployed)->(Releasing)
	(Releasing)->(FailedReleasing)
	(FailedReleasing)->(Broken)
	(Releasing)->(DiskErasing)
	(DiskErasing)->(FailedEraseDisk)
	(FailedEraseDisk)->(Broken)
	(Releasing)->(Ready)
	(DiskErasing)->(Ready)
	(Broken)->(Ready)
	(Deployed)->(Provisioning)
	(Provisioning)->(ProvisionError)
	(ProvisionError)->(Provisioning)
	(Provisioning)->(Provisioned)`
)
104
105// updateName - changes the name of the MAAS node based on the configuration file
106func updateNodeName(client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
107 macs := node.MACs()
108
109 // Get current node name and strip off domain name
110 current := node.Hostname()
111 if i := strings.IndexRune(current, '.'); i != -1 {
112 current = current[:i]
113 }
114 for _, mac := range macs {
115 if entry, ok := options.Mappings[mac]; ok {
116 if name, ok := entry.(map[string]interface{})["hostname"]; ok && current != name.(string) {
117 nodesObj := client.GetSubObject("nodes")
118 nodeObj := nodesObj.GetSubObject(node.ID())
119 log.Printf("RENAME '%s' to '%s'\n", node.Hostname(), name.(string))
120
121 if !options.Preview {
122 nodeObj.Update(url.Values{"hostname": []string{name.(string)}})
123 }
124 }
125 }
126 }
127 return nil
128}
129
David K. Bainbridgeefa951d2016-05-26 10:54:25 -0700130// Reset we are at the target state, nothing to do
131var Reset = func(client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
132 if options.Verbose {
133 log.Printf("RESET: %s", node.Hostname())
134 }
135
136 if options.AlwaysRename {
137 updateNodeName(client, node, options)
138 }
139
140 options.ProvTracker.Clear(node.ID())
141
142 return nil
143}
144
145// Provision we are at the target state, nothing to do
146var Provision = func(client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
147 if options.Verbose {
148 log.Printf("CHECK PROVISION: %s", node.Hostname())
149 }
150
151 if options.AlwaysRename {
152 updateNodeName(client, node, options)
153 }
154
155 record, err := options.ProvTracker.Get(node.ID())
156 if options.Verbose {
157 log.Printf("[info] Current state of node '%s' is '%s'", node.Hostname(), record.State.String())
158 }
159 if err != nil {
160 log.Printf("[warn] unable to retrieve provisioning state of node '%s' : %s", node.Hostname(), err)
161 } else if record.State == Unprovisioned || record.State == ProvisionError {
162 var err error = nil
163 var callout *url.URL
164 log.Printf("PROVISION '%s'", node.Hostname())
165 if len(options.ProvisionURL) > 0 {
166 if options.Verbose {
167 log.Printf("[info] Provisioning callout to '%s'", options.ProvisionURL)
168 }
169 callout, err = url.Parse(options.ProvisionURL)
170 if err != nil {
171 log.Printf("[error] Failed to parse provisioning URL '%s' : %s", options.ProvisionURL, err)
172 } else {
173 ips := node.IPs()
174 ip := ""
175 if len(ips) > 0 {
176 ip = ips[0]
177 }
178 switch callout.Scheme {
179 // If the scheme is a file, then we will execute the refereced file
180 case "", "file":
181 if options.Verbose {
182 log.Printf("[info] executing local script file '%s'", callout.Path)
183 }
184 record.State = Provisioning
185 record.Timestamp = time.Now().Unix()
186 options.ProvTracker.Set(node.ID(), record)
187 err = exec.Command(callout.Path, node.ID(), node.Hostname(), ip).Run()
188 if err != nil {
189 log.Printf("[error] Failed to execute '%s' : %s", options.ProvisionURL, err)
190 } else {
191 if options.Verbose {
192 log.Printf("[info] Marking node '%s' with ID '%s' as provisioned",
193 node.Hostname(), node.ID())
194 }
195 record.State = Provisioned
196 options.ProvTracker.Set(node.ID(), record)
197 }
198
199 default:
200 if options.Verbose {
201 log.Printf("[info] POSTing to '%s'", options.ProvisionURL)
202 }
203 data := map[string]string{
204 "id": node.ID(),
205 "name": node.Hostname(),
206 "ip": ip,
207 }
208 hc := http.Client{}
209 var b []byte
210 b, err = json.Marshal(data)
211 if err != nil {
212 log.Printf("[error] Unable to marshal node data : %s", err)
213 } else {
214 var req *http.Request
215 var resp *http.Response
216 if options.Verbose {
217 log.Printf("[debug] POSTing data '%s'", string(b))
218 }
219 req, err = http.NewRequest("POST", options.ProvisionURL, bytes.NewReader(b))
220 if err != nil {
221 log.Printf("[error] Unable to construct POST request to provisioner : %s",
222 err)
223 } else {
224 req.Header.Add("Content-Type", "application/json")
225 resp, err = hc.Do(req)
226 if err != nil {
227 log.Printf("[error] Unable to process POST request : %s",
228 err)
229 } else {
230 defer resp.Body.Close()
231 if resp.StatusCode == 202 {
232 record.State = Provisioning
233 } else {
234 record.State = ProvisionError
235 }
236 options.ProvTracker.Set(node.ID(), record)
237 }
238 }
239 }
240 }
241 }
242 }
243
244 if err != nil {
245 if options.Verbose {
246 log.Printf("[warn] Not marking node '%s' with ID '%s' as provisioned, because of error '%s'",
247 node.Hostname(), node.ID(), err)
248 record.State = ProvisionError
249 options.ProvTracker.Set(node.ID(), record)
250 }
251 }
252 } else if record.State == Provisioning && time.Since(time.Unix(record.Timestamp, 0)) > options.ProvisionTTL {
253 log.Printf("[error] Provisioning of node '%s' has passed provisioning TTL of '%v'",
254 node.Hostname(), options.ProvisionTTL)
255 record.State = ProvisionError
256 options.ProvTracker.Set(node.ID(), record)
257 } else if record.State == Provisioning {
258 callout, err := url.Parse(options.ProvisionURL)
259 if err != nil {
260 log.Printf("[error] Unable to parse provisioning URL '%s' : %s", options.ProvisionURL, err)
261 } else if callout.Scheme != "file" {
262 var req *http.Request
263 var resp *http.Response
264 if options.Verbose {
265 log.Printf("[info] Fetching provisioning state for node '%s'", node.Hostname())
266 }
267 req, err = http.NewRequest("GET", options.ProvisionURL+"/"+node.ID(), nil)
268 if err != nil {
269 log.Printf("[error] Unable to construct GET request to provisioner : %s", err)
270 } else {
271 hc := http.Client{}
272 resp, err = hc.Do(req)
273 if err != nil {
274 log.Printf("[error] Failed to quest provision state for node '%s' : %s",
275 node.Hostname(), err)
276 } else {
277 switch resp.StatusCode {
278 case 200: // OK - provisioning completed
279 if options.Verbose {
280 log.Printf("[info] Marking node '%s' with ID '%s' as provisioned",
281 node.Hostname(), node.ID())
282 }
283 record.State = Provisioned
284 options.ProvTracker.Set(node.ID(), record)
285 case 202: // Accepted - in the provisioning state
286 // Noop, presumably alread in this state
287 default: // Consider anything else an erorr
288 record.State = ProvisionError
289 options.ProvTracker.Set(node.ID(), record)
290 }
291 }
292 }
293 }
294 } else if options.Verbose {
295 log.Printf("[info] Not invoking provisioning for '%s', currned state is '%s'", node.Hostname(),
296 record.State.String())
297 }
298
299 return nil
300}
301
David K. Bainbridgeb5415042016-05-13 17:06:10 -0700302// Done we are at the target state, nothing to do
303var Done = func(client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
304 // As devices are normally in the "COMPLETED" state we don't want to
305 // log this fact unless we are in verbose mode. I suspect it would be
306 // nice to log it once when the device transitions from a non COMPLETE
307 // state to a complete state, but that would require keeping state.
308 if options.Verbose {
309 log.Printf("COMPLETE: %s", node.Hostname())
310 }
311
312 if options.AlwaysRename {
313 updateNodeName(client, node, options)
314 }
315
316 return nil
317}
318
319// Deploy cause a node to deploy
320var Deploy = func(client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
321 log.Printf("DEPLOY: %s", node.Hostname())
322
323 if options.AlwaysRename {
324 updateNodeName(client, node, options)
325 }
326
327 if !options.Preview {
328 nodesObj := client.GetSubObject("nodes")
329 myNode := nodesObj.GetSubObject(node.ID())
330 // Start the node with the trusty distro. This should really be looked up or
331 // a parameter default
David K. Bainbridgeefa951d2016-05-26 10:54:25 -0700332 _, err := myNode.CallPost("start", url.Values{"distro_series": []string{"trusty"}})
David K. Bainbridgeb5415042016-05-13 17:06:10 -0700333 if err != nil {
334 log.Printf("ERROR: DEPLOY '%s' : '%s'", node.Hostname(), err)
335 return err
336 }
337 }
338 return nil
339}
340
341// Aquire aquire a machine to a specific operator
342var Aquire = func(client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
343 log.Printf("AQUIRE: %s", node.Hostname())
344 nodesObj := client.GetSubObject("nodes")
345
346 if options.AlwaysRename {
347 updateNodeName(client, node, options)
348 }
349
350 if !options.Preview {
351 // With a new version of MAAS we have to make sure the node is linked
352 // to the subnet vid DHCP before we move to the Aquire state. To do this
353 // We need to unlink the interface to the subnet and then relink it.
354 //
355 // Iterate through all the interfaces on the node, searching for ones
356 // that are valid and not DHCP and move them to DHCP
357 ifcsObj := client.GetSubObject("nodes").GetSubObject(node.ID()).GetSubObject("interfaces")
358 ifcsListObj, err := ifcsObj.CallGet("", url.Values{})
359 if err != nil {
360 return err
361 }
362
363 ifcsArray, err := ifcsListObj.GetArray()
364 if err != nil {
365 return err
366 }
367
368 for _, ifc := range ifcsArray {
369 ifcMap, err := ifc.GetMap()
370 if err != nil {
371 return err
372 }
373
374 // Iterate over the links assocated with the interface, looking for
375 // links with a subnect as well as a mode of "auto"
376 links, ok := ifcMap["links"]
377 if ok {
378 linkArray, err := links.GetArray()
379 if err != nil {
380 return err
381 }
382
383 for _, link := range linkArray {
384 linkMap, err := link.GetMap()
385 if err != nil {
386 return err
387 }
388 subnet, ok := linkMap["subnet"]
389 if ok {
390 subnetMap, err := subnet.GetMap()
391 if err != nil {
392 return err
393 }
394
395 val, err := linkMap["mode"].GetString()
396 if err != nil {
397 return err
398 }
399
400 if val == "auto" {
401 // Found one we like, so grab the subnet from the data and
402 // then relink this as DHCP
403 cidr, err := subnetMap["cidr"].GetString()
404 if err != nil {
405 return err
406 }
407
408 fifcID, err := ifcMap["id"].GetFloat64()
409 if err != nil {
410 return err
411 }
412 ifcID := strconv.Itoa(int(fifcID))
413
414 flID, err := linkMap["id"].GetFloat64()
415 if err != nil {
416 return err
417 }
418 lID := strconv.Itoa(int(flID))
419
420 ifcObj := ifcsObj.GetSubObject(ifcID)
421 _, err = ifcObj.CallPost("unlink_subnet", url.Values{"id": []string{lID}})
422 if err != nil {
423 return err
424 }
425 _, err = ifcObj.CallPost("link_subnet", url.Values{"mode": []string{"DHCP"}, "subnet": []string{cidr}})
426 if err != nil {
427 return err
428 }
429 }
430 }
431 }
432 }
433 }
434 _, err = nodesObj.CallPost("acquire",
435 url.Values{"name": []string{node.Hostname()}})
436 if err != nil {
437 log.Printf("ERROR: AQUIRE '%s' : '%s'", node.Hostname(), err)
438 return err
439 }
440 }
441 return nil
442}
443
444// Commission cause a node to be commissioned
445var Commission = func(client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
446 updateNodeName(client, node, options)
447
448 // Need to understand the power state of the node. We only want to move to "Commissioning" if the node
449 // power is off. If the node power is not off, then turn it off.
450 state := node.PowerState()
451 switch state {
452 case "on":
453 // Attempt to turn the node off
454 log.Printf("POWER DOWN: %s", node.Hostname())
455 if !options.Preview {
David K. Bainbridgeefa951d2016-05-26 10:54:25 -0700456 //POST /api/1.0/nodes/{system_id}/ op=stop
David K. Bainbridgeb5415042016-05-13 17:06:10 -0700457 nodesObj := client.GetSubObject("nodes")
458 nodeObj := nodesObj.GetSubObject(node.ID())
David K. Bainbridgeefa951d2016-05-26 10:54:25 -0700459 _, err := nodeObj.CallPost("stop", url.Values{"stop_mode": []string{"soft"}})
David K. Bainbridgeb5415042016-05-13 17:06:10 -0700460 if err != nil {
461 log.Printf("ERROR: Commission '%s' : changing power start to off : '%s'", node.Hostname(), err)
462 }
463 return err
464 }
465 break
466 case "off":
467 // We are off so move to commissioning
468 log.Printf("COMISSION: %s", node.Hostname())
469 if !options.Preview {
470 nodesObj := client.GetSubObject("nodes")
471 nodeObj := nodesObj.GetSubObject(node.ID())
472
473 updateNodeName(client, node, options)
474
475 _, err := nodeObj.CallPost("commission", url.Values{})
476 if err != nil {
477 log.Printf("ERROR: Commission '%s' : '%s'", node.Hostname(), err)
478 }
479 return err
480 }
481 break
482 default:
483 // We are in a state from which we can't move forward.
484 log.Printf("ERROR: %s has invalid power state '%s'", node.Hostname(), state)
485 break
486 }
487 return nil
488}
489
490// Wait a do nothing state, while work is being done
491var Wait = func(client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
492 log.Printf("WAIT: %s", node.Hostname())
493 return nil
494}
495
496// Fail a state from which we cannot, currently, automatically recover
497var Fail = func(client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
498 log.Printf("FAIL: %s", node.Hostname())
499 return nil
500}
501
502// AdminState an administrative state from which we should make no automatic transition
503var AdminState = func(client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
504 log.Printf("ADMIN: %s", node.Hostname())
505 return nil
506}
507
David K. Bainbridgeefa951d2016-05-26 10:54:25 -0700508func findActions(target string, current string) ([]Action, error) {
David K. Bainbridgeb5415042016-05-13 17:06:10 -0700509 targets, ok := Transitions[target]
510 if !ok {
511 log.Printf("[warn] unable to find transitions to target state '%s'", target)
512 return nil, fmt.Errorf("Could not find transition to target state '%s'", target)
513 }
514
David K. Bainbridgeefa951d2016-05-26 10:54:25 -0700515 actions, ok := targets[current]
David K. Bainbridgeb5415042016-05-13 17:06:10 -0700516 if !ok {
517 log.Printf("[warn] unable to find transition from current state '%s' to target state '%s'",
518 current, target)
519 return nil, fmt.Errorf("Could not find transition from current state '%s' to target state '%s'",
520 current, target)
521 }
522
David K. Bainbridgeefa951d2016-05-26 10:54:25 -0700523 return actions, nil
524}
525
526// ProcessActions
527func ProcessActions(actions []Action, client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
528 var err error
529 for _, action := range actions {
530 if err = action(client, node, options); err != nil {
531 log.Printf("[error] Error while processing action for node '%s' : %s", node.Hostname, err)
532 break
533 }
534 }
535 return err
David K. Bainbridgeb5415042016-05-13 17:06:10 -0700536}
537
538// ProcessNode something
539func ProcessNode(client *maas.MAASObject, node MaasNode, options ProcessingOptions) error {
540 substatus, err := node.GetInteger("substatus")
541 if err != nil {
542 return err
543 }
David K. Bainbridgeefa951d2016-05-26 10:54:25 -0700544 actions, err := findActions("Deployed", MaasNodeStatus(substatus).String())
David K. Bainbridgeb5415042016-05-13 17:06:10 -0700545 if err != nil {
546 return err
547 }
548
549 if options.Preview {
David K. Bainbridgeefa951d2016-05-26 10:54:25 -0700550 ProcessActions(actions, client, node, options)
David K. Bainbridgeb5415042016-05-13 17:06:10 -0700551 } else {
David K. Bainbridgeefa951d2016-05-26 10:54:25 -0700552 go ProcessActions(actions, client, node, options)
David K. Bainbridgeb5415042016-05-13 17:06:10 -0700553 }
554 return nil
555}
556
// buildFilter compiles every pattern in filter into a regular expression and
// returns them in the same order. The first pattern that fails to compile
// aborts the conversion: nil is returned together with the compile error.
func buildFilter(filter []string) ([]*regexp.Regexp, error) {
	compiled := make([]*regexp.Regexp, 0, len(filter))
	for _, pattern := range filter {
		re, err := regexp.Compile(pattern)
		if err != nil {
			return nil, err
		}
		compiled = append(compiled, re)
	}
	return compiled, nil
}
569
// matchedFilter reports whether target matches at least one of the regular
// expressions in include. An empty list matches nothing.
func matchedFilter(include []*regexp.Regexp, target string) bool {
	for i := 0; i < len(include); i++ {
		if include[i].MatchString(target) {
			return true
		}
	}
	return false
}
578
579// ProcessAll something
580func ProcessAll(client *maas.MAASObject, nodes []MaasNode, options ProcessingOptions) []error {
581 errors := make([]error, len(nodes))
582 includeHosts, err := buildFilter(options.Filter.Hosts.Include)
583 if err != nil {
584 log.Fatalf("[error] invalid regular expression for include filter '%s' : %s", options.Filter.Hosts.Include, err)
585 }
586
587 includeZones, err := buildFilter(options.Filter.Zones.Include)
588 if err != nil {
589 log.Fatalf("[error] invalid regular expression for include filter '%v' : %s", options.Filter.Zones.Include, err)
590 }
591
592 for i, node := range nodes {
593 // For hostnames we always match on an empty filter
594 if len(includeHosts) >= 0 && matchedFilter(includeHosts, node.Hostname()) {
595
596 // For zones we don't match on an empty filter
597 if len(includeZones) >= 0 && matchedFilter(includeZones, node.Zone()) {
598 err := ProcessNode(client, node, options)
599 if err != nil {
600 errors[i] = err
601 } else {
602 errors[i] = nil
603 }
604 } else {
605 if options.Verbose {
606 log.Printf("[info] ignoring node '%s' as its zone '%s' didn't match include zone name filter '%v'",
607 node.Hostname(), node.Zone(), options.Filter.Zones.Include)
608 }
609 }
610 } else {
611 if options.Verbose {
612 log.Printf("[info] ignoring node '%s' as it didn't match include hostname filter '%v'",
613 node.Hostname(), options.Filter.Hosts.Include)
614 }
615 }
616 }
617 return errors
618}