#!/usr/bin/env python

# Copyright 2017-present Open Networking Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16
# imagebuilder.py
# rebuilds/fetches docker container images per their git status in repo
# in addition to docker, needs `sudo apt-get install python-git`
20
21import argparse
22import datetime
23import git
24import json
25import logging
26import os
27import re
28import string
29import sys
30import tarfile
31import tempfile
32import time
33import xml.etree.ElementTree as ET
34import yaml
35
# Names shared across the functions and classes below. At module scope a
# `global` statement binds nothing; the names only come into existence once
# parse_args() / load_config() assign them.
global args, conf, build_tag, buildable_images, pull_only_images
41
42
def setup_logging(name=None, logfile=False):
    """ Return a logger with a DEBUG-level stdout handler attached

    name    -- optional suffix; the logger is named "<module>-<name>" when
               given, otherwise just "<module>"
    logfile -- when true, additionally attach a DEBUG-level file handler
               writing to "<name>.log" under conf.logdir

    A new handler is attached on every call, including repeated calls for
    the same logger name.
    """

    logger_name = "-".join([__name__, name]) if name else __name__
    logger = logging.getLogger(logger_name)

    formatter = logging.Formatter(logging.BASIC_FORMAT)

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(formatter)
    stdout_handler.setLevel(logging.DEBUG)
    logger.addHandler(stdout_handler)

    # secondary logging to a file, always DEBUG level
    if logfile:
        # NOTE(review): `conf` is read as an object with a `logdir`
        # attribute here - confirm that matches what load_config() stores.
        file_handler = logging.FileHandler(
            os.path.join(conf.logdir, "%s.log" % name))
        file_handler.setFormatter(formatter)
        file_handler.setLevel(logging.DEBUG)
        logger.addHandler(file_handler)

    return logger
66
Zack Williams12d029c2017-10-31 14:11:39 -070067
# module-wide logger: stdout only, no per-name suffix, no log file
LOG = setup_logging(name=None, logfile=False)
69
70
def parse_args():
    """ Parse the command line into the global `args` and set LOG's level

    LOG is set to DEBUG when -v is given at least once (verbosity starts
    at 1 and each -v increments it), otherwise to INFO.
    """
    global args

    ap = argparse.ArgumentParser()

    ap.add_argument('-c', '--container_list', default='docker_images.yml',
                    type=argparse.FileType('r'),
                    help="YAML Config and master container list")

    # -f is optional, so using type=argparse.FileType is problematic
    ap.add_argument('-f', '--filter_images', default=None, action='store',
                    help="YAML file restricting images to build/fetch")

    ap.add_argument('-a', '--actions_taken', default=None,
                    help="Save a YAML file with actions taken during run")

    # FIXME - the -b and -p options are currently unimplemented
    force_group = ap.add_mutually_exclusive_group()

    force_group.add_argument(
        '-b', '--build_force', action="store_true",
        help="Build (don't fetch) all internal containers")

    force_group.add_argument(
        '-p', '--pull_force', action="store_true",
        help="Only pull containers, fail if build required")

    ap.add_argument('-d', '--dry_run', action="store_true",
                    help="Don't build/fetch anything")

    ap.add_argument('-g', '--graph', default=None,
                    help="Filename for DOT graph file of image dependency")

    ap.add_argument('-l', '--build_log_dir', action="store",
                    help="Log build output to this dir if set")

    ap.add_argument('-r', '--repo_root', default="..", action="store",
                    help="Repo root directory")

    ap.add_argument('-t', '--build_tag', default=None, action="store",
                    help="tag all images built/pulled using this tag")

    ap.add_argument('-v', '--verbosity', action='count', default=1,
                    help="Repeat to increase log level")

    args = ap.parse_args()

    LOG.setLevel(logging.DEBUG if args.verbosity > 1 else logging.INFO)
120
121
def load_config():
    """ Load the master container list and optional filter into globals

    Reads the YAML file opened by argparse as args.container_list into
    `conf`, then sets:

      build_tag        -- args.build_tag if given, else
                          conf['docker_build_tag']
      buildable_images -- conf['buildable_images'], optionally filtered
      pull_only_images -- conf['pull_only_images'], optionally filtered

    When args.filter_images names a YAML file, only images whose untagged
    name appears in its 'docker_image_whitelist' list are kept.

    Exits the process with status 1 on a YAML error in the container list,
    or on any problem with the filter file (unreadable, malformed, missing
    whitelist, or a whitelist entry that carries a tag).
    """
    global args, conf, buildable_images, pull_only_images, build_tag

    try:
        cl_abs = os.path.abspath(args.container_list.name)
        LOG.info("Master container list file: %s" % cl_abs)

        conf = yaml.safe_load(args.container_list)
    except yaml.YAMLError:
        LOG.exception("Problem loading container list file")
        sys.exit(1)

    build_tag = args.build_tag if args.build_tag else conf['docker_build_tag']

    if args.filter_images is None:
        buildable_images = conf['buildable_images']
        pull_only_images = conf['pull_only_images']
        return

    fi_abs = os.path.abspath(args.filter_images)

    LOG.info("Filtering image list per 'docker_image_whitelist' in: %s" %
             fi_abs)
    try:
        # `with` guarantees the handle is closed even if parsing fails
        with open(fi_abs, 'r') as fi_fh:
            filter_list = yaml.safe_load(fi_fh)

        # an empty file parses to None; anything but a mapping is unusable
        if not isinstance(filter_list, dict) or \
                'docker_image_whitelist' not in filter_list:
            LOG.error("No 'docker_image_whitelist' defined in: %s" %
                      fi_abs)
            sys.exit(1)

        whitelist = filter_list['docker_image_whitelist']

        # fail if filter list specifies tags
        for f_i in whitelist:
            (name, tag) = split_name(f_i)
            if tag:
                LOG.error("filter list may not be tagged")
                sys.exit(1)

        buildable_images = [img for img in conf['buildable_images']
                            if split_name(img['name'])[0] in whitelist]

        pull_only_images = [img for img in conf['pull_only_images']
                            if split_name(img)[0] in whitelist]

    # `Exception` (not a bare except) so the SystemExit raised by the
    # sys.exit() calls above passes through without a spurious traceback
    except Exception:
        LOG.exception("Problem with filter list file")
        sys.exit(1)
179
180
def split_name(input_name):
    """ split a docker image name in the 'name:tag' format into components

    Returns a (name, tag) tuple; tag is None when input_name has no tag.

    The tag is whatever follows the last colon, unless that remainder
    contains a '/', in which case the colon belongs to a registry
    'host:port/' prefix and the whole input is returned as the name.
    """

    (name, colon, tag) = input_name.rpartition(":")

    # no colon at all, or the colon is part of a registry host:port prefix
    if not colon or "/" in tag:
        return (input_name, None)

    return (name, tag)
195
196
class RepoRepo():
    """ git repo managed by repo tool

    Wraps a GitPython Repo object and caches, at construction time, the
    head commit hash, its commit date, whether the working tree is clean
    and the list of untracked files.
    """

    # branch name from the repo manifest; assigned by RepoManifest after
    # construction and used as the default branch in path_clean()
    manifest_branch = ""

    def __init__(self, name, path, remote):
        """ Open the git repo at `path` (relative to args.repo_root)

        name   -- project name from the manifest
        path   -- checkout path relative to the repo root
        remote -- remote URL prefix; git_url is remote + name

        Exits the process with status 1 if the repo can't be inspected.
        """

        self.name = name
        self.path = path
        self.remote = remote
        self.git_url = "%s%s" % (remote, name)

        try:
            self.git_repo_o = git.Repo(self.abspath())
            LOG.debug("Repo - %s, path: %s" % (name, path))

            self.head_commit = self.git_repo_o.head.commit.hexsha
            LOG.debug(" head commit: %s" % self.head_commit)

            commit_t = time.gmtime(self.git_repo_o.head.commit.committed_date)
            self.head_commit_t = time.strftime("%Y-%m-%dT%H:%M:%SZ", commit_t)
            LOG.debug(" commit date: %s" % self.head_commit_t)

            self.clean = not self.git_repo_o.is_dirty(untracked_files=True)
            LOG.debug(" clean: %s" % self.clean)

            # list of untracked files (expensive operation)
            self.untracked_files = self.git_repo_o.untracked_files
            for u_file in self.untracked_files:
                LOG.debug(" Untracked: %s" % u_file)

        except Exception:
            LOG.exception("Error with git repo: %s" % name)
            sys.exit(1)

    def abspath(self):
        """ absolute path of this repo's checkout """
        global args
        return os.path.abspath(os.path.join(args.repo_root, self.path))

    @staticmethod
    def _is_under_path(file_path, test_path):
        """ Is file_path equal to, or located below, test_path?

        Matches on whole path components, so "foobar/x" is not under
        "foo". An empty test_path or "." matches every file.
        """
        if test_path in ("", "."):
            return True

        prefix = test_path.rstrip("/")
        return file_path == prefix or file_path.startswith(prefix + "/")

    def path_clean(self, test_path, branch=""):
        """ Is working tree on branch and no untracked files in path?

        test_path -- path relative to the repo root to examine
        branch    -- revision to diff against; defaults to manifest_branch

        Returns False if the working tree differs from `branch` under
        test_path, or if any cached untracked file lies under test_path.
        """

        if not branch:
            branch = self.manifest_branch

        LOG.debug(" Looking for changes in path: %s" % test_path)

        p_clean = True

        # diff between branch head and working tree (None)
        branch_head = self.git_repo_o.commit(branch)
        diff = branch_head.diff(None, paths=test_path)

        if diff:
            p_clean = False

            for diff_obj in diff:
                LOG.debug(" file not on branch: %s" % diff_obj)

        for u_file in self.untracked_files:
            if self._is_under_path(u_file, test_path):
                LOG.debug(" untracked file in path: %s" % u_file)
                p_clean = False

        return p_clean
267
268
class RepoManifest():
    """ parses manifest XML file used by repo tool

    After construction:
      branch -- "<remote>/<revision>" taken from the <default> element
      remote -- 'review' attribute of the first <remote> element
      repos  -- dict of project name -> RepoRepo, for every <project>
                whose checkout directory exists under args.repo_root
    """

    def __init__(self):
        """ Load <repo_root>/.repo/manifest.xml and open checked-out repos

        Exits the process with status 1 if the manifest can't be parsed.
        """
        global args
        global conf

        self.manifest_xml = {}
        self.repos = {}
        self.branch = ""

        self.manifest_file = os.path.abspath(
            os.path.join(args.repo_root, ".repo/manifest.xml"))

        LOG.info("Loading manifest file: %s" % self.manifest_file)

        try:
            tree = ET.parse(self.manifest_file)
            self.manifest_xml = tree.getroot()
        except Exception:
            LOG.exception("Error loading repo manifest")
            sys.exit(1)

        # Find the default branch
        default = self.manifest_xml.find('default')
        self.branch = "%s/%s" % (default.attrib['remote'],
                                 default.attrib['revision'])

        # Find the remote URL for these repos
        remote = self.manifest_xml.find('remote')
        self.remote = remote.attrib['review']

        LOG.info("Manifest is on branch '%s' with remote '%s'" %
                 (self.branch, self.remote))

        for project in self.manifest_xml.iter('project'):
            repo_name = project.attrib['name']

            # 'path' is optional in a manifest; fall back to project name
            rel_path = project.attrib.get('path', repo_name)
            abs_path = os.path.abspath(
                os.path.join(args.repo_root, rel_path))

            if not os.path.isdir(abs_path):
                LOG.debug("Repo in manifest but not checked out: %s" %
                          repo_name)
                continue

            repo = RepoRepo(repo_name, rel_path, self.remote)
            repo.manifest_branch = self.branch
            self.repos[repo_name] = repo

    def get_repo(self, repo_name):
        """ return the RepoRepo for repo_name; KeyError if not checked out """
        return self.repos[repo_name]
325
Zack Williams12d029c2017-10-31 14:11:39 -0700326
# DockerImage Status Constants

# states describing what is currently known about an image
DI_UNKNOWN = 'unknown'  # unknown status
DI_EXISTS = 'exists'  # already exists in docker, has an image_id

# states describing an action to take, or its failure
DI_BUILD = 'build'  # needs to be built
DI_FETCH = 'fetch'  # needs to be fetched (pulled)
DI_ERROR = 'error'  # build or other fatal failure
335
336
class DockerImage():
    """ One docker image: either buildable from a repo, or pull-only

    An image is "buildable" when it was created with a repo_name; such
    images have a Dockerfile, a build context inside a RepoRepo, and
    optionally extra "components" (paths from other repos copied into the
    context). Images without a repo_name can only be pulled.
    """

    def __init__(self, name, repo_name=None, repo_d=None, path=".",
                 context=".", dockerfile='Dockerfile', labels=None,
                 tags=None, image_id=None, components=None, status=DI_UNKNOWN):
        """ Create the image object and inspect its context/Dockerfile

        name       -- image name, optionally in 'name:tag' form
        repo_name  -- name of the repo the image is built from, if any
        repo_d     -- RepoRepo object for repo_name
        path       -- directory inside the repo holding the Dockerfile
        context    -- build context, relative to path
        dockerfile -- Dockerfile filename, relative to path
        labels     -- initial label dict (a fresh dict is used if None)
        tags       -- extra tags; 'name:tag' entries contribute only tag
        image_id   -- docker image id, when already known
        components -- list of dicts with 'repo_name', 'repo_d', 'dest'
                      and 'path' keys, or None
        status     -- one of the DI_* constants
        """

        LOG.debug("New DockerImage object from name: %s" % name)

        # name to pull as, usually what is provided on creation.
        # May be changed by create_tags
        self.raw_name = name

        # Python's mutable defaults is a landmine
        self.labels = {} if labels is None else labels

        self.repo_name = repo_name
        self.repo_d = repo_d
        self.path = path
        self.context = context
        self.dockerfile = dockerfile
        self.tags = []  # tags are added to this later in __init__
        self.image_id = image_id
        self.components = components
        self.status = status

        self.parent_names = []  # names of parents from _find_parent_names()
        self.parents = []  # list of parent DockerImage object
        self.children = []  # list of child DockerImage objects

        # split name:tag if given in combined format
        (image_name, image_tag) = split_name(name)
        self.name = image_name
        if image_tag:  # has tag
            self.tags.append(image_tag)

        # Add the build tag if exists
        if build_tag not in self.tags:
            self.tags.append(build_tag)

        # split names from tag list
        if tags is not None:
            for tag in tags:
                (tag_name, tag_tag) = split_name(tag)

                # has name also -> use just the tag; else it's a bare tag
                thistag = tag_tag if tag_tag else tag_name

                if thistag not in self.tags:  # don't duplicate tags
                    self.tags.append(thistag)

        # self.clean only applies to this container
        self.clean = self._context_clean()
        self._find_parent_names()

    def __str__(self):
        return self.name

    def buildable(self):
        """ Can this image be built from a Dockerfile? """
        # has a git repo to be built from
        return bool(self.repo_name)

    def _context_clean(self):
        """ Determine if this is repo and context is clean """

        if not self.buildable():
            return True  # unbuildable images are clean

        # whole-repo dirtiness flag cached by RepoRepo
        repo_clean = self.repo_d.clean

        # only check the Docker context for cleanliness
        context_path = os.path.normpath(
            os.path.join(self.path, self.context))
        context_clean = self.repo_d.path_clean(context_path)

        # check if subcomponents are clean
        components_clean = self.components_clean()

        LOG.debug(" Build Context Cleanliness - "
                  "repo: %s, context: %s, components: %s" %
                  (repo_clean, context_clean, components_clean))

        return bool(context_clean and repo_clean and components_clean)

    def parents_clean(self):
        """ Returns True if this image and every ancestor is clean """

        if not self.buildable():
            return True  # unbuildable images are clean

        if not self.clean:
            return False

        # every parent (and, recursively, its parents) must be clean
        for parent in self.parents:
            if not parent.parents_clean():
                return False

        return True

    def compare_labels(self, other_labels):
        """ Returns True if image label-schema.org labels match dict

        Only labels whose key ends in 'name', 'vcs-url' or 'vcs-ref' are
        compared. other_labels may be None, which is treated as an empty
        dict (an image that carries no labels).
        """

        if other_labels is None:
            other_labels = {}

        comparable_labels_re = [
            r".*name$",
            r".*vcs-url$",
            r".*vcs-ref$",
        ]

        for clr in comparable_labels_re:  # loop on all comparable labels
            for label in self.labels:  # loop on all labels
                if re.match(clr, label) is None:
                    continue

                # label must exist in other, with the same value
                if label not in other_labels or \
                        self.labels[label] != other_labels[label]:
                    LOG.info("Non-matching label: %s" % label)
                    return False  # False when first difference found

        return True  # only when every label matches

    def same_name(self, other_name):
        """ compare image name (possibly with tag) against image name/tag """

        (o_name, o_tag) = split_name(other_name)

        if self.name != o_name:
            return False

        # untagged names match on name alone, tagged must be a known tag
        return o_tag is None or o_tag in self.tags

    def components_clean(self):
        """ Returns True unless some component's repo or path is dirty """

        if self.buildable() and self.components is not None:
            for component in self.components:
                if not component['repo_d'].clean or \
                        not component['repo_d'].path_clean(component['path']):
                    return False

        return True

    def component_labels(self):
        """ returns a dict of labels for subcomponents

        Returns None when the image is not buildable or has no components.
        """

        if not self.buildable() or self.components is None:
            return None

        comp_l = {}

        for component in self.components:

            LOG.debug(" component %s generating child labels" %
                      component['repo_name'])

            prefix = "org.opencord.component.%s." % component['repo_name']

            comp_l[prefix + "vcs-url"] = component['repo_d'].git_url

            clean = bool(
                component['repo_d'].clean and
                component['repo_d'].path_clean(component['path']))

            if clean:
                comp_l[prefix + "version"] = self.repo_d.manifest_branch
                comp_l[prefix + "vcs-ref"] = component['repo_d'].head_commit
            else:
                comp_l[prefix + "version"] = "dirty"
                comp_l[prefix + "vcs-ref"] = ""

        return comp_l

    def child_labels(self, repo_list=None):
        """ return a dict of labels to apply to child images

        repo_list -- names of repos that already have labels; this image's
                     repo is appended to it when its labels are generated,
                     so the caller's list is modified in place.
        """

        LOG.debug(" Generating child labels from parent: %s" % self.name)

        # only create labels when they haven't already been created
        if repo_list is None:
            repo_list = []

        LOG.debug(" Already labeled with: %s" % ", ".join(repo_list))

        cl = {}

        if self.buildable() and self.repo_name not in repo_list:

            LOG.debug(" Adding parent labels from repo: %s" % self.repo_name)

            prefix = "org.opencord.component.%s." % self.repo_name

            cl[prefix + "vcs-url"] = self.repo_d.git_url

            if self.clean:
                cl[prefix + "version"] = self.repo_d.manifest_branch
                cl[prefix + "vcs-ref"] = self.repo_d.head_commit
            else:
                cl[prefix + "version"] = "dirty"
                cl[prefix + "vcs-ref"] = ""

            repo_list.append(self.repo_name)

        # include component labels if present
        if self.components is not None:
            cl.update(self.component_labels())

        # recursively find labels up the parent chain
        if self.parents is not None:
            for parent in self.parents:
                cl.update(parent.child_labels(repo_list))

        return cl

    def create_labels(self):
        """ Create label-schema.org labels for image """

        if not self.buildable():
            return

        LOG.debug("Creating labels for: %s" % self.name)

        self.labels['org.label-schema.name'] = self.name
        self.labels['org.label-schema.schema-version'] = "1.0"

        # org.label-schema.build-date
        time_now = datetime.datetime.utcnow()
        build_date = time_now.strftime("%Y-%m-%dT%H:%M:%SZ")
        self.labels['org.label-schema.build-date'] = build_date

        # git version related labels
        self.labels['org.label-schema.vcs-url'] = self.repo_d.git_url

        if self.clean:
            self.labels['org.label-schema.version'] = \
                self.repo_d.manifest_branch
            self.labels['org.label-schema.vcs-ref'] = \
                self.repo_d.head_commit
            self.labels['org.opencord.vcs-commit-date'] = \
                self.repo_d.head_commit_t
        else:
            self.labels['org.label-schema.version'] = "dirty"
            self.labels['org.label-schema.vcs-ref'] = ""

        # include component labels if present
        if self.components is not None:
            self.labels.update(self.component_labels())

    def create_tags(self):
        """ Create docker tags as needed """

        if not self.buildable():
            return

        LOG.debug("Creating tags for image: %s" % self.name)

        # if clean and parents clean, add tags for branch/commit
        if self.parents_clean():
            if build_tag not in self.tags:
                self.tags.append(build_tag)

            commit_tag = self.repo_d.head_commit
            if commit_tag not in self.tags:
                self.tags.append(commit_tag)

            # pulling is done via raw_name, set tag to commit
            self.raw_name = "%s:%s" % (self.name, commit_tag)

        LOG.debug("All tags: %s" % ", ".join(self.tags))

    def _find_parent_names(self):
        """ set self.parent_names using Dockerfile FROM lines """

        if not self.buildable():
            return

        frompatt = re.compile(r'^FROM\s+([\w/_:.-]+)', re.MULTILINE)
        parent_names = []

        # scan the Dockerfile line by line for FROM statements
        with open(self.dockerfile_abspath()) as dfh:
            for line in dfh:
                fromline = frompatt.search(line)
                if fromline:
                    parent_names.append(fromline.group(1))

        self.parent_names = parent_names  # may have tag

        LOG.debug(" Parents: %s" % ", ".join(self.parent_names))

    def dockerfile_abspath(self):
        """ returns absolute path to Dockerfile for this image """

        if self.buildable():
            return os.path.join(self.repo_d.abspath(),
                                self.path, self.dockerfile)
        return None

    def dockerfile_rel_path(self):
        """ returns the path relative to the context of the Dockerfile """

        if not self.buildable():
            return None

        # compare by value: identity ('is') on strings is unreliable
        if self.context == ".":
            return self.dockerfile

        return os.path.normpath(os.path.join(self.path, self.dockerfile))

    def context_tarball(self):
        """ returns a filehandle to a tarball (tempfile) for the image

        The tarball holds the build context (minus .git directories and
        anything listed in the context's .dockerignore) plus the files of
        every component, placed under the component's 'dest' path.
        Returns None for images that are not buildable.
        """

        if not self.buildable():
            return None

        context_path = os.path.normpath(
            os.path.join(self.repo_d.abspath(),
                         self.path, self.context))

        LOG.info("Creating context tarball of path: %s" % context_path)

        t_fh = tempfile.NamedTemporaryFile()
        t = tarfile.open(mode='w', fileobj=t_fh, dereference=True)

        # exclude git directories anywhere in the context
        exclusion_list = ['**/.git']

        docker_ignore = os.path.join(context_path, '.dockerignore')
        if os.path.exists(docker_ignore):
            with open(docker_ignore) as di_fh:
                for line in di_fh:
                    entry = line.strip()

                    # skip blank lines and comments.
                    # slightly out of spec, we allow whitespace before comments
                    # https://docs.docker.com/engine/reference/builder/#dockerignore-file
                    if not entry or entry.startswith('#'):
                        continue

                    # drop trailing slashes/backslashes from the pattern
                    exclusion_list.append(entry.rstrip('\\/'))

        LOG.debug("Exclusion list: %s" % exclusion_list)

        # see docker-py source for context
        for path in sorted(
                DockerUtils.exclude_paths(context_path, exclusion_list)):
            t.add(os.path.join(context_path, path),
                  arcname=path,
                  recursive=False)

        # add sub-components to tarball if required
        if self.components is not None:
            for component in self.components:
                c_ctx_p = os.path.normpath(
                    os.path.join(component['repo_d'].abspath(),
                                 component['path']))

                LOG.info("Adding component %s at context %s" %
                         (component['repo_name'], c_ctx_p))

                # walk component source path
                for path in sorted(
                        DockerUtils.exclude_paths(c_ctx_p, exclusion_list)):

                    # path to where to put files in the archive
                    cf_dest = os.path.normpath(
                        os.path.join(component['dest'], path))

                    t.add(os.path.join(c_ctx_p, path),
                          arcname=cf_dest,
                          recursive=False)

        # t.list() # prints all files in tarball
        t.close()
        t_fh.seek(0)  # rewind so the caller reads from the start
        return t_fh

    def buildargs(self):
        """ returns dict of labels in docker buildargs compliant format

        Every character of a label key that is not alphanumeric or '_' is
        replaced with '_'; values are passed through unchanged.
        """
        ba_re = re.compile(r'\W')  # non alpha/num/_ chars

        ba_a = {}
        for label_k in self.labels:
            ba_a[ba_re.sub('_', label_k)] = self.labels[label_k]

        return ba_a
736
737
738class DockerBuilder():
739
740 def __init__(self, repo_manifest):
741
742 global buildable_images
743 global pull_only_images
744
745 self.rm = repo_manifest
746 self.dc = None # Docker Client object
747
748 self.images = []
749
750 # arrays of images, used for write_actions
751 self.all = []
752 self.preexisting = []
753 self.obsolete = []
754 self.pulled = []
755 self.failed_pull = []
756 self.obsolete_pull = []
757 self.built = []
758 self.failed_build = []
759
760 # create dict of images, setting defaults
761 for image in buildable_images:
762
763 repo_d = self.rm.get_repo(image['repo'])
764
765 if "components" in image:
766 components = []
767
768 for component in image['components']:
769 comp = {}
770 comp['repo_name'] = component['repo']
771 comp['repo_d'] = self.rm.get_repo(component['repo'])
772 comp['dest'] = component['dest']
773 comp['path'] = component.get('path', '.')
774 components.append(comp)
775 else:
776 components = None
777
778 # set the full name in case this is pulled
779 full_name = "%s:%s" % (image['name'], build_tag)
780
781 img_o = DockerImage(full_name, image['repo'], repo_d,
782 image.get('path', '.'),
783 image.get('context', '.'),
784 image.get('dockerfile', 'Dockerfile'),
785 components=components)
786
787 self.images.append(img_o)
788
789 # add misc images
790 for misc_image in pull_only_images:
791 img_o = DockerImage(misc_image)
792 self.images.append(img_o)
793
794 if not args.dry_run:
795 self._docker_connect()
796
797 self.create_dependency()
798 self.find_preexisting()
799
800 if args.graph is not None:
801 self.dependency_graph(args.graph)
802
803 self.process_images()
804
805 if args.actions_taken is not None:
806 self.write_actions_file(args.actions_taken)
807
808 def _docker_connect(self):
809 """ Connect to docker daemon """
810
Zack Williams12d029c2017-10-31 14:11:39 -0700811 try:
812 self.dc = DockerClient()
813 except requests.ConnectionError:
814 LOG.debug("Docker connection not available")
815 sys.exit(1)
Zack Williamsce63eb02017-02-28 10:46:22 -0700816
817 if self.dc.ping():
818 LOG.debug("Docker server is responding")
819 else:
820 LOG.error("Unable to ping docker server")
821 sys.exit(1)
822
823 def find_preexisting(self):
824 """ find images that already exist in Docker and mark """
825
826 if self.dc:
827 LOG.debug("Evaluating already built/fetched Docker images")
828
829 # get list of images from docker
830 pe_images = self.dc.images()
831
832 for pe_image in pe_images:
833 raw_tags = pe_image['RepoTags']
834
835 self.all.append({
836 'id': pe_image['Id'],
837 'tags': raw_tags,
838 })
839
840 # ignoring all <none>:<none> images, reasonable?
841 if raw_tags and "<none>:<none>" not in raw_tags:
842 LOG.debug(" Preexisting Image - ID: %s, tags: %s" %
843 (pe_image['Id'], ",".join(raw_tags)))
844
845 image = self.find_image(raw_tags[0])
846
847 if image is not None:
848 if image.compare_labels(pe_image['Labels']):
849 LOG.debug(" Image %s has up-to-date labels" %
850 pe_image['Id'])
851
852 self.preexisting.append({
853 'id': pe_image['Id'],
854 'tags': raw_tags,
Andy Bavierafaa5302017-08-15 08:56:15 -0700855 'base': image.name.split(":")[0],
Zack Williamsce63eb02017-02-28 10:46:22 -0700856 })
857
858 image.image_id = pe_image['Id']
859 image.status = DI_EXISTS
860
861 else:
862 LOG.debug(" Image %s has obsolete labels" %
863 pe_image['Id'])
864
865 self.obsolete.append({
866 'id': pe_image['Id'],
867 'tags': raw_tags,
868 })
869
870 def find_image(self, image_name):
871 """ return image object matching name """
872 LOG.debug("attempting to find image for: %s" % image_name)
873
874 for image in self.images:
875 if image.same_name(image_name):
876 return image
877 return None
878
879 def create_dependency(self):
880 """ set parent/child links for images """
881
Zack Williams12d029c2017-10-31 14:11:39 -0700882 # List of lists of parents images. Done in two steps for clarity
883 lol_of_parents = [img.parent_names for img in self.images
884 if img.parent_names is not []]
885
886 # flat list of all parent image names, with dupes
887 parents_with_dupes = [parent for parent_sublist in lol_of_parents
888 for parent in parent_sublist]
Zack Williamsce63eb02017-02-28 10:46:22 -0700889
890 # remove duplicates
891 parents = list(set(parents_with_dupes))
892
893 LOG.info("All parent images: %s" % ", ".join(parents))
894
895 # list of "external parents", ones not built internally
896 external_parents = []
897
898 for parent_name in parents:
899 LOG.debug("Evaluating parent image: %s" % parent_name)
900 internal_parent = False
901
902 # match on p_name, without tag
903 (p_name, p_tag) = split_name(parent_name)
904
905 for image in self.images:
906 if image.same_name(p_name): # internal image is a parent
907 internal_parent = True
908 LOG.debug(" Internal parent: %s" % image.name)
909 break
910
911 if not internal_parent: # parent is external
912 LOG.debug(" External parent: %s" % parent_name)
913 external_parents.append(parent_name)
914
915 # add unique external parents to image list
916 for e_p_name in set(external_parents):
917 LOG.debug(" Creating external parent image object: %s" % e_p_name)
918 img_o = DockerImage(e_p_name)
919 self.images.append(img_o)
920
921 # now that all images (including parents) are in list, associate them
Zack Williams12d029c2017-10-31 14:11:39 -0700922 for image in filter(lambda img: img.parent_names is not [],
Zack Williamsce63eb02017-02-28 10:46:22 -0700923 self.images):
924
925 LOG.debug("Associating image: %s" % image.name)
926
Zack Williams12d029c2017-10-31 14:11:39 -0700927 for parent_name in image.parent_names:
Zack Williamsce63eb02017-02-28 10:46:22 -0700928
Zack Williams12d029c2017-10-31 14:11:39 -0700929 parent = self.find_image(parent_name)
930 image.parents.append(parent)
Zack Williamsce63eb02017-02-28 10:46:22 -0700931
Zack Williams12d029c2017-10-31 14:11:39 -0700932 if parent is not None:
933 LOG.debug(" internal image '%s' is parent of '%s'" %
934 (parent.name, image.name))
935 parent.children.append(image)
936
937 else:
938 LOG.debug(" external image '%s' is parent of '%s'" %
939 (image.parent_name, image.name))
Zack Williamsce63eb02017-02-28 10:46:22 -0700940
941 # loop again now that parents are linked to create labels
942 for image in self.images:
943 image.create_labels()
944 image.create_tags()
945
946 # if image has parent, get labels from parent(s)
Zack Williams12d029c2017-10-31 14:11:39 -0700947 if image.parents is not None:
948 for parent in image.parents:
949 LOG.debug("Adding parent labels from %s to child %s" %
950 (parent.name, image.name))
Zack Williamsce63eb02017-02-28 10:46:22 -0700951
Zack Williams12d029c2017-10-31 14:11:39 -0700952 # don't create component labels for same repo as image
953 repo_list = [image.repo_name]
954 image.labels.update(parent.child_labels(repo_list))
Zack Williamsce63eb02017-02-28 10:46:22 -0700955
956 def dependency_graph(self, graph_fn):
957 """ save a DOT dependency graph to a file """
958
959 graph_fn_abs = os.path.abspath(graph_fn)
960
961 LOG.info("Saving DOT dependency graph to: %s" % graph_fn_abs)
962
963 try:
964 import graphviz
965 except ImportError:
966 LOG.error('graphviz pip module not found')
967 raise
968
969 dg = graphviz.Digraph(comment='Image Dependency Graph',
970 graph_attr={'rankdir': 'LR'})
971
972 component_nodes = []
973
974 # Use raw names, so they match with what's in Dockerfiles
975 # delete colons as python graphviz module breaks with them
976 for image in self.images:
977 name_g = image.raw_name.replace(':', '\n')
978 dg.node(name_g)
979
Zack Williams12d029c2017-10-31 14:11:39 -0700980 if image.parents is not None:
981 for parent in image.parents:
982 name_p = parent.raw_name.replace(':', '\n')
983 dg.edge(name_p, name_g)
Zack Williamsce63eb02017-02-28 10:46:22 -0700984
985 if image.components is not None:
986 for component in image.components:
987 name_c = "component - %s" % component['repo_name']
988 if name_c not in component_nodes:
989 dg.node(name_c)
990 component_nodes.append(name_c)
991 dg.edge(name_c, name_g, "", {'style': 'dashed'})
992
993 with open(graph_fn_abs, 'w') as g_fh:
994 g_fh.write(dg.source)
995
def write_actions_file(self, actions_fn):
    """ save the record of what was pulled/built/failed as a YAML file

    The same YAML document is also emitted to the debug log.
    """

    out_path = os.path.abspath(actions_fn)

    LOG.info("Saving actions as YAML to: %s" % out_path)

    # YAML key -> name of the attribute on self that holds its value
    key_to_attr = (
        ("ib_pulled", "pulled"),
        ("ib_built", "built"),
        ("ib_preexisting_images", "preexisting"),
        ("ib_obsolete_images", "obsolete"),
        ("ib_failed_pull", "failed_pull"),
        ("ib_obsolete_pull", "obsolete_pull"),
        ("ib_failed_build", "failed_build"),
    )

    actions = dict((key, getattr(self, attr)) for key, attr in key_to_attr)

    with open(out_path, 'w') as actions_fh:
        yaml.safe_dump(actions, actions_fh)

    LOG.debug(yaml.safe_dump(actions))
1015
def process_images(self):
    """ determine whether to build/fetch images, then do so

    Steps:
      1. Images without parents (upstream) are marked DI_FETCH unless
         they already exist.
      2. Images with parents are marked DI_BUILD if parents_clean() is
         false, otherwise DI_FETCH unless they already exist.
      3. Upstream images are fetched, then the clean images (most
         children first); any of the latter that can't be fetched is
         marked DI_BUILD instead.
      4. Buildable images are built, round after round, until
         get_buildable() returns nothing.
    """

    # NOTE: "has parents" is tested by truthiness. An identity test against
    # a list literal (`parents is []`) can never be true, because the
    # literal is a brand new object each time it's evaluated.
    #
    # Every selection below is materialised as a list, since several are
    # iterated more than once (logged first, acted upon later).

    # upstream images (have no parents), must be fetched
    must_fetch_a = [img for img in self.images if not img.parents]

    for image in must_fetch_a:
        if image.status is not DI_EXISTS:
            image.status = DI_FETCH

    # images that can be built or fetched (have parents)
    b_or_f_a = [img for img in self.images if img.parents]

    for image in b_or_f_a:
        if not image.parents_clean():
            # must be built if not clean
            image.status = DI_BUILD
        elif image.status is not DI_EXISTS:
            # try to fetch if clean and doesn't exist
            image.status = DI_FETCH
        # otherwise, image is clean and exists (image.status == DI_EXISTS)

    c_and_e_a = [img for img in self.images if img.status is DI_EXISTS]
    LOG.info("Preexisting and clean images: %s" %
             ", ".join(c.name for c in c_and_e_a))

    upstream_a = [img for img in self.images
                  if img.status is DI_FETCH and not img.parents]
    LOG.info("Upstream images that must be fetched: %s" %
             ", ".join(u.raw_name for u in upstream_a))

    fetch_a = [img for img in self.images
               if img.status is DI_FETCH and img.parents]
    LOG.info("Clean, buildable images to attempt to fetch: %s" %
             ", ".join(f.raw_name for f in fetch_a))

    build_a = [img for img in self.images if img.status is DI_BUILD]
    LOG.info("Buildable images, due to unclean context or parents: %s" %
             ", ".join(b.raw_name for b in build_a))

    # OK to fetch upstream in any case as they should reduce number of
    # layers pulled/built later

    for image in upstream_a:
        if not self._fetch_image(image):
            LOG.info("Unable to fetch upstream image: %s" % image.raw_name)
            # FIXME: fail if the upstream image can't be fetched ?

    # images with the most children first
    fetch_sort = sorted(fetch_a, key=(lambda img: len(img.children)),
                        reverse=True)

    for image in fetch_sort:
        if not self._fetch_image(image):
            # if didn't fetch, build
            image.status = DI_BUILD

    # each round may make further images buildable, as get_buildable()
    # only returns images that have a parent which already exists
    while True:
        buildable_images = self.get_buildable()
        if buildable_images:
            for image in buildable_images:
                self._build_image(image)
        else:
            LOG.debug("No more images to build, ending build loop")
            break
1080
def get_buildable(self):
    """ Returns list of images that can be built

    An image qualifies when its status is DI_BUILD and at least one of
    its parents has status DI_EXISTS.
    """

    ready = []

    for candidate in self.images:
        if candidate.status is not DI_BUILD:
            continue

        has_existing_parent = any(parent.status is DI_EXISTS
                                  for parent in candidate.parents)

        # list each image only once
        if has_existing_parent and candidate not in ready:
            ready.append(candidate)

    LOG.debug("Buildable images: %s" %
              ', '.join(img.name for img in ready))

    return ready
1096
def tag_image(self, image):
    """ Applies every tag in image.tags to the image

    Each tag is logged; the tag is only applied through the docker client
    when one is configured (self.dc is not None).
    """

    client = self.dc

    for tag in image.tags:

        LOG.info("Tagging id: '%s', repo: '%s', tag: '%s'" %
                 (image.image_id, image.name, tag))

        if client is None:
            # no docker client, so logging is all that happens
            continue

        client.tag(image.image_id, image.name, tag=tag)
1107
def _fetch_image(self, image):
    """ Pull an image with docker and check that its labels match

    With a docker client configured, the pull progress is streamed to the
    log, the pulled image is inspected, and it's accepted only when
    image.compare_labels() approves of its labels. Outcomes are recorded
    in self.pulled, self.failed_pull and self.obsolete_pull.

    Without a docker client nothing is pulled; the image is only passed
    to tag_image() and marked as existing.

    Returns True if the image can be used (status is set to DI_EXISTS),
    False if the pull failed or the pulled labels didn't match.

    Exits the program if an image without parents is not found, as such
    an image can't be built locally.
    """

    LOG.info("Attempting to fetch docker image: %s" % image.raw_name)

    # progress messages only worth showing at debug level
    noisy = ("Extracting", "Downloading",
             "Waiting", "Download complete",
             "Pulling fs layer", "Pull complete",
             "Verifying Checksum",
             "Already exists")

    if self.dc is not None:
        try:
            for stat_json in self.dc.pull(image.raw_name,
                                          stream=True):

                # sometimes Docker's JSON is dirty, per:
                # https://github.com/docker/docker-py/pull/1081/
                stat_s = stat_json.strip()

                for s_j in stat_s.split("\r\n"):

                    # a blank fragment isn't JSON; decoding it would
                    # raise and make a good pull look like a failure
                    if not s_j.strip():
                        continue

                    stat_d = json.loads(s_j)

                    if 'stream' in stat_d:
                        for stat_l in stat_d['stream'].split('\n'):
                            LOG.debug(stat_l)

                    if 'status' in stat_d:
                        for stat_l in stat_d['status'].split('\n'):
                            if stat_l in noisy:
                                LOG.debug(stat_l)
                            else:
                                LOG.info(stat_l)

                    if 'error' in stat_d:
                        LOG.error(stat_d['error'])
                        sys.exit(1)

        except (DockerErrors.NotFound, DockerErrors.ImageNotFound) as e:
            # log the exception itself, so the message from docker is
            # what ends up in the log
            LOG.warning("Image could not be pulled: %s" % e)

            self.failed_pull.append({
                "tags": [image.raw_name, ],
            })

            if not image.parents:
                LOG.error("Pulled image required to build, not available!")
                sys.exit(1)

            return False

        except BaseException:
            # Catches everything, including the SystemExit raised by the
            # sys.exit(1) above, so an error reported in the pull stream
            # ends up as a failed pull rather than ending the program.
            LOG.exception("Error pulling docker image")

            self.failed_pull.append({
                "tags": [image.raw_name, ],
            })

            return False

        # obtain the image_id by inspecting the pulled image. Seems unusual
        # that the Docker API `pull` method doesn't provide it when the
        # `build` method does
        pulled_image = self.dc.inspect_image(image.raw_name)

        # check to make sure that image that was downloaded has the labels
        # that we expect it to have, otherwise return false, trigger build
        if not image.compare_labels(
                pulled_image['ContainerConfig']['Labels']):
            LOG.info("Tried fetching image %s, but labels didn't match" %
                     image.raw_name)

            self.obsolete_pull.append({
                "id": pulled_image['Id'],
                "tags": pulled_image['RepoTags'],
            })
            return False

        image.image_id = pulled_image['Id']
        LOG.info("Fetched image %s, id: %s" %
                 (image.raw_name, image.image_id))

        self.pulled.append({
            "id": pulled_image['Id'],
            "tags": pulled_image['RepoTags'],
            "base": image.name.split(":")[0],
        })

    self.tag_image(image)
    image.status = DI_EXISTS
    return True
1199
def _build_image(self, image):
    """ Build an image with docker, record the outcome, and tag it

    With a docker client configured, the build output is streamed to the
    log (and to a per-build log file when args.build_log_dir is set), the
    full image id is looked up, and the result is appended to self.built.
    A failed build is appended to self.failed_build and the image's
    status is set to DI_ERROR.

    Without a docker client nothing is built; the image is only passed to
    tag_image() and marked as existing.

    On success the image's status is set to DI_EXISTS. Returns None.
    """

    LOG.info("Building docker image for %s" % image.raw_name)

    if self.dc is not None:

        build_tag = "%s:%s" % (image.name, image.tags[0])

        buildargs = image.buildargs()
        context_tar = image.context_tarball()
        dockerfile = image.dockerfile_rel_path()

        for key, val in buildargs.iteritems():
            LOG.debug("Buildarg - %s : %s" % (key, val))

        bl_path = ""
        start_time = datetime.datetime.utcnow()

        if args.build_log_dir:
            bl_name = "%s_%s" % (start_time.strftime("%Y%m%dT%H%M%SZ"),
                                 re.sub(r'\W', '_', image.name))
            bl_path = os.path.abspath(
                os.path.join(args.build_log_dir, bl_name))

            LOG.info("Build log: %s" % bl_path)
            bl_fh = open(bl_path, 'w+', 0)  # 0 = unbuffered writes
        else:
            bl_fh = None

        # stays None unless docker reports a successfully built image
        short_image_id = None

        try:
            LOG.info("Building image: %s" % image)

            for stat_d in self.dc.build(tag=build_tag,
                                        buildargs=buildargs,
                                        custom_context=True,
                                        fileobj=context_tar,
                                        dockerfile=dockerfile,
                                        rm=True,
                                        forcerm=True,
                                        pull=False,
                                        stream=True,
                                        decode=True):

                if 'stream' in stat_d:

                    if bl_fh:
                        bl_fh.write(stat_d['stream'].encode('utf-8'))

                    for stat_l in stat_d['stream'].split('\n'):
                        if stat_l:
                            LOG.debug(stat_l)

                    if stat_d['stream'].startswith("Successfully built "):
                        siid = stat_d['stream'].split(' ')[2]
                        short_image_id = siid.strip()
                        LOG.debug("Short Image ID: %s" % short_image_id)

                if 'status' in stat_d:
                    for stat_l in stat_d['status'].split('\n'):
                        if stat_l:
                            LOG.info(stat_l)

                if 'error' in stat_d:
                    LOG.error(stat_d['error'])
                    image.status = DI_ERROR
                    sys.exit(1)

        except BaseException:
            # Catches everything, including the SystemExit raised by the
            # sys.exit(1) above, so a failed build is recorded rather than
            # ending the program.
            LOG.exception("Error building docker image")

            self.failed_build.append({
                "tags": [build_tag, ],
            })

            # A failed image must not keep the DI_BUILD status, or
            # get_buildable() would return it again and the build loop
            # in process_images() would retry it forever.
            image.status = DI_ERROR

            return

        finally:
            if bl_fh:
                bl_fh.close()

        if short_image_id is None:
            # the build stream ended without an error, but also without
            # the "Successfully built" line, so there's no id to inspect
            LOG.error("Build of %s finished without reporting an image id"
                      % build_tag)

            self.failed_build.append({
                "tags": [build_tag, ],
            })

            image.status = DI_ERROR

            return

        # the image ID given by output isn't the full SHA256 id, so find
        # and set it to the full one
        built_image = self.dc.inspect_image(short_image_id)
        image.image_id = built_image['Id']

        end_time = datetime.datetime.utcnow()
        duration = end_time - start_time  # duration is a timedelta

        LOG.info("Built Image: %s, duration: %s, id: %s" %
                 (image.name, duration, image.image_id))

        self.built.append({
            "id": image.image_id,
            "tags": [build_tag, ],
            "push_name": image.raw_name,
            "build_log": bl_path,
            "duration": duration.total_seconds(),
            "base": image.name.split(":")[0],
        })

    self.tag_image(image)
    image.status = DI_EXISTS
1301
1302
1303if __name__ == "__main__":
1304 parse_args()
1305 load_config()
1306
1307 # only include docker module if not a dry run
1308 if not args.dry_run:
1309 try:
Zack Williams12d029c2017-10-31 14:11:39 -07001310 import requests
Zack Williamsce63eb02017-02-28 10:46:22 -07001311 from distutils.version import LooseVersion
1312 from docker import __version__ as docker_version
Zack Williams12d029c2017-10-31 14:11:39 -07001313
1314 # handle the docker-py v1 to v2 API differences
Zack Williamsce63eb02017-02-28 10:46:22 -07001315 if LooseVersion(docker_version) >= LooseVersion('2.0.0'):
1316 from docker import APIClient as DockerClient
Zack Williamsce63eb02017-02-28 10:46:22 -07001317 else:
1318 from docker import Client as DockerClient
Zack Williams12d029c2017-10-31 14:11:39 -07001319
1320 from docker import utils as DockerUtils
1321 from docker import errors as DockerErrors
1322
Zack Williamsce63eb02017-02-28 10:46:22 -07001323 except ImportError:
1324 LOG.error("Unable to load python docker module (dry run?)")
1325 sys.exit(1)
1326
1327 rm = RepoManifest()
1328 db = DockerBuilder(rm)