blob: 8627ff8eb93300983579b3e7e24e33bc060af8d2 [file] [log] [blame]
Matteo Scandoloa3213822018-05-21 18:17:28 -07001#!/usr/bin/env python
2
3# Copyright 2017-present Open Networking Foundation
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17# imagebuilder.py
18# rebuilds/fetches docker container images per their git status in repo
19# in addition to docker, needs `sudo apt-get install python-git`
20
21import argparse
22import datetime
23import git
24import json
25import logging
26import os
27import re
28import string
29import sys
30import tarfile
31import tempfile
32import time
33import xml.etree.ElementTree as ET
34import yaml
35
# Module-level shared state (these top-level `global` statements are no-ops
# in Python, kept as documentation of intent); the values are assigned in
# parse_args() and load_config() before any builder objects are created.
global args              # argparse.Namespace of command-line options
global conf              # dict loaded from the master container-list YAML
global build_tag         # docker tag applied to every image built/pulled
global buildable_images  # image dicts that can be built from local repos
global pull_only_images  # image name strings that are only ever pulled

# Major version of the docker-py client API; 0 means "not yet detected".
# Presumably set after importing docker elsewhere in the file -- TODO confirm.
DOCKER_PY_VERSION = 0
43
44
def setup_logging(name=None, logfile=False):
    """ Build and return a logger that writes to stdout.

    name: optional suffix appended to the module name for the logger
    logfile: when True, also log to <conf.logdir>/<name>.log at DEBUG
    """
    global args

    logger_name = "-".join([__name__, name]) if name else __name__
    log = logging.getLogger(logger_name)

    # console handler: pass everything through at DEBUG level
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
    console.setLevel(logging.DEBUG)
    log.addHandler(console)

    # secondary logging to a file, always DEBUG level
    if logfile:
        logpath = os.path.join(conf.logdir, "%s.log" % name)
        filehandler = logging.FileHandler(logpath)
        filehandler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
        filehandler.setLevel(logging.DEBUG)
        log.addHandler(filehandler)

    return log
68
69
70LOG = setup_logging()
71
72
def parse_args():
    """ Parse command-line options into the module-global `args`,
    then set the log level per the -v verbosity count. """
    global args

    ap = argparse.ArgumentParser()

    ap.add_argument('-c', '--container_list', default='docker_images.yml',
                    type=argparse.FileType('r'),
                    help="YAML Config and master container list")

    # -f is optional, so using type=argparse.FileType is problematic
    ap.add_argument('-f', '--filter_images', default=None, action='store',
                    help="YAML file restricting images to build/fetch")

    ap.add_argument('-a', '--actions_taken', default=None,
                    help="Save a YAML file with actions taken during run")

    # forcing a build and pull-only are mutually exclusive modes
    mode = ap.add_mutually_exclusive_group()
    mode.add_argument('-b', '--build', action="store_true", default=False,
                      help="Build (don't fetch) all internal images, nocache")
    mode.add_argument('-p', '--pull', action="store_true", default=False,
                      help="Only pull containers, fail if build required")

    ap.add_argument('-d', '--dry_run', action="store_true",
                    help="Don't build/fetch anything")

    ap.add_argument('-g', '--graph', default=None,
                    help="Filename for DOT graph file of image dependency")

    ap.add_argument('-l', '--build_log_dir', action="store",
                    help="Log build output to this dir if set")

    ap.add_argument('-r', '--repo_root', default="../..", action="store",
                    help="Repo root directory")

    ap.add_argument('-t', '--build_tag', default=None, action="store",
                    help="tag all images built/pulled using this tag")

    ap.add_argument('-v', '--verbosity', action='count', default=1,
                    help="Repeat to increase log level")

    ap.add_argument('-x', '--force', action="store_true",
                    help="Force removal of tags (may delete images)")

    args = ap.parse_args()

    # -v given once (the default) -> INFO; repeated -> DEBUG
    LOG.setLevel(logging.DEBUG if args.verbosity > 1 else logging.INFO)
122
123
def load_config():
    """ Load the master container list and the optional filter list.

    Sets the module globals conf, build_tag, buildable_images and
    pull_only_images. Exits the program with status 1 on any malformed
    or unreadable input file.
    """
    global args
    global conf
    global buildable_images
    global pull_only_images
    global build_tag

    try:
        cl_abs = os.path.abspath(args.container_list.name)
        LOG.info("Master container list file: %s" % cl_abs)

        conf = yaml.safe_load(args.container_list)
    except yaml.YAMLError:
        LOG.exception("Problem loading container list file")
        sys.exit(1)

    # a tag given on the command line wins over the config-file tag
    if args.build_tag:
        build_tag = args.build_tag
    else:
        build_tag = conf['docker_build_tag']

    if args.filter_images is None:
        buildable_images = conf['buildable_images']
        pull_only_images = conf['pull_only_images']
    else:
        fi_abs = os.path.abspath(args.filter_images)

        LOG.info("Filtering image list per 'docker_image_whitelist' in: %s" %
                 fi_abs)
        try:
            # context manager so the file is closed even if yaml.safe_load
            # raises (the previous open()/close() pair leaked the handle)
            with open(fi_abs, 'r') as fi_fh:
                filter_list = yaml.safe_load(fi_fh)

            if 'docker_image_whitelist' not in filter_list:
                LOG.error("No 'docker_image_whitelist' defined in: %s" %
                          fi_abs)
                sys.exit(1)

            # fail if pull_only_images in docker_images.yml doesn't have tags
            for i in conf['pull_only_images']:
                (name, tag) = split_name(i)
                if not tag:
                    LOG.error("Images in docker_images.yml must be tagged")
                    sys.exit(1)

            # NOTE(review): buildable images are matched against the raw
            # whitelist entries (which may carry tags) while pull-only
            # images are matched against the untagged names -- preserved
            # as-is; confirm whether the asymmetry is intentional.
            buildable_images = [img for img in conf['buildable_images']
                                if split_name(img['name'])[0]
                                in filter_list['docker_image_whitelist']]

            whitelist_names = [split_name(x)[0]
                               for x in filter_list['docker_image_whitelist']]

            # keep only whitelisted pull-only images, then apply any tag
            # override given in the whitelist. List comprehension instead
            # of map() so the result is a list on python 3 as well.
            tag_fixer = override_tags(filter_list['docker_image_whitelist'])
            pull_only_images = [tag_fixer(img)
                                for img in conf['pull_only_images']
                                if split_name(img)[0] in whitelist_names]

        except KeyError:
            LOG.exception("Problem with filter list file")
            sys.exit(1)
        except (IOError, OSError):
            # previously an unreadable filter file crashed with an
            # unhandled exception instead of a clean error exit
            LOG.exception("Unable to read filter list file")
            sys.exit(1)
186
187
def override_tags(image_list_with_tags):
    """ Build a function that retags an image name per a whitelist.

    image_list_with_tags: list of 'name' or 'name:tag' strings.
    Returns a closure that, given an image name, returns 'name:tag' using
    the whitelist's tag when one is present, else the input unchanged.
    """

    # a real list (not map()) so .index() below also works on python 3,
    # where map() returns an iterator without an .index() method
    untagged_whitelist = [split_name(x)[0] for x in image_list_with_tags]

    def inner(i):
        # locate this image in the whitelist and adopt its tag, if any
        img_name = split_name(i)[0]
        tag_override = split_name(image_list_with_tags[
            untagged_whitelist.index(img_name)])[1]
        if tag_override:
            return "%s:%s" % (img_name, tag_override)
        return i
    return inner
200
201
def split_name(input_name):
    """ split a docker image name in the 'name:tag' format into components

    Returns a (name, tag) tuple; tag is None when no ':' is present.
    Only the first two ':'-separated fields are used, matching the
    original behavior.
    """

    name = input_name
    tag = None

    # split name:tag if given in combined format. str.split() replaces the
    # removed-in-py3 (and long-deprecated) string.split() function call.
    name_tag_split = input_name.split(":")

    if len(name_tag_split) > 1:  # has tag, return separated version
        name = name_tag_split[0]
        tag = name_tag_split[1]

    return (name, tag)
216
217
class RepoRepo():
    """ git repo managed by repo tool

    Wraps one checked-out git repository and caches its state at
    construction time: HEAD commit hash and date, tags pointing at HEAD,
    working-tree cleanliness, and the list of untracked files.
    """

    def __init__(self, name, path, remote_url, remote_branch, short_branch):
        # name: project name as listed in the repo manifest
        # path: checkout path relative to the repo root (see abspath())
        # remote_url: base URL; project name is appended to form git_url
        # remote_branch: "<remote>/<revision>" branch reference
        # short_branch: bare revision name, used when composing docker tags

        self.name = name
        self.path = path
        self.git_url = "%s%s" % (remote_url, name)
        self.remote_branch = remote_branch
        self.short_branch = short_branch
        self.git_tags = []  # git tags that point at the current HEAD

        try:
            self.git_repo_o = git.Repo(self.abspath())
            LOG.debug("Repo - %s, path: %s" % (name, path))

            self.head_commit = self.git_repo_o.head.commit.hexsha
            LOG.debug(" head commit: %s" % self.head_commit)

            # ISO-8601 UTC timestamp of the HEAD commit
            commit_t = time.gmtime(self.git_repo_o.head.commit.committed_date)
            self.head_commit_t = time.strftime("%Y-%m-%dT%H:%M:%SZ", commit_t)
            LOG.debug(" commit date: %s" % self.head_commit_t)

            for tag in self.git_repo_o.tags:
                if tag.commit == self.git_repo_o.head.commit:
                    self.git_tags.append(str(tag))

            if self.git_tags:
                LOG.debug(" tags referring to this commit: %s" %
                          ", ".join(self.git_tags))
            else:
                LOG.debug(" No git tags refer to this commit")

            # clean == no modified tracked files AND no untracked files
            self.clean = not self.git_repo_o.is_dirty(untracked_files=True)
            LOG.debug(" clean: %s" % self.clean)

            # list of untracked files (expensive operation)
            self.untracked_files = self.git_repo_o.untracked_files
            for u_file in self.untracked_files:
                LOG.debug(" Untracked: %s" % u_file)

        except Exception:
            # any GitPython failure (missing/broken checkout) is fatal
            LOG.exception("Error with git repo: %s" % name)
            sys.exit(1)

    def abspath(self):
        """ absolute filesystem path of this repo's checkout """
        global args
        return os.path.abspath(os.path.join(args.repo_root, self.path))

    def path_clean(self, test_path, branch=""):
        """ Is working tree on branch and no untracked files in path? """
        global conf

        # default to comparing against the manifest's remote branch
        if not branch:
            branch = self.remote_branch

        LOG.debug(" Looking for changes in path: %s" % test_path)

        p_clean = True

        # diff between branch head and working tree (None)
        branch_head = self.git_repo_o.commit(branch)
        diff = branch_head.diff(None, paths=test_path)

        if diff:
            p_clean = False

            for diff_obj in diff:
                LOG.debug(" file not on branch: %s" % diff_obj)

        # remove . to compare paths using .startswith()
        if test_path == ".":
            test_path = ""

        # any untracked file under test_path also makes the path dirty
        for u_file in self.untracked_files:
            if u_file.startswith(test_path):
                LOG.debug(" untracked file in path: %s" % u_file)
                p_clean = False

        return p_clean
298
299
class RepoManifest():
    """ parses manifest XML file used by repo tool

    Reads .repo/manifest.xml under the repo root, extracts the default
    branch/remote info, and builds a RepoRepo object for every project
    that is actually checked out on disk.
    """

    def __init__(self):
        global args
        global conf

        self.manifest_xml = {}
        self.repos = {}  # repo name -> RepoRepo, for checked-out projects
        self.branch = ""

        self.manifest_file = os.path.abspath(
            os.path.join(args.repo_root,
                         ".repo/manifest.xml"))

        LOG.info("Loading manifest file: %s" % self.manifest_file)

        try:
            tree = ET.parse(self.manifest_file)
            self.manifest_xml = tree.getroot()
        except Exception:
            LOG.exception("Error loading repo manifest")
            sys.exit(1)

        # Find the branch names
        default = self.manifest_xml.find('default')

        self.short_branch = default.attrib['revision']
        self.remote_branch = "%s/%s" % (default.attrib['remote'],
                                        default.attrib['revision'])

        # Find the remote URL for these repos
        remote = self.manifest_xml.find('remote')
        self.remote_url = remote.attrib['review']

        LOG.info("Manifest is on remote branch '%s' with remote url '%s'" %
                 (self.remote_branch, self.remote_url))

        project_repos = {}

        for project in self.manifest_xml.iter('project'):
            repo_name = project.attrib['name']
            rel_path = project.attrib['path']
            abs_path = os.path.abspath(os.path.join(args.repo_root,
                                                    project.attrib['path']))

            if os.path.isdir(abs_path):
                project_repos[repo_name] = rel_path
            else:
                LOG.debug("Repo in manifest but not checked out: %s" %
                          repo_name)

        # .items() instead of the py2-only .iteritems(); identical
        # iteration behavior on python 2, and forward-compatible
        for repo_name, repo_path in project_repos.items():
            self.repos[repo_name] = RepoRepo(repo_name, repo_path,
                                             self.remote_url,
                                             self.remote_branch,
                                             self.short_branch)

    def get_repo(self, repo_name):
        """ return the RepoRepo for a checked-out project by name """
        return self.repos[repo_name]
360
361
# DockerImage Status Constants
# (assigned to DockerImage.status and compared throughout the builder)

DI_UNKNOWN = 'unknown'  # unknown status (initial state)
DI_EXISTS = 'exists'    # already exists in docker, has an image_id

DI_BUILD = 'build'      # needs to be built
DI_FETCH = 'fetch'      # needs to be fetched (pulled)
DI_ERROR = 'error'      # build or other fatal failure
370
371
class DockerImage():
    """ One docker image: its git context, labels, tags, and status.

    Buildable images are backed by a RepoRepo (repo_d) and a Dockerfile;
    pull-only/external images have no repo and are only fetched.
    """

    def __init__(self, name, repo_name=None, repo_d=None, path=".",
                 context=".", dockerfile='Dockerfile', labels=None,
                 tags=None, image_id=None, components=None, status=DI_UNKNOWN):

        LOG.debug("New DockerImage object from name: %s" % name)

        # name to pull as, usually what is provided on creation.
        # May be changed by create_tags
        self.raw_name = name

        # Python's mutable defaults is a landmine
        if labels is None:
            self.labels = {}
        else:
            self.labels = labels

        self.repo_name = repo_name
        self.repo_d = repo_d
        self.path = path
        self.context = context
        self.dockerfile = dockerfile
        self.tags = []  # tags are added to this later in __init__
        self.image_id = image_id
        self.components = components
        self.status = status

        self.parent_names = []  # names of parents from _find_parent_names()
        self.parents = []  # list of parent DockerImage object
        self.children = []  # list of child DockerImage objects

        # split name:tag if given in combined format
        (image_name, image_tag) = split_name(name)
        if image_tag:  # has tag
            self.name = image_name
            self.tags.append(image_tag)
        else:  # no tag
            self.name = image_name

        # Add the build tag if exists
        if build_tag not in self.tags:
            self.tags.append(build_tag)

        # split names from tag list
        if tags is not None:
            for tag in tags:
                thistag = ""
                (tag_name, tag_tag) = split_name(tag)
                if tag_tag:  # has name also, use just tag
                    thistag = tag_tag
                else:  # just a bare tag
                    thistag = tag_name

                if thistag not in self.tags:  # don't duplicate tags
                    self.tags.append(thistag)

        # self.clean only applies to this container
        self.clean = self._context_clean()
        self._find_parent_names()

    def __str__(self):
        return self.name

    def buildable(self):
        """ Can this image be built from a Dockerfile? """
        if self.repo_name:  # has a git repo to be built from
            return True
        return False

    def _context_clean(self):
        """ Determine if this is repo and context is clean """

        if self.buildable():

            # check if on master branch
            repo_clean = self.repo_d.clean

            # only check the Docker context for cleanliness
            context_path = os.path.normpath(
                os.path.join(self.path, self.context))
            context_clean = self.repo_d.path_clean(context_path)

            # check of subcomponents are clean
            components_clean = self.components_clean()

            LOG.debug(" Build Context Cleanliness - "
                      "repo: %s, context: %s, components: %s" %
                      (repo_clean, context_clean, components_clean))

            if context_clean and repo_clean and components_clean:
                return True
            else:
                return False

        return True  # unbuildable images are clean

    def parents_clean(self):
        """ Returns true if self and all parents are clean """

        if self.buildable():
            if not self.clean:
                return False
            else:
                for parent in self.parents:
                    if not parent.parents_clean():
                        return False
                else:
                    return True

        return True  # unbuildable images are clean

    def compare_labels(self, other_labels):
        """ Returns True if image label-schema.org labels match dict """

        comparable_labels_re = [
            r".*name$",
            r".*vcs-url$",
            r".*vcs-ref$",
            r".*version$",
        ]

        for clr in comparable_labels_re:  # loop on all comparable labels
            for label in self.labels:  # loop on all labels
                if re.match(clr, label) is not None:  # if label matches re
                    # and label exists in other, and values are same
                    if label in other_labels and \
                            self.labels[label] == other_labels[label]:
                        pass  # continue through loop
                    else:
                        LOG.info("Non-matching label: %s" % label)
                        return False  # False when first difference found

        LOG.debug(" All labels matched")
        return True  # only when every label matches

    def same_name(self, other_name):
        """ compare image name (possibly with tag) against image name/tag """

        (o_name, o_tag) = split_name(other_name)

        if o_tag is None and self.name == o_name:
            return True
        elif self.name == o_name and o_tag in self.tags:
            return True

        return False

    def components_clean(self):
        """ True when every subcomponent's repo and path are clean """

        if self.buildable() and self.components is not None:
            for component in self.components:
                if not component['repo_d'].clean or \
                        not component['repo_d'].path_clean(component['path']):
                    return False

        return True

    def component_labels(self):
        """ returns a dict of labels for subcomponents """

        if self.buildable() and self.components is not None:

            comp_l = {}

            for component in self.components:

                LOG.debug(" component %s generating child labels" %
                          component['repo_name'])

                prefix = "org.opencord.component.%s." % component['repo_name']

                comp_l[prefix + "vcs-url"] = component['repo_d'].git_url

                if component['repo_d'].clean and \
                        component['repo_d'].path_clean(component['path']):
                    clean = True
                else:
                    clean = False

                if clean:
                    # NOTE(review): version uses self.repo_d (the image's
                    # repo) while vcs-ref uses the component's repo --
                    # preserved as-is; confirm this is intentional
                    comp_l[prefix + "version"] = "%s-%s" % \
                        (self.repo_d.short_branch, self.repo_d.head_commit)
                    comp_l[prefix + "vcs-ref"] = \
                        component['repo_d'].head_commit
                else:
                    comp_l[prefix + "version"] = "dirty"
                    comp_l[prefix + "vcs-ref"] = ""

            return comp_l

        return None

    def child_labels(self, repo_list=None):
        """ return a dict of labels to apply to child images """

        LOG.debug(" Generating child labels from parent: %s" % self.name)

        # only create labels when they haven't already been created
        if repo_list is None:
            repo_list = []

        LOG.debug(" Already labeled with: %s" % ", ".join(repo_list))

        cl = {}

        if self.buildable() and self.repo_name not in repo_list:

            LOG.debug(" Adding parent labels from repo: %s" % self.repo_name)

            prefix = "org.opencord.component.%s." % self.repo_name

            cl[prefix + "vcs-url"] = self.repo_d.git_url

            if self.clean:
                cl[prefix + "version"] = "%s-%s" % (self.repo_d.short_branch,
                                                    self.repo_d.head_commit)
                cl[prefix + "vcs-ref"] = self.repo_d.head_commit
            else:
                cl[prefix + "version"] = "dirty"
                cl[prefix + "vcs-ref"] = ""

            repo_list.append(self.repo_name)

        # include component labels if present
        if self.components is not None:
            cl.update(self.component_labels())

        # recursively find labels up the parent chain
        if self.parents is not None:
            for parent in self.parents:
                cl.update(parent.child_labels(repo_list))

        return cl

    def create_labels(self):
        """ Create label-schema.org labels for image """

        if self.buildable():

            LOG.debug("Creating labels for: %s" % self.name)

            self.labels['org.label-schema.name'] = self.name
            self.labels['org.label-schema.schema-version'] = "1.0"

            # org.label-schema.build-date
            time_now = datetime.datetime.utcnow()
            build_date = time_now.strftime("%Y-%m-%dT%H:%M:%SZ")
            self.labels['org.label-schema.build-date'] = build_date

            # git version related labels
            self.labels['org.label-schema.vcs-url'] = self.repo_d.git_url

            if self.clean:
                self.labels['org.label-schema.version'] = \
                    "%s-%s" % (self.repo_d.short_branch,
                               self.repo_d.head_commit)
                self.labels['org.label-schema.vcs-ref'] = \
                    self.repo_d.head_commit
                self.labels['org.opencord.vcs-commit-date'] = \
                    self.repo_d.head_commit_t
            else:
                self.labels['org.label-schema.version'] = "dirty"
                self.labels['org.label-schema.vcs-ref'] = ""

            # include component labels if present
            if self.components is not None:
                self.labels.update(self.component_labels())

    def create_tags(self):
        """ Create docker tags as needed """

        if self.buildable():
            LOG.debug("Creating tags for image: %s" % self.name)

            # if clean and parents clean, add tags for branch/commit
            if self.parents_clean():

                # add build tag
                if build_tag not in self.tags:
                    self.tags.append(build_tag)

                # add branch tag
                branch_tag = self.repo_d.short_branch
                if branch_tag not in self.tags:
                    self.tags.append(branch_tag)

                # Add <branch>-<commit> tag, which is used to pull
                commit_tag = "%s-%s" % (self.repo_d.short_branch,
                                        self.repo_d.head_commit)
                if commit_tag not in self.tags:
                    self.tags.append(commit_tag)

                    # this is most specific tag, so pull using it
                    self.raw_name = "%s:%s" % (self.name, commit_tag)

                # add all tags in git that point at the commit
                for gt in self.repo_d.git_tags:
                    if gt not in self.tags:
                        self.tags.append(gt)

            LOG.debug("All tags: %s" % ", ".join(self.tags))

    def _find_parent_names(self):
        """ set self.parent_names using Dockerfile FROM lines """

        if self.buildable():
            # read contents of Dockerfile into df
            with open(self.dockerfile_abspath()) as dfh:
                dfl = dfh.readlines()

            parent_names = []
            frompatt = re.compile(r'^FROM\s+([\w/_:.-]+)', re.MULTILINE)

            for line in dfl:
                fromline = re.search(frompatt, line)
                if fromline:
                    parent_names.append(fromline.group(1))

            self.parent_names = parent_names  # may have tag

            LOG.debug(" Parents: %s" % ", ".join(self.parent_names))

    def dockerfile_abspath(self):
        """ returns absolute path to Dockerfile for this image """

        if self.buildable():
            return os.path.join(self.repo_d.abspath(),
                                self.path, self.dockerfile)
        else:
            return None

    def dockerfile_rel_path(self):
        """ returns the path relative to the context of the Dockerfile """

        if self.buildable():
            # fix: was `self.context is "."` -- an identity comparison on a
            # string literal that only works via CPython interning
            if self.context == ".":
                return self.dockerfile
            else:
                return os.path.normpath(os.path.join(self.path,
                                                     self.dockerfile))
        else:
            return None

    def context_tarball(self):
        """ returns a filehandle to a tarball (tempfile) for the image """

        if self.buildable():

            context_path = os.path.normpath(
                os.path.join(self.repo_d.abspath(),
                             self.path, self.context))

            LOG.info("Creating context tarball of path: %s" % context_path)

            t_fh = tempfile.NamedTemporaryFile()
            t = tarfile.open(mode='w', fileobj=t_fh, dereference=True)

            # exclude git directories anywhere in the context
            exclusion_list = ['**/.git']

            docker_ignore = os.path.join(context_path, '.dockerignore')
            if os.path.exists(docker_ignore):
                for line in open(docker_ignore).readlines():
                    # slightly out of spec, we allow whitespace before comments
                    # https://docs.docker.com/engine/reference/builder/#dockerignore-file
                    # fix: skip blank lines (the old `line.strip()[0]`
                    # raised IndexError on them) and compare the comment
                    # marker with equality, not `is` identity
                    stripped = line.strip()
                    if stripped and not stripped.startswith('#'):
                        exclusion_list.append(stripped.rstrip('\\/'))

            LOG.debug("Exclusion list: %s" % exclusion_list)

            # see docker-py source for context
            for path in sorted(
                    DockerUtils.exclude_paths(context_path, exclusion_list)):
                t.add(os.path.join(context_path, path),
                      arcname=path,
                      recursive=False)

            # add sub-components to tarball if required
            if self.components is not None:
                for component in self.components:
                    c_ctx_p = os.path.normpath(
                        os.path.join(component['repo_d'].abspath(),
                                     component['path']))

                    LOG.info("Adding component %s at context %s" %
                             (component['repo_name'], c_ctx_p))

                    # walk component source path
                    for path in sorted(
                            DockerUtils.exclude_paths(c_ctx_p,
                                                      exclusion_list)):

                        # path to where to put files in the archive
                        cf_dest = os.path.normpath(
                            os.path.join(component['dest'], path))

                        t.add(os.path.join(c_ctx_p, path),
                              arcname=cf_dest,
                              recursive=False)

            # t.list() # prints all files in tarball
            t.close()
            t_fh.seek(0)
            return t_fh

        else:
            return None

    def buildargs(self):
        """ returns array of labels in docker buildargs compliant format """
        ba_a = {}

        for label_k in self.labels:
            ba_re = re.compile(r'\W')  # non alpha/num/_ chars
            ba_label = ba_re.sub('_', label_k)
            ba_a[ba_label] = self.labels[label_k]

        return ba_a
790
791
792class DockerBuilder():
793
    def __init__(self, repo_manifest):
        """ Build the image list and run the whole pipeline.

        repo_manifest: RepoManifest of all checked-out git repos.
        Creates DockerImage objects for all buildable and pull-only
        images, connects to docker (unless --dry_run), links the
        dependency tree, evaluates preexisting images, optionally writes
        the dependency graph, then processes (builds/fetches) images and
        optionally records the actions taken.
        """

        global buildable_images
        global pull_only_images

        self.rm = repo_manifest
        self.dc = None  # Docker Client object

        self.images = []

        # arrays of images, used for write_actions
        self.preexisting = []
        self.obsolete = []
        self.pulled = []
        self.failed_pull = []
        self.obsolete_pull = []
        self.built = []
        self.failed_build = []

        # create dict of images, setting defaults
        for image in buildable_images:

            repo_d = self.rm.get_repo(image['repo'])

            if "components" in image:
                components = []

                # normalize each component entry; 'path' defaults to '.'
                for component in image['components']:
                    comp = {}
                    comp['repo_name'] = component['repo']
                    comp['repo_d'] = self.rm.get_repo(component['repo'])
                    comp['dest'] = component['dest']
                    comp['path'] = component.get('path', '.')
                    components.append(comp)
            else:
                components = None

            # set the full name in case this is pulled
            full_name = "%s:%s" % (image['name'], build_tag)

            img_o = DockerImage(full_name, image['repo'], repo_d,
                                image.get('path', '.'),
                                image.get('context', '.'),
                                image.get('dockerfile', 'Dockerfile'),
                                components=components)

            self.images.append(img_o)

        # add misc images
        for misc_image in pull_only_images:
            img_o = DockerImage(misc_image)
            self.images.append(img_o)

        if not args.dry_run:
            self._docker_connect()

        self.create_dependency()

        if not args.build:  # if forcing build, don't use preexisting
            self.find_preexisting()

        if args.graph is not None:
            self.dependency_graph(args.graph)

        self.process_images()

        if args.actions_taken is not None:
            self.write_actions_file(args.actions_taken)
862
    def _docker_connect(self):
        """ Connect to docker daemon, exiting the program on failure """
        # NOTE(review): `docker` and `requests` are not imported in the
        # visible portion of this file -- presumably imported elsewhere
        # (DOCKER_PY_VERSION detection suggests a conditional import);
        # verify before refactoring.

        try:
            # get a "high level" Docker object with conf from the environment
            hl_dc = docker.from_env()
            # use the low level APIClient (same as the 1.x API)
            self.dc = hl_dc.api
        except requests.ConnectionError:
            LOG.debug("Docker connection not available")
            sys.exit(1)

        if self.dc.ping():
            LOG.debug("Docker server is responding")
        else:
            LOG.error("Unable to ping docker server")
            sys.exit(1)
880
    def find_preexisting(self):
        """ find images that already exist in Docker and mark

        For every docker-side image whose base name matches a desired
        image: when its label-schema labels are current, record it as
        preexisting (tagging it with build_tag if needed) and mark the
        DockerImage DI_EXISTS; otherwise record it as obsolete and remove
        its stale build_tag when safe.
        """

        if self.dc:
            LOG.debug("Evaluating already built/fetched Docker images")

            # get list of images from docker
            pe_images = self.dc.images()

            for pe_image in pe_images:
                raw_tags = pe_image['RepoTags']

                # untagged/dangling images (no RepoTags) are skipped
                if raw_tags:
                    LOG.info("Preexisting Image - ID: %s, tags: %s" %
                             (pe_image['Id'], ",".join(raw_tags)))

                    # substring match: build_tag appearing anywhere in any
                    # repo:tag string counts as "has the build tag"
                    has_build_tag = False
                    for tag in raw_tags:
                        if build_tag in tag:
                            LOG.debug(" image has build_tag: %s" % build_tag)
                            has_build_tag = True

                    # match on the repo name of the first tag only
                    base_name = raw_tags[0].split(":")[0]
                    image = self.find_image(base_name)

                    # only evaluate images in the list of desired images
                    if image is not None:

                        good_labels = image.compare_labels(pe_image['Labels'])

                        if good_labels:
                            if has_build_tag:
                                LOG.info(" Image %s has up-to-date labels and"
                                         " build_tag" % pe_image['Id'])
                            else:
                                LOG.info(" Image %s has up-to-date labels but"
                                         " missing build_tag. Tagging image"
                                         " with build_tag: %s" %
                                         (pe_image['Id'], build_tag))

                                self.dc.tag(pe_image['Id'], image.name,
                                            tag=build_tag)

                            self.preexisting.append({
                                'id': pe_image['Id'],
                                'tags': raw_tags,
                                'base': image.name.split(":")[0],
                            })

                            image.image_id = pe_image['Id']
                            image.status = DI_EXISTS

                        else:  # doesn't have good labels

                            # if it has a build_tag, and a good image hasn't
                            # already been tagged
                            if has_build_tag and (image.status != DI_EXISTS):
                                LOG.info(" Image %s has obsolete labels and"
                                         " build_tag, remove" % pe_image['Id'])

                                # remove build_tag from image
                                name_bt = "%s:%s" % (base_name, build_tag)
                                self.dc.remove_image(name_bt, args.force, True)

                            else:
                                LOG.info(" Image %s has obsolete labels, lacks"
                                         " build_tag, ignore" % pe_image['Id'])

                            self.obsolete.append({
                                'id': pe_image['Id'],
                                'tags': raw_tags,
                            })
953
954 def find_image(self, image_name):
955 """ return image object matching name """
956 LOG.debug(" attempting to find image for: %s" % image_name)
957
958 for image in self.images:
959 if image.same_name(image_name):
960 LOG.debug(" found a match: %s" % image.raw_name)
961 return image
962 return None
963
964 def create_dependency(self):
965 """ set parent/child links for images """
966
967 # List of lists of parents images. Done in two steps for clarity
968 lol_of_parents = [img.parent_names for img in self.images
969 if img.parent_names is not []]
970
971 # flat list of all parent image names, with dupes
972 parents_with_dupes = [parent for parent_sublist in lol_of_parents
973 for parent in parent_sublist]
974
975 # remove duplicates
976 parents = list(set(parents_with_dupes))
977
978 LOG.info("All parent images: %s" % ", ".join(parents))
979
980 # list of "external parents", ones not built internally
981 external_parents = []
982
983 for parent_name in parents:
984 LOG.debug("Evaluating parent image: %s" % parent_name)
985 internal_parent = False
986
987 # match on p_name, without tag
988 (p_name, p_tag) = split_name(parent_name)
989
990 for image in self.images:
991 if image.same_name(p_name): # internal image is a parent
992 internal_parent = True
993 LOG.debug(" Internal parent: %s" % image.name)
994 break
995
996 if not internal_parent: # parent is external
997 LOG.debug(" External parent: %s" % parent_name)
998 external_parents.append(parent_name)
999
1000 # add unique external parents to image list
1001 for e_p_name in set(external_parents):
1002 LOG.debug(" Creating external parent image object: %s" % e_p_name)
1003 img_o = DockerImage(e_p_name)
1004 self.images.append(img_o)
1005
1006 # now that all images (including parents) are in list, associate them
1007 for image in filter(lambda img: img.parent_names is not [],
1008 self.images):
1009
1010 LOG.debug("Associating image: %s" % image.name)
1011
1012 for parent_name in image.parent_names:
1013
1014 parent = self.find_image(parent_name)
1015 image.parents.append(parent)
1016
1017 if parent is not None:
1018 LOG.debug(" internal image '%s' is parent of '%s'" %
1019 (parent.name, image.name))
1020 parent.children.append(image)
1021
1022 else:
1023 LOG.debug(" external image '%s' is parent of '%s'" %
1024 (image.parent_name, image.name))
1025
1026 # loop again now that parents are linked to create labels
1027 for image in self.images:
1028 image.create_labels()
1029 image.create_tags()
1030
1031 # if image has parent, get labels from parent(s)
1032 if image.parents is not None:
1033 for parent in image.parents:
1034 LOG.debug("Adding parent labels from %s to child %s" %
1035 (parent.name, image.name))
1036
1037 # don't create component labels for same repo as image
1038 repo_list = [image.repo_name]
1039 image.labels.update(parent.child_labels(repo_list))
1040
1041 def dependency_graph(self, graph_fn):
1042 """ save a DOT dependency graph to a file """
1043
1044 graph_fn_abs = os.path.abspath(graph_fn)
1045
1046 LOG.info("Saving DOT dependency graph to: %s" % graph_fn_abs)
1047
1048 try:
1049 import graphviz
1050 except ImportError:
1051 LOG.error('graphviz pip module not found')
1052 raise
1053
1054 dg = graphviz.Digraph(comment='Image Dependency Graph',
1055 graph_attr={'rankdir': 'LR'})
1056
1057 component_nodes = []
1058
1059 # Use raw names, so they match with what's in Dockerfiles
1060 # delete colons as python graphviz module breaks with them
1061 for image in self.images:
1062 name_g = image.raw_name.replace(':', '\n')
1063 dg.node(name_g)
1064
1065 if image.parents is not None:
1066 for parent in image.parents:
1067 name_p = parent.raw_name.replace(':', '\n')
1068 dg.edge(name_p, name_g)
1069
1070 if image.components is not None:
1071 for component in image.components:
1072 name_c = "component - %s" % component['repo_name']
1073 if name_c not in component_nodes:
1074 dg.node(name_c)
1075 component_nodes.append(name_c)
1076 dg.edge(name_c, name_g, "", {'style': 'dashed'})
1077
1078 with open(graph_fn_abs, 'w') as g_fh:
1079 g_fh.write(dg.source)
1080
1081 def write_actions_file(self, actions_fn):
1082
1083 actions_fn_abs = os.path.abspath(actions_fn)
1084
1085 LOG.info("Saving actions as YAML to: %s" % actions_fn_abs)
1086
1087 actions = {
1088 "ib_pulled": self.pulled,
1089 "ib_built": self.built,
1090 "ib_preexisting_images": self.preexisting,
1091 "ib_obsolete_images": self.obsolete,
1092 "ib_failed_pull": self.failed_pull,
1093 "ib_obsolete_pull": self.obsolete_pull,
1094 "ib_failed_build": self.failed_build,
1095 }
1096
1097 with open(actions_fn_abs, 'w') as a_fh:
1098 yaml.safe_dump(actions, a_fh)
1099 LOG.debug(yaml.safe_dump(actions))
1100
1101 def process_images(self):
1102
1103 """ determine whether to build/fetch images """
1104 # upstream images (have no parents), must be fetched
1105 must_fetch_a = filter(lambda img: not img.parents, self.images)
1106
1107 for image in must_fetch_a:
1108 if image.status is not DI_EXISTS:
1109 image.status = DI_FETCH
1110
1111 # images that can be built or fetched (have parents)
1112 b_or_f_a = filter(lambda img: img.parents, self.images)
1113
1114 for image in b_or_f_a:
1115 if not image.parents_clean() or args.build:
1116 # must be built if not clean
1117 image.status = DI_BUILD
1118 elif image.status is not DI_EXISTS:
1119 # try to fetch if clean and doesn't exist
1120 image.status = DI_FETCH
1121 # otherwise, image is clean and exists (image.status == DI_EXISTS)
1122
1123 c_and_e_a = filter(lambda img: img.status is DI_EXISTS, self.images)
1124 LOG.info("Preexisting and clean images: %s" %
1125 ", ".join(c.name for c in c_and_e_a))
1126
1127 upstream_a = filter(lambda img: (img.status is DI_FETCH and
1128 not img.parents), self.images)
1129 LOG.info("Upstream images that must be fetched: %s" %
1130 ", ".join(u.raw_name for u in upstream_a))
1131
1132 fetch_a = filter(lambda img: (img.status is DI_FETCH and
1133 img.parents), self.images)
1134 LOG.info("Clean, buildable images to attempt to fetch: %s" %
1135 ", ".join(f.raw_name for f in fetch_a))
1136
1137 build_a = filter(lambda img: img.status is DI_BUILD, self.images)
1138 LOG.info("Buildable images, due to unclean context or parents: %s" %
1139 ", ".join(b.raw_name for b in build_a))
1140
1141 # OK to fetch upstream in any case as they should reduce number of
1142 # layers pulled/built later
1143
1144 for image in upstream_a:
1145 if not self._fetch_image(image):
1146 LOG.error("Unable to fetch upstream image: %s" %
1147 image.raw_name)
1148 sys.exit(1)
1149
1150 # fetch if not forcing the build of all images
1151 if not args.build:
1152 fetch_sort = sorted(fetch_a, key=(lambda img: len(img.children)),
1153 reverse=True)
1154
1155 for image in fetch_sort:
1156 if not self._fetch_image(image):
1157 # if didn't fetch, build
1158 image.status = DI_BUILD
1159
1160 while True:
1161 buildable_images = self.get_buildable()
1162
1163 if buildable_images and args.pull:
1164 LOG.error("Images must be built, but --pull is specified")
1165 exit(1)
1166
1167 if buildable_images:
1168 for image in buildable_images:
1169 self._build_image(image)
1170 else:
1171 LOG.debug("No more images to build, ending build loop")
1172 break
1173
1174 def get_buildable(self):
1175 """ Returns list of images that can be built"""
1176
1177 buildable = []
1178
1179 for image in filter(lambda img: img.status is DI_BUILD, self.images):
1180 for parent in image.parents:
1181 if parent.status is DI_EXISTS:
1182 if image not in buildable: # build once if two parents
1183 buildable.append(image)
1184
1185 LOG.debug("Buildable images: %s" %
1186 ', '.join(image.name for image in buildable))
1187
1188 return buildable
1189
1190 def tag_image(self, image):
1191 """ Applies tags to an image """
1192
1193 for tag in image.tags:
1194
1195 LOG.info("Tagging id: '%s', repo: '%s', tag: '%s'" %
1196 (image.image_id, image.name, tag))
1197
1198 if self.dc is not None:
1199 self.dc.tag(image.image_id, image.name, tag=tag)
1200
1201 def _fetch_image(self, image):
1202
1203 LOG.info("Attempting to fetch docker image: %s" % image.raw_name)
1204
1205 if self.dc is not None:
1206 try:
1207 for stat_json in self.dc.pull(image.raw_name,
1208 stream=True):
1209
1210 # sometimes Docker's JSON is dirty, per:
1211 # https://github.com/docker/docker-py/pull/1081/
1212 stat_s = stat_json.strip()
1213 stat_list = stat_s.split("\r\n")
1214
1215 for s_j in stat_list:
1216 stat_d = json.loads(s_j)
1217
1218 if 'stream' in stat_d:
1219 for stat_l in stat_d['stream'].split('\n'):
1220 LOG.debug(stat_l)
1221
1222 if 'status' in stat_d:
1223 for stat_l in stat_d['status'].split('\n'):
1224 noisy = ["Extracting", "Downloading",
1225 "Waiting", "Download complete",
1226 "Pulling fs layer", "Pull complete",
1227 "Verifying Checksum",
1228 "Already exists"]
1229 if stat_l in noisy:
1230 LOG.debug(stat_l)
1231 else:
1232 LOG.info(stat_l)
1233
1234 if 'error' in stat_d:
1235 LOG.error(stat_d['error'])
1236 sys.exit(1)
1237
1238 except (DockerErrors.NotFound, DockerErrors.ImageNotFound) as e:
1239 LOG.warning("Image could not be pulled: %s" % e)
1240
1241 self.failed_pull.append({
1242 "tags": [image.raw_name, ],
1243 })
1244
1245 if not image.parents:
1246 LOG.error("Pulled image required to build, not available!")
1247 sys.exit(1)
1248
1249 return False
1250
1251 except:
1252 LOG.exception("Error pulling docker image")
1253
1254 self.failed_pull.append({
1255 "tags": [image.raw_name, ],
1256 })
1257
1258 return False
1259
1260 # obtain the image_id by inspecting the pulled image. Seems unusual
1261 # that the Docker API `pull` method doesn't provide it when the
1262 # `build` method does
1263 pulled_image = self.dc.inspect_image(image.raw_name)
1264
1265 # check to make sure that image that was downloaded has the labels
1266 # that we expect it to have, otherwise return false, trigger build
1267 if not image.compare_labels(
1268 pulled_image['ContainerConfig']['Labels']):
1269 LOG.info("Tried fetching image %s, but labels didn't match" %
1270 image.raw_name)
1271
1272 self.obsolete_pull.append({
1273 "id": pulled_image['Id'],
1274 "tags": pulled_image['RepoTags'],
1275 })
1276 return False
1277
1278 image.image_id = pulled_image['Id']
1279 LOG.info("Fetched image %s, id: %s" %
1280 (image.raw_name, image.image_id))
1281
1282 self.pulled.append({
1283 "id": pulled_image['Id'],
1284 "tags": pulled_image['RepoTags'],
1285 "base": image.name.split(":")[0],
1286 })
1287
1288 self.tag_image(image)
1289 image.status = DI_EXISTS
1290 return True
1291
1292 def _build_image(self, image):
1293
1294 global build_tag
1295
1296 LOG.info("Building docker image for %s" % image.raw_name)
1297
1298 if self.dc is not None:
1299
1300 image_build_tag = "%s:%s" % (image.name, build_tag)
1301
1302 buildargs = image.buildargs()
1303 context_tar = image.context_tarball()
1304 dockerfile = image.dockerfile_rel_path()
1305
1306 for key, val in buildargs.iteritems():
1307 LOG.debug("Buildarg - %s : %s" % (key, val))
1308
1309 bl_path = ""
1310 start_time = datetime.datetime.utcnow()
1311
1312 if(args.build_log_dir):
1313 bl_name = "%s_%s" % (start_time.strftime("%Y%m%dT%H%M%SZ"),
1314 re.sub(r'\W', '_', image.name))
1315 bl_path = os.path.abspath(
1316 os.path.join(args.build_log_dir, bl_name))
1317
1318 LOG.info("Build log: %s" % bl_path)
1319 bl_fh = open(bl_path, 'w+', 0) # 0 = unbuffered writes
1320 else:
1321 bl_fh = None
1322
1323 try:
1324 LOG.info("Building image: %s" % image)
1325
1326 buildparams = dict(
1327 tag=image_build_tag,
1328 buildargs=buildargs,
1329 nocache=args.build,
1330 custom_context=True,
1331 fileobj=context_tar,
1332 dockerfile=dockerfile,
1333 rm=True,
1334 forcerm=True,
1335 pull=False,
1336 decode=True
1337 )
1338
1339 if DOCKER_PY_VERSION == 2:
1340 buildparams['stream'] = True
1341
1342 for stat_d in self.dc.build(**buildparams):
1343
1344 if 'stream' in stat_d:
1345
1346 if bl_fh:
1347 bl_fh.write(stat_d['stream'].encode('utf-8'))
1348
1349 for stat_l in stat_d['stream'].split('\n'):
1350 if(stat_l):
1351 LOG.debug(stat_l)
1352 if stat_d['stream'].startswith("Successfully built "):
1353 siid = stat_d['stream'].split(' ')[2]
1354 short_image_id = siid.strip()
1355 LOG.debug("Short Image ID: %s" % short_image_id)
1356
1357 if 'status' in stat_d:
1358 for stat_l in stat_d['status'].split('\n'):
1359 if(stat_l):
1360 LOG.info(stat_l)
1361
1362 if 'error' in stat_d:
1363 LOG.error(stat_d['error'])
1364 image.status = DI_ERROR
1365 sys.exit(1)
1366
1367 except:
1368 LOG.exception("Error building docker image")
1369
1370 self.failed_build.append({
1371 "tags": [image_build_tag, ],
1372 })
1373
1374 return
1375
1376 finally:
1377 if(bl_fh):
1378 bl_fh.close()
1379
1380 # the image ID given by output isn't the full SHA256 id, so find
1381 # and set it to the full one
1382 built_image = self.dc.inspect_image(short_image_id)
1383 image.image_id = built_image['Id']
1384
1385 end_time = datetime.datetime.utcnow()
1386 duration = end_time - start_time # duration is a timedelta
1387
1388 LOG.info("Built Image: %s, duration: %s, id: %s" %
1389 (image.name, duration, image.image_id))
1390
1391 self.tag_image(image)
1392
1393 # don't push the build_tag to dockerhub
1394 built_tags = list(image.tags)
1395 built_tags.remove(build_tag)
1396
1397 self.built.append({
1398 "id": image.image_id,
1399 "tags": built_tags,
1400 "build_log": bl_path,
1401 "duration": duration.total_seconds(),
1402 "base": image.name.split(":")[0],
1403 })
1404
1405 image.status = DI_EXISTS
1406
1407
1408if __name__ == "__main__":
1409 parse_args()
1410 load_config()
1411
1412 # only include docker module if not a dry run
1413 if not args.dry_run:
1414 try:
1415 import requests
1416 from distutils.version import LooseVersion
1417 from docker import __version__ as docker_version
1418
1419 # handle the docker-py v1 to v2 API differences
1420 if LooseVersion(docker_version) < LooseVersion('2.0.0'):
1421 LOG.error("Unsupported python docker module - "
1422 "remove docker-py 1.x, install docker 2.x")
1423 sys.exit(1)
1424
1425 DOCKER_PY_VERSION = 2
1426 if LooseVersion(docker_version) >= LooseVersion('3.0.0'):
1427 DOCKER_PY_VERSION = 3
1428
1429 import docker
1430 from docker import utils as DockerUtils
1431 from docker import errors as DockerErrors
1432
1433 except ImportError:
1434 LOG.error("Unable to load python docker module (dry run?)")
1435 sys.exit(1)
1436
1437 rm = RepoManifest()
1438 db = DockerBuilder(rm)