Static Jenkins Site Generator
- Private Jenkins job scraping w/API key
- Added Gilroy font to match main public website
- Link back to ONF website for products
- Add more products
Change-Id: I3ed2dc1e371c564ee483ab83fd110a88d818bca7
diff --git a/buildcollector.py b/buildcollector.py
new file mode 100644
index 0000000..7091197
--- /dev/null
+++ b/buildcollector.py
@@ -0,0 +1,443 @@
+#!/usr/bin/env python3
+
+# SPDX-FileCopyrightText: © 2020 Open Networking Foundation <support@opennetworking.org>
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import absolute_import
+
+import argparse
+import base64
+import json
+import logging
+import os
+import re
+import sys
+import urllib.request
+import yaml
+
+from jsonpath_ng.ext import parse as jpparse
+
+# create shared logger
+logging.basicConfig()
+logger = logging.getLogger("sjsgc")
+
+# global dict of jsonpath expressions -> compiled jsonpath parsers, as
+# reparsing expressions in each loop results in 100x longer execution time
+gjpaths = {}
+
+# credentials are loaded in __main__ from the --credentials YAML file
+
+
+def parse_collector_args():
+ """
+ parse CLI arguments
+ """
+
+ parser = argparse.ArgumentParser(description="Jenkins job results collector")
+
+ # Positional args
+ parser.add_argument(
+ "scrape_file",
+ default="scrape.yaml",
+ type=argparse.FileType("r"),
+ help="YAML file describing Jenkins job and data to scrape",
+ )
+
+ # Flags
+ parser.add_argument(
+ "--credentials",
+ type=argparse.FileType("r"),
+ help="Credentials to use for private jenkins jobs",
+ )
+
+ parser.add_argument(
+ "--local", action="store_true", help="Prefer local copies of build lists"
+ )
+
+ parser.add_argument(
+ "--product_dir", default="products", help="Directory to save per-product output"
+ )
+
+ parser.add_argument(
+ "--jobs_dir", default="jobs", help="Directory to save raw Jenkins job output"
+ )
+
+ parser.add_argument(
+ "--debug", action="store_true", help="Print additional debugging information"
+ )
+
+ return parser.parse_args()
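+
+# Illustrative invocation (hypothetical file names, an assumption for
+# documentation purposes, not something added by this change):
+#
+#   ./buildcollector.py scrape.yaml --credentials credentials.yaml --debug
+#
+# scrape.yaml describes the Jenkins jobs and data to scrape; --credentials is
+# only needed when scraping private Jenkins jobs.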
+
+
+def jenkins_job_list_url(server_url, job_name):
+ """
+ create a Jenkins JSON API URL for a job (list of builds)
+ """
+
+ url = "%s/job/%s/api/json" % (server_url, job_name)
+ return url
+
+
+def jenkins_job_build_url(server_url, job_name, build_number):
+ """
+ create a Jenkins JSON API URL for a specific build of a job
+ """
+
+ url = "%s/job/%s/%d/api/json" % (server_url, job_name, build_number)
+ return url
+
+
+def basic_auth_header(username, password):
+ """
+    returns a tuple containing an HTTP basic auth header
+ """
+ creds_str = "%s:%s" % (username, password)
+ creds_b64 = base64.standard_b64encode(creds_str.encode("utf-8"))
+
+ return ("Authorization", "Basic %s" % creds_b64.decode("utf-8"))
+
+
+def jenkins_api_get(url, headers=[]):
+ """
+ Get data from Jenkins JSON API endpoint, return data as a dict
+ """
+
+ request = urllib.request.Request(url)
+
+ # add headers tuples
+ for header in headers:
+ request.add_header(*header)
+
+ try:
+ response = urllib.request.urlopen(request)
+    except urllib.error.HTTPError:
+        logger.exception("Server encountered an HTTPError at URL: '%s'", url)
+        sys.exit(1)
+    except urllib.error.URLError:
+        logger.exception("An URLError occurred at URL: '%s'", url)
+        sys.exit(1)
+ else:
+ # docs: https://docs.python.org/3/library/json.html
+ jsondata = response.read()
+ logger.debug("API response: %s", jsondata)
+
+ try:
+ data = json.loads(jsondata)
+        except json.decoder.JSONDecodeError:
+            logger.exception("Unable to decode JSON")
+            sys.exit(1)
+ else:
+ logger.debug("JSON decoded: %s", data)
+
+ return data
+
+
+def json_file_load(path):
+ """
+ Get data from local file, return data as a dict
+ """
+
+ with open(path) as jf:
+ try:
+ data = json.loads(jf.read())
+        except json.decoder.JSONDecodeError:
+            logger.exception("Unable to decode JSON from file: '%s'", path)
+            sys.exit(1)
+
+ return data
+
+
+def json_file_dump(path, data):
+ """
+ Write JSON file out to a path, creating directories in path as needed
+ """
+
+ # create directory if it doesn't already exist
+ parent_dir = os.path.dirname(path)
+ os.makedirs(parent_dir, exist_ok=True)
+
+ # write file, pretty printed
+ with open(path, "w") as jf:
+ json.dump(data, jf, indent=2)
+
+
+def parse_scrape_file(scrape_file):
+ """
+    Load and check the YAML scrape file, returning a list of one or more documents
+ """
+
+ yout = list(yaml.safe_load_all(scrape_file)) # safe_load_all returns a generator
+ logger.debug("YAML decoded: %s", yout)
+
+ def check_required_keys(to_check, req_keys):
+ """
+ check that all required keys are found in the dict to check
+ """
+ for rk in req_keys:
+ if rk not in to_check:
+ logger.error("Required key '%s' not found in: '%s'", rk, to_check)
+ sys.exit(1)
+
+ # check that required keys exist in each YAML document
+ for ydoc in yout:
+ check_required_keys(ydoc, ["jenkins_jobs", "product_name", "onf_project"])
+
+ for group in ydoc["jenkins_jobs"]:
+ check_required_keys(group, ["group", "jenkins_url", "jobs"])
+
+ for job in group["jobs"]:
+ check_required_keys(job, ["name", "extract"])
+
+ return yout
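+
+# An illustrative scrape.yaml document (hypothetical values, given here only to
+# document the required keys checked above; not a file added by this change):
+#
+#   product_name: "Example Product"
+#   onf_project: "example"
+#   jenkins_jobs:
+#     - group: "1.0"
+#       jenkins_url: "https://jenkins.example.org"
+#       jobs:
+#         - name: "example-job"
+#           extract:
+#             status: "result"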
+
+
+def jsonpath_extract(json_in, extract_list):
+ """
+ Extract data from json using list of jsonpath expressions
+ """
+
+ ret = {}
+
+ for name, jpath in extract_list.items():
+
+        # parsing jsonpath is expensive, so cache parsed expressions in a
+        # global dict
+ if jpath not in gjpaths:
+ gjpaths[jpath] = jpparse(jpath)
+
+ jexpr = gjpaths[jpath]
+
+ matches = [match.value for match in jexpr.find(json_in)]
+
+ # If only a single match, unwrap from list
+ if len(matches) == 1:
+ ret[name] = matches[0]
+ else:
+ ret[name] = matches
+
+ logger.debug("extracted data: %s", ret)
+
+ return ret
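+
+# Illustrative use (hypothetical Jenkins build fields, shown only to document
+# the extract format): with extract_list = {"status": "result"} and a build
+# dict like {"result": "SUCCESS"}, this returns {"status": "SUCCESS"}.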
+
+
+def get_builds_for_job(jobs_dir, local, jenkins_url, job_name, headers=[]):
+ """
+ Download list of builds from a Jenkins job, return list of build ids
+ """
+
+ # where to store jenkins JSON output with builds list
+ jbuildlist = "%s/%s/%s/0_list.json" % (jobs_dir, clean_url(jenkins_url), job_name)
+
+ if os.path.isfile(jbuildlist) and local:
+        # if already downloaded and we want to use the local copy, load it
+ jl = json_file_load(jbuildlist)
+ else:
+ # if not, query jenkins for the list of job builds
+ jlu = jenkins_job_list_url(jenkins_url, job_name)
+ jl = jenkins_api_get(jlu, headers)
+
+ # save to disk
+ json_file_dump(jbuildlist, jl)
+
+ # JSONPath for list of builds in the job
+ jexpr = jpparse("builds[*].number")
+
+ # get a list of builds
+ buildlist = [build.value for build in jexpr.find(jl)]
+
+ return buildlist
+
+
+def get_jenkins_build(jobs_dir, jenkins_url, job_name, build_id, headers=[]):
+ """
+ Download a single build and store it on disk, if job has completed
+ """
+
+    # path to store a copy of the JSON received from Jenkins
+ jjson = "%s/%s/%s/%d_build.json" % (
+ jobs_dir,
+ clean_url(jenkins_url),
+ job_name,
+ build_id,
+ )
+
+ if os.path.isfile(jjson):
+ # if have already run and local copy exists, read/return local copy
+ braw = json_file_load(jjson)
+ else:
+ # make an API call to get the JSON, store locally
+ burl = jenkins_job_build_url(jenkins_url, job_name, build_id)
+ braw = jenkins_api_get(burl, headers)
+
+        # if the build is still in progress the result field is null, so don't
+        # return the build or save a copy, as the build status is not final
+ if not braw["result"]:
+ return None
+
+ # save to disk
+ json_file_dump(jjson, braw)
+
+ return braw
+
+
+def get_all_jenkins_builds(jobs_dir, jenkins_url, job_name, build_ids, headers=[]):
+ """
+ Get a list of all jenkins build data, for completed builds
+ """
+
+ builds_list = []
+
+ # download build data for all builds
+ for build_id in build_ids:
+
+        build = get_jenkins_build(
+            jobs_dir, jenkins_url, job_name, build_id, headers,
+        )
+
+ # may return None if build is in progress
+ if build:
+ builds_list.append(build)
+
+ return builds_list
+
+
+def clean_name(name):
+ """
+ Clean up a name string. Currently only replaces spaces with underscores
+ """
+ return name.replace(" ", "_")
+
+
+def clean_url(url):
+ """
+ remove prefix and any non-path friendly characters from URL
+ """
+ return re.sub(r"\W", "_", re.sub(r"\w+://", "", url))
+
+
+def save_product_builds(product_doc, product_dir, builds):
+ """
+    save the product-specific build information, keeping only builds that
+    pass the per-job filters
+ """
+
+ # duplicate the scrape doc into final product data
+ product_data = dict(product_doc)
+
+ # used to hold groups of jobs
+ groups = {}
+
+ # each doc can have multiple job groups (usually version-specific)
+ for jobgroup in product_doc["jenkins_jobs"]:
+
+ groups[jobgroup["group"]] = {}
+
+ # each job group can have multiple jobs
+ for job in jobgroup["jobs"]:
+
+ pbuilds = []
+
+ # get the build data for the job
+ for build in builds[job["name"]]:
+
+ jpedata = jsonpath_extract(build, job["extract"])
+
+ # filter builds
+ save = True
+ if "filter" in job:
+ for k, v in job["filter"].items():
+ # if data doesn't match the filter value given, don't save it
+ if jpedata[k] != v:
+ save = False
+
+ if save:
+ pbuilds.append(jpedata)
+
+ # allow job name to be overridden, for private jobs
+ if "name_override" in job:
+ groups[jobgroup["group"]][job["name_override"]] = pbuilds
+ else:
+ groups[jobgroup["group"]][job["name"]] = pbuilds
+
+ product_data["groups"] = groups
+
+ product_filename = "%s/%s.json" % (
+ product_dir,
+ clean_name(product_doc["product_name"]),
+ )
+
+ json_file_dump(product_filename, product_data)
+
+
+# main function that calls other functions
+if __name__ == "__main__":
+
+ args = parse_collector_args()
+
+ if not os.path.isdir(args.product_dir):
+ logger.error("Output directory is not a directory: '%s'", args.product_dir)
+ sys.exit(1)
+
+ # only print log messages if debugging
+ if args.debug:
+ logger.setLevel(logging.DEBUG)
+ else:
+ logger.setLevel(logging.CRITICAL)
+
+    # read in the credentials file if the --credentials argument was passed
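+    # An illustrative credentials file (hypothetical values) would contain:
+    #
+    #   credentials:
+    #     my-private-jenkins:
+    #       jenkins_api_user: "user"
+    #       jenkins_api_token: "token"
+    #
+    # where the key under 'credentials' matches the 'credentials' value set on
+    # a job group in the scrape file.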
+ credentials = {}
+ if args.credentials:
+ cred_file = yaml.safe_load(args.credentials)
+ credentials = cred_file["credentials"]
+
+ # read in the Scrape File
+ sfile = parse_scrape_file(args.scrape_file)
+
+ # dict of job name -> build data
+ builds = {}
+
+ # Scrape File YAML may contain multiple documents
+ for sdoc in sfile:
+
+ # phase 1 - identify all the Jenkins jobs
+ # each doc can have multiple job groups (usually version-specific)
+ for jobgroup in sdoc["jenkins_jobs"]:
+
+ api_headers = []
+
+ if "credentials" in jobgroup:
+ if jobgroup["credentials"] in credentials:
+ api_headers = [
+ basic_auth_header(
+ credentials[jobgroup["credentials"]]["jenkins_api_user"],
+ credentials[jobgroup["credentials"]]["jenkins_api_token"],
+ )
+ ]
+ else:
+ logger.error(
+ "Credentials for '%s' not supplied", jobgroup["credentials"]
+ )
+ sys.exit(1)
+
+ # each job group can have multiple jobs
+ for job in jobgroup["jobs"]:
+
+            # only download jobs that haven't already been downloaded
+ if job["name"] not in builds:
+
+                # get list of all build IDs for the job
+ build_ids = get_builds_for_job(
+ args.jobs_dir,
+ args.local,
+ jobgroup["jenkins_url"],
+ job["name"],
+ api_headers,
+ )
+
+ # get build info - either download or load from disk
+ builds[job["name"]] = get_all_jenkins_builds(
+ args.jobs_dir,
+ jobgroup["jenkins_url"],
+ job["name"],
+ build_ids,
+ api_headers,
+ )
+
+ # phase 2 - create per-product (document) lists of build extracted data
+ save_product_builds(sdoc, args.product_dir, builds)