| #!/usr/bin/env python3 |
| |
| # SPDX-FileCopyrightText: © 2020 Open Networking Foundation <support@opennetworking.org> |
| # SPDX-License-Identifier: Apache-2.0 |
| |
| from __future__ import absolute_import |
| |
| import argparse |
| import base64 |
| import json |
| import logging |
| import os |
| import re |
| import sys |
| import urllib.request |
| import yaml |
| |
| from jsonpath_ng.ext import parse as jpparse |
| |
| # create shared logger |
| logging.basicConfig() |
| logger = logging.getLogger("sjsgc") |
| |
| # global dict of jsonpath expressions -> compiled jsonpath parsers, as |
| # reparsing expressions in each loop results in 100x longer execution time |
| gjpaths = {} |
| |
| # credentials are loaded from the --credentials file in __main__ |
| |
| |
| def parse_collector_args(): |
| """ |
| parse CLI arguments |
| """ |
| |
| parser = argparse.ArgumentParser(description="Jenkins job results collector") |
| |
| # Positional args |
| parser.add_argument( |
| "scrape_file", |
| default="scrape.yaml", |
| type=argparse.FileType("r"), |
| help="YAML file describing Jenkins job and data to scrape", |
| ) |
| |
| # Flags |
| parser.add_argument( |
| "--credentials", |
| type=argparse.FileType("r"), |
| help="Credentials to use for private jenkins jobs", |
| ) |
| |
| parser.add_argument( |
| "--local", action="store_true", help="Prefer local copies of build lists" |
| ) |
| |
| parser.add_argument( |
| "--product_dir", default="products", help="Directory to save per-product output" |
| ) |
| |
| parser.add_argument( |
| "--jobs_dir", default="jobs", help="Directory to save raw Jenkins job output" |
| ) |
| |
| parser.add_argument( |
| "--debug", action="store_true", help="Print additional debugging information" |
| ) |
| |
| return parser.parse_args() |
| |
| |
| def jenkins_job_list_url(server_url, job_name): |
| """ |
| create a Jenkins JSON API URL for a job (list of builds) |
| """ |
| |
| url = "%s/job/%s/api/json" % (server_url, job_name) |
| return url |
| |
| |
| def jenkins_job_build_url(server_url, job_name, build_number): |
| """ |
| create a Jenkins JSON API URL for a specific build of a job |
| """ |
| |
| url = "%s/job/%s/%d/api/json" % (server_url, job_name, build_number) |
| return url |
| |
| |
| def basic_auth_header(username, password): |
| """ |
| Return a (name, value) tuple containing an HTTP basic auth header |
| """ |
| creds_str = "%s:%s" % (username, password) |
| creds_b64 = base64.standard_b64encode(creds_str.encode("utf-8")) |
| |
| return ("Authorization", "Basic %s" % creds_b64.decode("utf-8")) |
| |
| |
| def jenkins_api_get(url, headers=[]): |
| """ |
| Get data from Jenkins JSON API endpoint, return data as a dict |
| """ |
| |
| request = urllib.request.Request(url) |
| |
| # add headers tuples |
| for header in headers: |
| request.add_header(*header) |
| |
| try: |
| response = urllib.request.urlopen(request) |
| except urllib.error.HTTPError: |
| logger.exception("Server encountered an HTTPError at URL: '%s'", url) |
| except urllib.error.URLError: |
| logger.exception("An URLError occurred at URL: '%s'", url) |
| else: |
| # docs: https://docs.python.org/3/library/json.html |
| jsondata = response.read() |
| logger.debug("API response: %s", jsondata) |
| |
| try: |
| data = json.loads(jsondata) |
| except json.decoder.JSONDecodeError: |
| logger.exception("Unable to decode JSON") |
| else: |
| logger.debug("JSON decoded: %s", data) |
| |
| return data |
| |
| |
| def json_file_load(path): |
| """ |
| Get data from local file, return data as a dict |
| """ |
| |
| with open(path) as jf: |
| try: |
| data = json.loads(jf.read()) |
| except json.decoder.JSONDecodeError: |
| logger.exception("Unable to decode JSON from file: '%s'", path) |
| |
| return data |
| |
| |
| def json_file_dump(path, data): |
| """ |
| Write JSON file out to a path, creating directories in path as needed |
| """ |
| logger.debug("writing JSON file: %s", path) |
| |
| # create directory if it doesn't already exist |
| parent_dir = os.path.dirname(path) |
| os.makedirs(parent_dir, exist_ok=True) |
| |
| # write file, pretty printed |
| with open(path, "w") as jf: |
| json.dump(data, jf, indent=2) |
| |
| |
| def parse_scrape_file(scrape_file): |
| """ |
| Load and check the YAML scrape file, returning a list of one or more documents |
| """ |
| |
| yout = list(yaml.safe_load_all(scrape_file)) # safe_load_all returns a generator |
| logger.debug("YAML decoded: %s", yout) |
| |
| def check_required_keys(to_check, req_keys): |
| """ |
| check that all required keys are found in the dict to check |
| """ |
| for rk in req_keys: |
| if rk not in to_check: |
| logger.error("Required key '%s' not found in: '%s'", rk, to_check) |
| sys.exit(1) |
| |
| # check that required keys exist in each YAML document |
| for ydoc in yout: |
| check_required_keys(ydoc, ["jenkins_jobs", "product_name", "onf_project"]) |
| |
| for group in ydoc["jenkins_jobs"]: |
| check_required_keys(group, ["group", "jenkins_url", "jobs"]) |
| |
| for job in group["jobs"]: |
| check_required_keys(job, ["name", "extract"]) |
| |
| return yout |
| |
| |
| def jsonpath_extract(json_in, extract_list): |
| """ |
| Extract data from json using list of jsonpath expressions |
| """ |
| |
| ret = {} |
| |
| for name, jpath in extract_list.items(): |
| |
| # parsing jsonpath is expensive, so cache compiled expressions in the |
| # module-level gjpaths dict |
| if jpath not in gjpaths: |
| gjpaths[jpath] = jpparse(jpath) |
| |
| jexpr = gjpaths[jpath] |
| |
| matches = [match.value for match in jexpr.find(json_in)] |
| |
| # If only a single match, unwrap from list |
| if len(matches) == 1: |
| ret[name] = matches[0] |
| else: |
| ret[name] = matches |
| |
| logger.debug("extracted data: %s", ret) |
| |
| return ret |
| |
| |
| def get_builds_for_job(jobs_dir, local, jenkins_url, job_name, headers=[]): |
| """ |
| Download list of builds from a Jenkins job, return list of build ids |
| """ |
| |
| # where to store jenkins JSON output with builds list |
| jbuildlist = "%s/%s/%s/0_list.json" % (jobs_dir, clean_url(jenkins_url), job_name) |
| |
| if os.path.isfile(jbuildlist) and local: |
| # if already downloaded and we want to use the local copy, load it |
| jl = json_file_load(jbuildlist) |
| else: |
| # if not, query jenkins for the list of job builds |
| jlu = jenkins_job_list_url(jenkins_url, job_name) |
| jl = jenkins_api_get(jlu, headers) |
| |
| # save to disk |
| json_file_dump(jbuildlist, jl) |
| |
| # JSONPath for list of builds in the job |
| jexpr = jpparse("builds[*].number") |
| |
| # get a list of builds |
| buildlist = [build.value for build in jexpr.find(jl)] |
| |
| return buildlist |
| |
| |
| def get_jenkins_build(jobs_dir, jenkins_url, job_name, build_id, headers=[]): |
| """ |
| Download a single build and store it on disk, if the build has completed |
| """ |
| |
| # path to store a copy of the JSON received from Jenkins |
| jjson = "%s/%s/%s/%d_build.json" % ( |
| jobs_dir, |
| clean_url(jenkins_url), |
| job_name, |
| build_id, |
| ) |
| |
| if os.path.isfile(jjson): |
| # if have already run and local copy exists, read/return local copy |
| braw = json_file_load(jjson) |
| else: |
| # make an API call to get the JSON, store locally |
| burl = jenkins_job_build_url(jenkins_url, job_name, build_id) |
| braw = jenkins_api_get(burl, headers) |
| |
| # if the build is still in progress the result field is null, so don't |
| # return the build or save a copy, as the build status is not final |
| if not braw["result"]: |
| return None |
| |
| # save to disk |
| json_file_dump(jjson, braw) |
| |
| return braw |
| |
| |
| def get_all_jenkins_builds(jobs_dir, jenkins_url, job_name, build_ids, headers=[]): |
| """ |
| Get a list of build data for all completed builds of a Jenkins job |
| """ |
| |
| builds_list = [] |
| |
| # download build data for all builds |
| for build_id in build_ids: |
| |
| build = get_jenkins_build( |
| jobs_dir, jenkins_url, job_name, build_id, headers, |
| ) |
| |
| # may return None if build is in progress |
| if build: |
| builds_list.append(build) |
| |
| return builds_list |
| |
| |
| def clean_name(name): |
| """ |
| Clean up a name string. Currently only replaces spaces with underscores |
| """ |
| return name.replace(" ", "_") |
| |
| |
| def clean_url(url): |
| """ |
| remove the URL scheme prefix and any non-path-friendly characters from a URL |
| """ |
| return re.sub(r"\W", "_", re.sub(r"\w+://", "", url)) |
| |
| |
| def save_product_builds(product_doc, product_dir, builds): |
| """ |
| save the product-specific build information, keeping only the builds |
| that pass each job's optional filters |
| """ |
| |
| # duplicate the scrape doc into final product data |
| product_data = dict(product_doc) |
| |
| # used to hold groups of jobs |
| groups = {} |
| |
| # each doc can have multiple job groups (usually version-specific) |
| for jobgroup in product_doc["jenkins_jobs"]: |
| |
| groups[jobgroup["group"]] = {} |
| |
| # each job group can have multiple jobs |
| for job in jobgroup["jobs"]: |
| |
| pbuilds = [] |
| |
| # get the build data for the job |
| for build in builds[job["name"]]: |
| |
| jpedata = jsonpath_extract(build, job["extract"]) |
| |
| # filter builds |
| save = True |
| if "filter" in job: |
| for k, v in job["filter"].items(): |
| # if data doesn't match the filter value given, don't save it |
| if jpedata[k] != v: |
| save = False |
| |
| if save: |
| pbuilds.append(jpedata) |
| |
| # allow job name to be overridden, for private jobs |
| if "name_override" in job: |
| groups[jobgroup["group"]][job["name_override"]] = pbuilds |
| else: |
| groups[jobgroup["group"]][job["name"]] = pbuilds |
| |
| product_data["groups"] = groups |
| |
| product_filename = "%s/%s/%s.json" % ( |
| product_dir, |
| product_doc["onf_project"], |
| clean_name(product_doc["product_name"]), |
| ) |
| |
| json_file_dump(product_filename, product_data) |
| |
| |
| # main function that calls other functions |
| if __name__ == "__main__": |
| |
| args = parse_collector_args() |
| |
| if not os.path.isdir(args.product_dir): |
| logger.error("Output directory is not a directory: '%s'", args.product_dir) |
| sys.exit(1) |
| |
| # only print log messages if debugging |
| if args.debug: |
| logger.setLevel(logging.DEBUG) |
| else: |
| logger.setLevel(logging.CRITICAL) |
| |
| # read in the credentials file if the argument was passed |
| credentials = {} |
| if args.credentials: |
| cred_file = yaml.safe_load(args.credentials) |
| credentials = cred_file["credentials"] |
| |
| # read in the Scrape File |
| sfile = parse_scrape_file(args.scrape_file) |
| |
| # dict of job name -> build data |
| builds = {} |
| |
| # Scrape File YAML may contain multiple documents |
| for sdoc in sfile: |
| |
| # phase 1 - identify all the Jenkins jobs and download their build data |
| # each doc can have multiple job groups (usually version-specific) |
| for jobgroup in sdoc["jenkins_jobs"]: |
| |
| api_headers = [] |
| |
| if "credentials" in jobgroup: |
| if jobgroup["credentials"] in credentials: |
| api_headers = [ |
| basic_auth_header( |
| credentials[jobgroup["credentials"]]["jenkins_api_user"], |
| credentials[jobgroup["credentials"]]["jenkins_api_token"], |
| ) |
| ] |
| else: |
| logger.error( |
| "Credentials for '%s' not supplied", jobgroup["credentials"] |
| ) |
| sys.exit(1) |
| |
| # each job group can have multiple jobs |
| for job in jobgroup["jobs"]: |
| |
| # only download jobs that haven't already been downloaded |
| if job["name"] not in builds: |
| |
| # get a list of all build IDs for the job |
| build_ids = get_builds_for_job( |
| args.jobs_dir, |
| args.local, |
| jobgroup["jenkins_url"], |
| job["name"], |
| api_headers, |
| ) |
| |
| # get build info - either download or load from disk |
| builds[job["name"]] = get_all_jenkins_builds( |
| args.jobs_dir, |
| jobgroup["jenkins_url"], |
| job["name"], |
| build_ids, |
| api_headers, |
| ) |
| |
| # phase 2 - create per-product (per-document) lists of extracted build data |
| save_product_builds(sdoc, args.product_dir, builds) |