#!/usr/bin/env python3
# SPDX-FileCopyrightText: © 2020 Open Networking Foundation <support@opennetworking.org>
# SPDX-License-Identifier: Apache-2.0
from __future__ import absolute_import
import argparse
import base64
import json
import logging
import os
import re
import sys
import urllib.error
import urllib.request
import yaml
from jsonpath_ng.ext import parse as jpparse
# create shared logger
logging.basicConfig()
logger = logging.getLogger("sjsgc")
# global dict of jsonpath expressions -> compiled jsonpath parsers, as
# reparsing expressions in each loop results in 100x longer execution time
gjpaths = {}
# credentials for private Jenkins servers are loaded in __main__ (from the
# --credentials file) and used to build API auth headers
def parse_collector_args():
"""
parse CLI arguments
"""
parser = argparse.ArgumentParser(description="Jenkins job results collector")
# Positional args
parser.add_argument(
"scrape_file",
default="scrape.yaml",
type=argparse.FileType("r"),
help="YAML file describing Jenkins job and data to scrape",
)
# Flags
parser.add_argument(
"--credentials",
type=argparse.FileType("r"),
help="Credentials to use for private jenkins jobs",
)
parser.add_argument(
"--local", action="store_true", help="Prefer local copies of build lists"
)
parser.add_argument(
"--product_dir", default="products", help="Directory to save per-product output"
)
parser.add_argument(
"--jobs_dir", default="jobs", help="Directory to save raw Jenkins job output"
)
parser.add_argument(
"--debug", action="store_true", help="Print additional debugging information"
)
return parser.parse_args()
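
# Illustrative invocation (the script and file names here are assumptions, not
# taken from this repo):
#   python3 <this_script> scrape.yaml --credentials credentials.yaml --debug
# This downloads raw build data into ./jobs and writes per-product summaries
# into ./products, per the defaults above.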
def jenkins_job_list_url(server_url, job_name):
"""
create a Jenkins JSON API URL for a job (list of builds)
"""
url = "%s/job/%s/api/json" % (server_url, job_name)
return url
def jenkins_job_build_url(server_url, job_name, build_number):
"""
create a Jenkins JSON API URL for a specific build of a job
"""
url = "%s/job/%s/%d/api/json" % (server_url, job_name, build_number)
return url
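
# For example, jenkins_job_build_url("https://jenkins.example.org", "myjob", 7)
# returns "https://jenkins.example.org/job/myjob/7/api/json" (the server URL is
# assumed to have no trailing slash).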
def basic_auth_header(username, password):
"""
    returns a tuple containing an HTTP basic auth header
"""
creds_str = "%s:%s" % (username, password)
creds_b64 = base64.standard_b64encode(creds_str.encode("utf-8"))
return ("Authorization", "Basic %s" % creds_b64.decode("utf-8"))
def jenkins_api_get(url, headers=[]):
"""
Get data from Jenkins JSON API endpoint, return data as a dict
"""
request = urllib.request.Request(url)
# add headers tuples
for header in headers:
request.add_header(*header)
try:
response = urllib.request.urlopen(request)
except urllib.error.HTTPError:
logger.exception("Server encountered an HTTPError at URL: '%s'", url)
except urllib.error.URLError:
logger.exception("An URLError occurred at URL: '%s'", url)
else:
# docs: https://docs.python.org/3/library/json.html
jsondata = response.read()
logger.debug("API response: %s", jsondata)
try:
data = json.loads(jsondata)
except json.decoder.JSONDecodeError:
logger.exception("Unable to decode JSON")
else:
logger.debug("JSON decoded: %s", data)
return data
def json_file_load(path):
    """
    Get data from local file, return data as a dict, or None if the file
    doesn't contain valid JSON
    """
    data = None
    with open(path) as jf:
        try:
            data = json.loads(jf.read())
        except json.decoder.JSONDecodeError:
            logger.exception("Unable to decode JSON from file: '%s'", path)
    return data
def json_file_dump(path, data):
"""
Write JSON file out to a path, creating directories in path as needed
"""
logger.debug("writing JSON file: %s", path)
# create directory if it doesn't already exist
parent_dir = os.path.dirname(path)
os.makedirs(parent_dir, exist_ok=True)
# write file, pretty printed
with open(path, "w") as jf:
json.dump(data, jf, indent=2)
def parse_scrape_file(scrape_file):
"""
    Load and check the YAML scrape file, returning a list of one or more documents
"""
yout = list(yaml.safe_load_all(scrape_file)) # safe_load_all returns a generator
logger.debug("YAML decoded: %s", yout)
def check_required_keys(to_check, req_keys):
"""
check that all required keys are found in the dict to check
"""
for rk in req_keys:
if rk not in to_check:
logger.error("Required key '%s' not found in: '%s'", rk, to_check)
sys.exit(1)
# check that required keys exist in each YAML document
for ydoc in yout:
check_required_keys(ydoc, ["jenkins_jobs", "product_name", "onf_project"])
for group in ydoc["jenkins_jobs"]:
check_required_keys(group, ["group", "jenkins_url", "jobs"])
for job in group["jobs"]:
check_required_keys(job, ["name", "extract"])
return yout
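
# A minimal scrape file document that passes the checks above could look like
# this (all values are illustrative, not taken from a real deployment):
#
#   product_name: Example Product
#   onf_project: example
#   jenkins_jobs:
#     - group: "1.0"
#       jenkins_url: "https://jenkins.example.org"
#       jobs:
#         - name: example_job
#           extract:
#             result: "result"
#             duration: "duration"
#
# Optional keys used later in this script: "credentials" per job group, and
# "filter"/"name_override" per job.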
def jsonpath_extract(json_in, extract_list):
"""
Extract data from json using list of jsonpath expressions
"""
ret = {}
for name, jpath in extract_list.items():
        # parsing jsonpath is expensive, so cache compiled expressions in the
        # global dict keyed by the expression string
if jpath not in gjpaths:
gjpaths[jpath] = jpparse(jpath)
jexpr = gjpaths[jpath]
matches = [match.value for match in jexpr.find(json_in)]
# If only a single match, unwrap from list
if len(matches) == 1:
ret[name] = matches[0]
else:
ret[name] = matches
logger.debug("extracted data: %s", ret)
return ret
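
# As a sketch: with extract_list = {"result": "result", "timestamp": "timestamp"}
# and a Jenkins build JSON document as input, this returns something like
# {"result": "SUCCESS", "timestamp": 1600000000000}. Expressions matching more
# than one node keep their matches wrapped in a list.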
def get_builds_for_job(jobs_dir, local, jenkins_url, job_name, headers=[]):
"""
Download list of builds from a Jenkins job, return list of build ids
"""
# where to store jenkins JSON output with builds list
jbuildlist = "%s/%s/%s/0_list.json" % (jobs_dir, clean_url(jenkins_url), job_name)
if os.path.isfile(jbuildlist) and local:
        # if already downloaded and we want to use the local copy, load it
jl = json_file_load(jbuildlist)
else:
# if not, query jenkins for the list of job builds
jlu = jenkins_job_list_url(jenkins_url, job_name)
jl = jenkins_api_get(jlu, headers)
# save to disk
json_file_dump(jbuildlist, jl)
# JSONPath for list of builds in the job
jexpr = jpparse("builds[*].number")
# get a list of builds
buildlist = [build.value for build in jexpr.find(jl)]
return buildlist
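
# Note: the build list for a job is cached on disk as 0_list.json inside
# <jobs_dir>/<cleaned jenkins_url>/<job_name>/, next to the per-build
# <build_id>_build.json files written by get_jenkins_build() below.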
def get_jenkins_build(jobs_dir, jenkins_url, job_name, build_id, headers=[]):
"""
Download a single build and store it on disk, if job has completed
"""
    # path to store a copy of the JSON received from Jenkins
jjson = "%s/%s/%s/%d_build.json" % (
jobs_dir,
clean_url(jenkins_url),
job_name,
build_id,
)
if os.path.isfile(jjson):
        # if a local copy exists from a previous run, load and return it
braw = json_file_load(jjson)
else:
# make an API call to get the JSON, store locally
burl = jenkins_job_build_url(jenkins_url, job_name, build_id)
braw = jenkins_api_get(burl, headers)
        # if the API call failed, or the build is still in progress (null
        # result), don't return or save a copy, as the build status isn't final
        if not braw or not braw["result"]:
            return None
# save to disk
json_file_dump(jjson, braw)
return braw
def get_all_jenkins_builds(jobs_dir, jenkins_url, job_name, build_ids, headers=[]):
"""
Get a list of all jenkins build data, for completed builds
"""
builds_list = []
# download build data for all builds
for build_id in build_ids:
build = get_jenkins_build(
            jobs_dir, jenkins_url, job_name, build_id, headers,
)
# may return None if build is in progress
if build:
builds_list.append(build)
return builds_list
def clean_name(name):
"""
Clean up a name string. Currently only replaces spaces with underscores
"""
return name.replace(" ", "_")
def clean_url(url):
"""
remove prefix and any non-path friendly characters from URL
"""
return re.sub(r"\W", "_", re.sub(r"\w+://", "", url))
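
# For example, clean_url("https://jenkins.example.org/ci") returns
# "jenkins_example_org_ci", which is safe to use as a directory name under
# jobs_dir.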
def save_product_builds(product_doc, product_dir, builds):
"""
    save the product-specific build information, keeping only builds that pass
    each job's filters (if any)
"""
# duplicate the scrape doc into final product data
product_data = dict(product_doc)
# used to hold groups of jobs
groups = {}
# each doc can have multiple job groups (usually version-specific)
for jobgroup in product_doc["jenkins_jobs"]:
groups[jobgroup["group"]] = {}
# each job group can have multiple jobs
for job in jobgroup["jobs"]:
pbuilds = []
# get the build data for the job
for build in builds[job["name"]]:
jpedata = jsonpath_extract(build, job["extract"])
# filter builds
save = True
if "filter" in job:
for k, v in job["filter"].items():
# if data doesn't match the filter value given, don't save it
if jpedata[k] != v:
save = False
if save:
pbuilds.append(jpedata)
# allow job name to be overridden, for private jobs
if "name_override" in job:
groups[jobgroup["group"]][job["name_override"]] = pbuilds
else:
groups[jobgroup["group"]][job["name"]] = pbuilds
product_data["groups"] = groups
product_filename = "%s/%s/%s.json" % (
product_dir,
product_doc["onf_project"],
clean_name(product_doc["product_name"]),
)
json_file_dump(product_filename, product_data)
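
# The per-product file written above
# (<product_dir>/<onf_project>/<product_name>.json) is a copy of the scrape
# document plus a "groups" key mapping each group name to job names to lists
# of extracted build data.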
# main entry point
if __name__ == "__main__":
args = parse_collector_args()
if not os.path.isdir(args.product_dir):
logger.error("Output directory is not a directory: '%s'", args.product_dir)
sys.exit(1)
# only print log messages if debugging
if args.debug:
logger.setLevel(logging.DEBUG)
else:
logger.setLevel(logging.CRITICAL)
    # read in credentials file if the argument was passed
credentials = {}
if args.credentials:
cred_file = yaml.safe_load(args.credentials)
credentials = cred_file["credentials"]
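    # The credentials file is expected to look roughly like this (names and
    # values are illustrative); its top-level keys are referenced by the
    # per-job-group "credentials" entries handled below:
    #   credentials:
    #     my_private_jenkins:
    #       jenkins_api_user: someuser
    #       jenkins_api_token: sometoken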
# read in the Scrape File
sfile = parse_scrape_file(args.scrape_file)
# dict of job name -> build data
builds = {}
# Scrape File YAML may contain multiple documents
for sdoc in sfile:
# phase 1 - identify all the Jenkins jobs
# each doc can have multiple job groups (usually version-specific)
for jobgroup in sdoc["jenkins_jobs"]:
api_headers = []
if "credentials" in jobgroup:
if jobgroup["credentials"] in credentials:
api_headers = [
basic_auth_header(
credentials[jobgroup["credentials"]]["jenkins_api_user"],
credentials[jobgroup["credentials"]]["jenkins_api_token"],
)
]
else:
logger.error(
"Credentials for '%s' not supplied", jobgroup["credentials"]
)
sys.exit(1)
# each job group can have multiple jobs
for job in jobgroup["jobs"]:
# only redownload jobs that haven't been downloaded before
if job["name"] not in builds:
# get list of all Job ID's
build_ids = get_builds_for_job(
args.jobs_dir,
args.local,
jobgroup["jenkins_url"],
job["name"],
api_headers,
)
# get build info - either download or load from disk
builds[job["name"]] = get_all_jenkins_builds(
args.jobs_dir,
jobgroup["jenkins_url"],
job["name"],
build_ids,
api_headers,
)
# phase 2 - create per-product (document) lists of build extracted data
save_product_builds(sdoc, args.product_dir, builds)