#!/usr/bin/env python3
# SPDX-FileCopyrightText: © 2020 Open Networking Foundation <support@opennetworking.org>
# SPDX-License-Identifier: Apache-2.0
from __future__ import absolute_import
import argparse
import base64
import json
import logging
import os
import re
import sys
import urllib.error
import urllib.request
import yaml
from jsonpath_ng.ext import parse as jpparse
# create shared logger
logging.basicConfig()
logger = logging.getLogger("sjsgc")
# global dict of jsonpath expressions -> compiled jsonpath parsers, as
# reparsing expressions in each loop results in 100x longer execution time
gjpaths = {}
# credentials for private Jenkins servers are loaded in __main__ (from the
# --credentials file) and used to build API auth headers
def parse_collector_args():
"""
parse CLI arguments
"""
parser = argparse.ArgumentParser(description="Jenkins job results collector")
# Positional args
parser.add_argument(
"scrape_file",
default="scrape.yaml",
type=argparse.FileType("r"),
help="YAML file describing Jenkins job and data to scrape",
)
# Flags
parser.add_argument(
"--credentials",
type=argparse.FileType("r"),
help="Credentials to use for private jenkins jobs",
)
parser.add_argument(
"--local", action="store_true", help="Prefer local copies of build lists"
)
parser.add_argument(
"--product_dir", default="products", help="Directory to save per-product output"
)
parser.add_argument(
"--jobs_dir", default="jobs", help="Directory to save raw Jenkins job output"
)
parser.add_argument(
"--debug", action="store_true", help="Print additional debugging information"
)
return parser.parse_args()
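
# Illustrative invocation (the script and file names here are assumptions, not
# taken from this repo):
#   python3 <this_script> scrape.yaml --credentials credentials.yaml --debug
# This downloads raw build data into ./jobs and writes per-product summaries
# into ./products, per the defaults above.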
def jenkins_job_list_url(server_url, job_name):
"""
create a Jenkins JSON API URL for a job (list of builds)
"""
url = "%s/job/%s/api/json" % (server_url, job_name)
return url
def jenkins_job_build_url(server_url, job_name, build_number):
"""
create a Jenkins JSON API URL for a specific build of a job
"""
url = "%s/job/%s/%d/api/json" % (server_url, job_name, build_number)
return url
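
# For example, jenkins_job_build_url("https://jenkins.example.org", "myjob", 7)
# returns "https://jenkins.example.org/job/myjob/7/api/json" (the server URL is
# assumed to have no trailing slash).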
def basic_auth_header(username, password):
"""
    returns a tuple containing an HTTP basic auth header
"""
creds_str = "%s:%s" % (username, password)
creds_b64 = base64.standard_b64encode(creds_str.encode("utf-8"))
return ("Authorization", "Basic %s" % creds_b64.decode("utf-8"))
def jenkins_api_get(url, headers=[]):
"""
Get data from Jenkins JSON API endpoint, return data as a dict
"""
request = urllib.request.Request(url)
# add headers tuples
for header in headers:
request.add_header(*header)
try:
response = urllib.request.urlopen(request)
except urllib.error.HTTPError:
logger.exception("Server encountered an HTTPError at URL: '%s'", url)
except urllib.error.URLError:
logger.exception("An URLError occurred at URL: '%s'", url)
else:
# docs: https://docs.python.org/3/library/json.html
jsondata = response.read()
logger.debug("API response: %s", jsondata)
try:
data = json.loads(jsondata)
except json.decoder.JSONDecodeError:
logger.exception("Unable to decode JSON")
else:
logger.debug("JSON decoded: %s", data)
return data
def json_file_load(path):
    """
    Get data from local file, return data as a dict, or None if the file
    doesn't contain valid JSON
    """
    data = None
    with open(path) as jf:
        try:
            data = json.loads(jf.read())
        except json.decoder.JSONDecodeError:
            logger.exception("Unable to decode JSON from file: '%s'", path)
    return data
def json_file_dump(path, data):
"""
Write JSON file out to a path, creating directories in path as needed
"""
logger.debug("writing JSON file: %s", path)
# create directory if it doesn't already exist
parent_dir = os.path.dirname(path)
os.makedirs(parent_dir, exist_ok=True)
# write file, pretty printed
with open(path, "w") as jf:
json.dump(data, jf, indent=2)
def parse_scrape_file(scrape_file):
"""
    Load and check the YAML scrape file, returning a list of one or more documents
"""
yout = list(yaml.safe_load_all(scrape_file)) # safe_load_all returns a generator
logger.debug("YAML decoded: %s", yout)
def check_required_keys(to_check, req_keys):
"""
check that all required keys are found in the dict to check
"""
for rk in req_keys:
if rk not in to_check:
logger.error("Required key '%s' not found in: '%s'", rk, to_check)
sys.exit(1)
# check that required keys exist in each YAML document
for ydoc in yout:
check_required_keys(ydoc, ["jenkins_jobs", "product_name", "onf_project"])
for group in ydoc["jenkins_jobs"]:
check_required_keys(group, ["group", "jenkins_url", "jobs"])
for job in group["jobs"]:
check_required_keys(job, ["name", "extract"])
return yout
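
# A minimal scrape file document that passes the checks above could look like
# this (all values are illustrative, not taken from a real deployment):
#
#   product_name: Example Product
#   onf_project: example
#   jenkins_jobs:
#     - group: "1.0"
#       jenkins_url: "https://jenkins.example.org"
#       jobs:
#         - name: example_job
#           extract:
#             result: "result"
#             duration: "duration"
#
# Optional keys used later in this script: "credentials" per job group, and
# "filter"/"name_override" per job.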
def jsonpath_extract(json_in, extract_list):
"""
Extract data from json using list of jsonpath expressions
"""
ret = {}
for name, jpath in extract_list.items():
        # parsing jsonpath is expensive, so cache compiled expressions in the
        # global dict keyed by the expression string
if jpath not in gjpaths:
gjpaths[jpath] = jpparse(jpath)
jexpr = gjpaths[jpath]
matches = [match.value for match in jexpr.find(json_in)]
# If only a single match, unwrap from list
if len(matches) == 1:
ret[name] = matches[0]
else:
ret[name] = matches
logger.debug("extracted data: %s", ret)
return ret
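
# As a sketch: with extract_list = {"result": "result", "timestamp": "timestamp"}
# and a Jenkins build JSON document as input, this returns something like
# {"result": "SUCCESS", "timestamp": 1600000000000}. Expressions matching more
# than one node keep their matches wrapped in a list.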
def get_builds_for_job(jobs_dir, local, jenkins_url, job_name, headers=[]):
"""
Download list of builds from a Jenkins job, return list of build ids
"""
# where to store jenkins JSON output with builds list
jbuildlist = "%s/%s/%s/0_list.json" % (jobs_dir, clean_url(jenkins_url), job_name)
if os.path.isfile(jbuildlist) and local:
        # if already downloaded and we want to use the local copy, load it
jl = json_file_load(jbuildlist)
else:
# if not, query jenkins for the list of job builds
jlu = jenkins_job_list_url(jenkins_url, job_name)
jl = jenkins_api_get(jlu, headers)
# save to disk
json_file_dump(jbuildlist, jl)
# JSONPath for list of builds in the job
jexpr = jpparse("builds[*].number")
# get a list of builds
buildlist = [build.value for build in jexpr.find(jl)]
return buildlist
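
# Note: the build list for a job is cached on disk as 0_list.json inside
# <jobs_dir>/<cleaned jenkins_url>/<job_name>/, next to the per-build
# <build_id>_build.json files written by get_jenkins_build() below.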
def get_jenkins_build(jobs_dir, jenkins_url, job_name, build_id, headers=[]):
"""
Download a single build and store it on disk, if job has completed
"""
    # path to store a copy of the JSON received from Jenkins
jjson = "%s/%s/%s/%d_build.json" % (
jobs_dir,
clean_url(jenkins_url),
job_name,
build_id,
)
if os.path.isfile(jjson):
        # if a local copy exists from a previous run, load and return it
braw = json_file_load(jjson)
else:
# make an API call to get the JSON, store locally
burl = jenkins_job_build_url(jenkins_url, job_name, build_id)
braw = jenkins_api_get(burl, headers)
        # if the API call failed, or the build is still in progress (null
        # result), don't return or save a copy, as the build status isn't final
        if not braw or not braw["result"]:
            return None
# save to disk
json_file_dump(jjson, braw)
return braw
def get_all_jenkins_builds(jobs_dir, jenkins_url, job_name, build_ids, headers=[]):
"""
Get a list of all jenkins build data, for completed builds
"""
builds_list = []
# download build data for all builds
for build_id in build_ids:
build = get_jenkins_build(
            jobs_dir, jenkins_url, job_name, build_id, headers,
)
# may return None if build is in progress
if build:
builds_list.append(build)
return builds_list
def clean_name(name):
"""
Clean up a name string. Currently only replaces spaces with underscores
"""
return name.replace(" ", "_")
def clean_url(url):
"""
remove prefix and any non-path friendly characters from URL
"""
return re.sub(r"\W", "_", re.sub(r"\w+://", "", url))
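
# For example, clean_url("https://jenkins.example.org/ci") returns
# "jenkins_example_org_ci", which is safe to use as a directory name under
# jobs_dir.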
def save_product_builds(product_doc, product_dir, builds):
"""
    save the product-specific build information, keeping only builds that pass
    each job's filters (if any)
"""
# duplicate the scrape doc into final product data
product_data = dict(product_doc)
# used to hold groups of jobs
groups = {}
# each doc can have multiple job groups (usually version-specific)
for jobgroup in product_doc["jenkins_jobs"]:
groups[jobgroup["group"]] = {}
# each job group can have multiple jobs
for job in jobgroup["jobs"]:
pbuilds = []
# get the build data for the job
for build in builds[job["name"]]:
jpedata = jsonpath_extract(build, job["extract"])
# filter builds
save = True
if "filter" in job:
for k, v in job["filter"].items():
# if data doesn't match the filter value given, don't save it
if jpedata[k] != v:
save = False
if save:
pbuilds.append(jpedata)
# allow job name to be overridden, for private jobs
if "name_override" in job:
groups[jobgroup["group"]][job["name_override"]] = pbuilds
else:
groups[jobgroup["group"]][job["name"]] = pbuilds
product_data["groups"] = groups
product_filename = "%s/%s/%s.json" % (
product_dir,
product_doc["onf_project"],
clean_name(product_doc["product_name"]),
)
json_file_dump(product_filename, product_data)
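
# The per-product file written above
# (<product_dir>/<onf_project>/<product_name>.json) is a copy of the scrape
# document plus a "groups" key mapping each group name to job names to lists
# of extracted build data.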
# main entry point
if __name__ == "__main__":
args = parse_collector_args()
if not os.path.isdir(args.product_dir):
logger.error("Output directory is not a directory: '%s'", args.product_dir)
sys.exit(1)
# only print log messages if debugging
if args.debug:
logger.setLevel(logging.DEBUG)
else:
logger.setLevel(logging.CRITICAL)
    # read in credentials file if the argument was passed
credentials = {}
if args.credentials:
cred_file = yaml.safe_load(args.credentials)
credentials = cred_file["credentials"]
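    # The credentials file is expected to look roughly like this (names and
    # values are illustrative); its top-level keys are referenced by the
    # per-job-group "credentials" entries handled below:
    #   credentials:
    #     my_private_jenkins:
    #       jenkins_api_user: someuser
    #       jenkins_api_token: sometoken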
# read in the Scrape File
sfile = parse_scrape_file(args.scrape_file)
# dict of job name -> build data
builds = {}
# Scrape File YAML may contain multiple documents
for sdoc in sfile:
# phase 1 - identify all the Jenkins jobs
# each doc can have multiple job groups (usually version-specific)
for jobgroup in sdoc["jenkins_jobs"]:
api_headers = []
if "credentials" in jobgroup:
if jobgroup["credentials"] in credentials:
api_headers = [
basic_auth_header(
credentials[jobgroup["credentials"]]["jenkins_api_user"],
credentials[jobgroup["credentials"]]["jenkins_api_token"],
)
]
else:
logger.error(
"Credentials for '%s' not supplied", jobgroup["credentials"]
)
sys.exit(1)
# each job group can have multiple jobs
for job in jobgroup["jobs"]:
# only redownload jobs that haven't been downloaded before
if job["name"] not in builds:
# get list of all Job ID's
build_ids = get_builds_for_job(
args.jobs_dir,
args.local,
jobgroup["jenkins_url"],
job["name"],
api_headers,
)
# get build info - either download or load from disk
builds[job["name"]] = get_all_jenkins_builds(
args.jobs_dir,
jobgroup["jenkins_url"],
job["name"],
build_ids,
api_headers,
)
# phase 2 - create per-product (document) lists of build extracted data
save_product_builds(sdoc, args.product_dir, builds)