#!/usr/bin/env python3

# SPDX-FileCopyrightText: © 2020 Open Networking Foundation <support@opennetworking.org>
# SPDX-License-Identifier: Apache-2.0

from __future__ import absolute_import

import argparse
import base64
import json
import logging
import os
import re
import sys
import urllib.request
import yaml

from jsonpath_ng.ext import parse as jpparse

# create shared logger
logging.basicConfig()
logger = logging.getLogger("sjsgc")

# global dict of jsonpath expressions -> compiled jsonpath parsers, as
# reparsing expressions in each loop results in 100x longer execution time
gjpaths = {}

# credentials global


def parse_collector_args():
    """
    parse CLI arguments
    """

    parser = argparse.ArgumentParser(description="Jenkins job results collector")

    # Positional args
    parser.add_argument(
        "scrape_file",
        default="scrape.yaml",
        type=argparse.FileType("r"),
        help="YAML file describing Jenkins job and data to scrape",
    )

    # Flags
    parser.add_argument(
        "--credentials",
        type=argparse.FileType("r"),
        help="Credentials to use for private jenkins jobs",
    )

    parser.add_argument(
        "--local", action="store_true", help="Prefer local copies of build lists"
    )

    parser.add_argument(
        "--product_dir", default="products", help="Directory to save per-product output"
    )

    parser.add_argument(
        "--jobs_dir", default="jobs", help="Directory to save raw Jenkins job output"
    )

    parser.add_argument(
        "--debug", action="store_true", help="Print additional debugging information"
    )

    return parser.parse_args()


def jenkins_job_list_url(server_url, job_name):
    """
    create a Jenkins JSON API URL for a job (list of builds)
    """

    url = "%s/job/%s/api/json" % (server_url, job_name)
    return url


def jenkins_job_build_url(server_url, job_name, build_number):
    """
    create a Jenkins JSON API URL for a specific build of a job
    """

    url = "%s/job/%s/%d/api/json" % (server_url, job_name, build_number)
    return url

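# For illustration only (the server and job names below are hypothetical),
# the two helpers above produce URLs of this shape:
#
#   jenkins_job_list_url("https://jenkins.example.org", "my_job")
#     -> "https://jenkins.example.org/job/my_job/api/json"
#   jenkins_job_build_url("https://jenkins.example.org", "my_job", 42)
#     -> "https://jenkins.example.org/job/my_job/42/api/json"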

def basic_auth_header(username, password):
    """
    returns a tuple containing an HTTP basic auth header
    """
    creds_str = "%s:%s" % (username, password)
    creds_b64 = base64.standard_b64encode(creds_str.encode("utf-8"))

    return ("Authorization", "Basic %s" % creds_b64.decode("utf-8"))

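# For illustration, with hypothetical credentials, basic_auth_header() returns
# a (name, value) tuple suitable for urllib's Request.add_header():
#
#   basic_auth_header("user", "token")
#     -> ("Authorization", "Basic dXNlcjp0b2tlbg==")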

def jenkins_api_get(url, headers=[]):
    """
    Get data from Jenkins JSON API endpoint, return data as a dict
    """

    request = urllib.request.Request(url)

    # add headers tuples
    for header in headers:
        request.add_header(*header)

    try:
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError:
        logger.exception("Server encountered an HTTPError at URL: '%s'", url)
    except urllib.error.URLError:
        logger.exception("An URLError occurred at URL: '%s'", url)
    else:
        # docs: https://docs.python.org/3/library/json.html
        jsondata = response.read()
        logger.debug("API response: %s", jsondata)

        try:
            data = json.loads(jsondata)
        except json.decoder.JSONDecodeError:
            logger.exception("Unable to decode JSON")
        else:
            logger.debug("JSON decoded: %s", data)

            return data

131
132def json_file_load(path):
133 """
134 Get data from local file, return data as a dict
135 """
136
137 with open(path) as jf:
138 try:
139 data = json.loads(jf.read())
140 except json.decoder.JSONDecodeError:
141 logger.exception("Unable to decode JSON from file: '%s'", path)
142
143 return data
144
145
def json_file_dump(path, data):
    """
    Write JSON file out to a path, creating directories in path as needed
    """

    # create directory if it doesn't already exist
    parent_dir = os.path.dirname(path)
    os.makedirs(parent_dir, exist_ok=True)

    # write file, pretty printed
    with open(path, "w") as jf:
        json.dump(data, jf, indent=2)


def parse_scrape_file(scrape_file):
    """
    Load and check the YAML scrape file, returning a list of one or more documents
    """

    yout = list(yaml.safe_load_all(scrape_file))  # safe_load_all returns a generator
    logger.debug("YAML decoded: %s", yout)

    def check_required_keys(to_check, req_keys):
        """
        check that all required keys are found in the dict to check
        """
        for rk in req_keys:
            if rk not in to_check:
                logger.error("Required key '%s' not found in: '%s'", rk, to_check)
                sys.exit(1)

    # check that required keys exist in each YAML document
    for ydoc in yout:
        check_required_keys(ydoc, ["jenkins_jobs", "product_name", "onf_project"])

        for group in ydoc["jenkins_jobs"]:
            check_required_keys(group, ["group", "jenkins_url", "jobs"])

            for job in group["jobs"]:
                check_required_keys(job, ["name", "extract"])

    return yout

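# A minimal sketch of a scrape file document (all values below are
# hypothetical). The required keys match the checks above; 'credentials',
# 'filter', and 'name_override' are optional keys used elsewhere in this
# script, and each 'extract' value is a jsonpath expression applied to the
# build JSON:
#
#   product_name: Example Product
#   onf_project: example
#   jenkins_jobs:
#     - group: Example Group
#       jenkins_url: https://jenkins.example.org
#       credentials: private_jenkins
#       jobs:
#         - name: example_job
#           extract:
#             result: result
#             duration: duration
#           filter:
#             result: SUCCESS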

def jsonpath_extract(json_in, extract_list):
    """
    Extract data from json using a mapping of names to jsonpath expressions
    """

    ret = {}

    for name, jpath in extract_list.items():

        # parsing jsonpath is expensive, so cache parsed jsonpath
        # expressions in a global dict
        if jpath not in gjpaths:
            gjpaths[jpath] = jpparse(jpath)

        jexpr = gjpaths[jpath]

        matches = [match.value for match in jexpr.find(json_in)]

        # If only a single match, unwrap from list
        if len(matches) == 1:
            ret[name] = matches[0]
        else:
            ret[name] = matches

    logger.debug("extracted data: %s", ret)

    return ret

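# For illustration, with a hypothetical extract mapping and build JSON,
# jsonpath_extract() returns named values; a single match is unwrapped from
# the list, multiple matches stay a list:
#
#   jsonpath_extract({"result": "SUCCESS", "duration": 300}, {"status": "result"})
#     -> {"status": "SUCCESS"}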

def get_builds_for_job(jobs_dir, local, jenkins_url, job_name, headers=[]):
    """
    Download list of builds from a Jenkins job, return list of build ids
    """

    # where to store jenkins JSON output with builds list
    jbuildlist = "%s/%s/%s/0_list.json" % (jobs_dir, clean_url(jenkins_url), job_name)

    if os.path.isfile(jbuildlist) and local:
        # if already downloaded and we want to use the local copy, load it
        jl = json_file_load(jbuildlist)
    else:
        # if not, query jenkins for the list of job builds
        jlu = jenkins_job_list_url(jenkins_url, job_name)
        jl = jenkins_api_get(jlu, headers)

        # save to disk
        json_file_dump(jbuildlist, jl)

    # JSONPath for list of builds in the job
    jexpr = jpparse("builds[*].number")

    # get a list of builds
    buildlist = [build.value for build in jexpr.find(jl)]

    return buildlist

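# The job list JSON from the Jenkins API typically includes a 'builds' array
# of objects, each carrying a 'number' field, e.g. (abridged, hypothetical):
#
#   {"builds": [{"number": 213, "url": "..."}, {"number": 212, "url": "..."}]}
#
# which the 'builds[*].number' jsonpath above reduces to [213, 212].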

def get_jenkins_build(jobs_dir, jenkins_url, job_name, build_id, headers=[]):
    """
    Download a single build and store it on disk, if job has completed
    """

    # path to store a copy of the JSON received from Jenkins
    jjson = "%s/%s/%s/%d_build.json" % (
        jobs_dir,
        clean_url(jenkins_url),
        job_name,
        build_id,
    )

    if os.path.isfile(jjson):
        # if we have already run and a local copy exists, read/return the local copy
        braw = json_file_load(jjson)
    else:
        # make an API call to get the JSON, store locally
        burl = jenkins_job_build_url(jenkins_url, job_name, build_id)
        braw = jenkins_api_get(burl, headers)

        # if the build is still in progress, the result field is null, so don't
        # return the build or save a copy, as the build status is not final.
        if not braw["result"]:
            return None

        # save to disk
        json_file_dump(jjson, braw)

    return braw


def get_all_jenkins_builds(jobs_dir, jenkins_url, job_name, build_ids, headers=[]):
    """
    Get a list of all jenkins build data, for completed builds
    """

    builds_list = []

    # download build data for all builds
    for build_id in build_ids:

        build = get_jenkins_build(
            jobs_dir, jenkins_url, job_name, build_id, headers,
        )

        # may return None if build is in progress
        if build:
            builds_list.append(build)

    return builds_list


def clean_name(name):
    """
    Clean up a name string. Currently only replaces spaces with underscores
    """
    return name.replace(" ", "_")


def clean_url(url):
    """
    remove prefix and any non-path friendly characters from URL
    """
    return re.sub(r"\W", "_", re.sub(r"\w+://", "", url))

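# For illustration, with a hypothetical product name and URL, the helpers
# above produce path-friendly strings:
#
#   clean_name("Example Product")            -> "Example_Product"
#   clean_url("https://jenkins.example.org") -> "jenkins_example_org"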

def save_product_builds(product_doc, product_dir, builds):
    """
    save the product-specific build information, if it's applicable to this
    product based on the filters
    """

    # duplicate the scrape doc into final product data
    product_data = dict(product_doc)

    # used to hold groups of jobs
    groups = {}

    # each doc can have multiple job groups (usually version-specific)
    for jobgroup in product_doc["jenkins_jobs"]:

        groups[jobgroup["group"]] = {}

        # each job group can have multiple jobs
        for job in jobgroup["jobs"]:

            pbuilds = []

            # get the build data for the job
            for build in builds[job["name"]]:

                jpedata = jsonpath_extract(build, job["extract"])

                # filter builds
                save = True
                if "filter" in job:
                    for k, v in job["filter"].items():
                        # if data doesn't match the filter value given, don't save it
                        if jpedata[k] != v:
                            save = False

                if save:
                    pbuilds.append(jpedata)

            # allow job name to be overridden, for private jobs
            if "name_override" in job:
                groups[jobgroup["group"]][job["name_override"]] = pbuilds
            else:
                groups[jobgroup["group"]][job["name"]] = pbuilds

    product_data["groups"] = groups

    product_filename = "%s/%s.json" % (
        product_dir,
        clean_name(product_doc["product_name"]),
    )

    json_file_dump(product_filename, product_data)


# main function that calls other functions
if __name__ == "__main__":

    args = parse_collector_args()

    if not os.path.isdir(args.product_dir):
        logger.error("Output directory is not a directory: '%s'", args.product_dir)
        sys.exit(1)

    # only print log messages if debugging
    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.CRITICAL)

    # read in credentials file if the argument is passed
    credentials = {}
    if args.credentials:
        cred_file = yaml.safe_load(args.credentials)
        credentials = cred_file["credentials"]
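
    # A sketch of the expected credentials file (values here are hypothetical);
    # the top-level 'credentials' key is read above, and each named entry must
    # provide the 'jenkins_api_user'/'jenkins_api_token' keys used below when a
    # job group names matching 'credentials':
    #
    #   credentials:
    #     private_jenkins:
    #       jenkins_api_user: user@example.org
    #       jenkins_api_token: abc123token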

    # read in the Scrape File
    sfile = parse_scrape_file(args.scrape_file)

    # dict of job name -> build data
    builds = {}

    # Scrape File YAML may contain multiple documents
    for sdoc in sfile:

        # phase 1 - identify all the Jenkins jobs
        # each doc can have multiple job groups (usually version-specific)
        for jobgroup in sdoc["jenkins_jobs"]:

            api_headers = []

            if "credentials" in jobgroup:
                if jobgroup["credentials"] in credentials:
                    api_headers = [
                        basic_auth_header(
                            credentials[jobgroup["credentials"]]["jenkins_api_user"],
                            credentials[jobgroup["credentials"]]["jenkins_api_token"],
                        )
                    ]
                else:
                    logger.error(
                        "Credentials for '%s' not supplied", jobgroup["credentials"]
                    )
                    sys.exit(1)

            # each job group can have multiple jobs
            for job in jobgroup["jobs"]:

                # only download jobs that haven't already been downloaded
                if job["name"] not in builds:

                    # get list of all build IDs for the job
                    build_ids = get_builds_for_job(
                        args.jobs_dir,
                        args.local,
                        jobgroup["jenkins_url"],
                        job["name"],
                        api_headers,
                    )

                    # get build info - either download or load from disk
                    builds[job["name"]] = get_all_jenkins_builds(
                        args.jobs_dir,
                        jobgroup["jenkins_url"],
                        job["name"],
                        build_ids,
                        api_headers,
                    )

        # phase 2 - create per-product (document) lists of build extracted data
        save_product_builds(sdoc, args.product_dir, builds)
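
# Example invocation (the script and file names here are placeholders, not
# mandated by the code): read scrape.yaml, prefer any locally cached build
# lists, store raw job JSON under the default 'jobs' directory, and write
# per-product summaries under the default 'products' directory:
#
#   python3 collector.py --credentials credentials.yaml --local scrape.yaml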