#!/usr/bin/env python3

# SPDX-FileCopyrightText: © 2020 Open Networking Foundation <support@opennetworking.org>
# SPDX-License-Identifier: Apache-2.0

from __future__ import absolute_import

import argparse
import base64
import json
import logging
import os
import re
import sys
import urllib.error
import urllib.request

import yaml
from jsonpath_ng.ext import parse as jpparse

# create shared logger
logging.basicConfig()
logger = logging.getLogger("sjsgc")

# global dict of jsonpath expressions -> compiled jsonpath parsers, as
# reparsing expressions in each loop results in 100x longer execution time
gjpaths = {}

# credentials dict is populated in __main__ when --credentials is passed


def parse_collector_args():
    """
    parse CLI arguments
    """

    parser = argparse.ArgumentParser(description="Jenkins job results collector")

    # Positional args
    parser.add_argument(
        "scrape_file",
        default="scrape.yaml",
        type=argparse.FileType("r"),
        help="YAML file describing Jenkins job and data to scrape",
    )

    # Flags
    parser.add_argument(
        "--credentials",
        type=argparse.FileType("r"),
        help="Credentials to use for private jenkins jobs",
    )

    parser.add_argument(
        "--local", action="store_true", help="Prefer local copies of build lists"
    )

    parser.add_argument(
        "--product_dir", default="products", help="Directory to save per-product output"
    )

    parser.add_argument(
        "--jobs_dir", default="jobs", help="Directory to save raw Jenkins job output"
    )

    parser.add_argument(
        "--debug", action="store_true", help="Print additional debugging information"
    )

    return parser.parse_args()
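

# An illustrative invocation (the script filename here is hypothetical); a
# typical run of this collector might look like:
#
#   ./sjsgc.py scrape.yaml --credentials credentials.yaml --debug
#
# where scrape.yaml describes the Jenkins jobs and data to scrape, and
# credentials.yaml holds Jenkins API credentials for any private jobs.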


def jenkins_job_list_url(server_url, job_name):
    """
    create a Jenkins JSON API URL for a job (list of builds)
    """

    url = "%s/job/%s/api/json" % (server_url, job_name)
    return url


def jenkins_job_build_url(server_url, job_name, build_number):
    """
    create a Jenkins JSON API URL for a specific build of a job
    """

    url = "%s/job/%s/%d/api/json" % (server_url, job_name, build_number)
    return url
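

# For illustration, with a made-up server_url "https://jenkins.example.org"
# and job_name "my_job", the helpers above produce:
#
#   jenkins_job_list_url(...)      -> "https://jenkins.example.org/job/my_job/api/json"
#   jenkins_job_build_url(..., 7)  -> "https://jenkins.example.org/job/my_job/7/api/json"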


def basic_auth_header(username, password):
    """
    returns a tuple containing an HTTP basic auth header
    """
    creds_str = "%s:%s" % (username, password)
    creds_b64 = base64.standard_b64encode(creds_str.encode("utf-8"))

    return ("Authorization", "Basic %s" % creds_b64.decode("utf-8"))
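

# Example (made-up credentials): basic_auth_header("user", "token") returns
# ("Authorization", "Basic dXNlcjp0b2tlbg=="), where the second element is
# the base64 encoding of "user:token".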


def jenkins_api_get(url, headers=[]):
    """
    Get data from Jenkins JSON API endpoint, return data as a dict, or None
    if the request or JSON decode fails
    """

    request = urllib.request.Request(url)

    # add header tuples to the request
    for header in headers:
        request.add_header(*header)

    try:
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError:
        logger.exception("Server encountered an HTTPError at URL: '%s'", url)
        return None
    except urllib.error.URLError:
        logger.exception("A URLError occurred at URL: '%s'", url)
        return None

    # docs: https://docs.python.org/3/library/json.html
    jsondata = response.read()
    logger.debug("API response: %s", jsondata)

    try:
        data = json.loads(jsondata)
    except json.decoder.JSONDecodeError:
        logger.exception("Unable to decode JSON")
        return None

    logger.debug("JSON decoded: %s", data)

    return data


def json_file_load(path):
    """
    Get data from a local JSON file, return data as a dict, or None if the
    JSON decode fails
    """

    with open(path) as jf:
        try:
            data = json.loads(jf.read())
        except json.decoder.JSONDecodeError:
            logger.exception("Unable to decode JSON from file: '%s'", path)
            return None

    return data


def json_file_dump(path, data):
    """
    Write JSON file out to a path, creating directories in path as needed
    """
    logger.debug("writing JSON file: %s", path)

    # create directory if it doesn't already exist
    parent_dir = os.path.dirname(path)
    os.makedirs(parent_dir, exist_ok=True)

    # write file, pretty printed
    with open(path, "w") as jf:
        json.dump(data, jf, indent=2)


def parse_scrape_file(scrape_file):
    """
    Load and check the YAML scrape file, returning a list of one or more
    documents
    """

    yout = list(yaml.safe_load_all(scrape_file))  # safe_load_all returns a generator
    logger.debug("YAML decoded: %s", yout)

    def check_required_keys(to_check, req_keys):
        """
        check that all required keys are found in the dict to check
        """
        for rk in req_keys:
            if rk not in to_check:
                logger.error("Required key '%s' not found in: '%s'", rk, to_check)
                sys.exit(1)

    # check that required keys exist in each YAML document
    for ydoc in yout:
        check_required_keys(ydoc, ["jenkins_jobs", "product_name", "onf_project"])

        for group in ydoc["jenkins_jobs"]:
            check_required_keys(group, ["group", "jenkins_url", "jobs"])

            for job in group["jobs"]:
                check_required_keys(job, ["name", "extract"])

    return yout
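

# A minimal sketch of one scrape file document, using made-up names and URLs;
# only the keys checked above are required, while "filter", "name_override",
# and "credentials" are optional keys consumed elsewhere in this script:
#
#   product_name: Example Product
#   onf_project: example
#   jenkins_jobs:
#     - group: "1.0"
#       jenkins_url: "https://jenkins.example.org"
#       jobs:
#         - name: "example_premerge"
#           extract:
#             result: "result"
#             timestamp: "timestamp"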


def jsonpath_extract(json_in, extract_list):
    """
    Extract data from json using a list of jsonpath expressions
    """

    ret = {}

    for name, jpath in extract_list.items():

        # parsing jsonpath is expensive, so cache parsed expressions in the
        # global gjpaths dict
        if jpath not in gjpaths:
            gjpaths[jpath] = jpparse(jpath)

        jexpr = gjpaths[jpath]

        matches = [match.value for match in jexpr.find(json_in)]

        # If only a single match, unwrap it from the list
        if len(matches) == 1:
            ret[name] = matches[0]
        else:
            ret[name] = matches

    logger.debug("extracted data: %s", ret)

    return ret
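

# For illustration (hypothetical build data): with
#
#   json_in = {"result": "SUCCESS", "actions": [{"failCount": 0}]}
#   extract_list = {"result": "result", "failCount": "actions[*].failCount"}
#
# jsonpath_extract returns {"result": "SUCCESS", "failCount": 0}, the single
# match for each expression unwrapped from its list.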


def get_builds_for_job(jobs_dir, local, jenkins_url, job_name, headers=[]):
    """
    Download list of builds from a Jenkins job, return list of build ids
    """

    # where to store jenkins JSON output with builds list
    jbuildlist = "%s/%s/%s/0_list.json" % (jobs_dir, clean_url(jenkins_url), job_name)

    if os.path.isfile(jbuildlist) and local:
        # if already downloaded and want to use the local copy, load it
        jl = json_file_load(jbuildlist)
    else:
        # if not, query jenkins for the list of job builds
        jlu = jenkins_job_list_url(jenkins_url, job_name)
        jl = jenkins_api_get(jlu, headers)

        # save to disk
        json_file_dump(jbuildlist, jl)

    # JSONPath for list of builds in the job
    jexpr = jpparse("builds[*].number")

    # get a list of builds
    buildlist = [build.value for build in jexpr.find(jl)]

    return buildlist
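

# The Jenkins job API JSON includes, among many other fields, a "builds"
# list; a trimmed, illustrative response might look like:
#
#   {"builds": [{"number": 12, "url": "..."}, {"number": 11, "url": "..."}]}
#
# which the "builds[*].number" JSONPath above reduces to [12, 11].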


def get_jenkins_build(jobs_dir, jenkins_url, job_name, build_id, headers=[]):
    """
    Download a single build and store it on disk, if job has completed
    """

    # path to store a copy of the JSON received from Jenkins
    jjson = "%s/%s/%s/%d_build.json" % (
        jobs_dir,
        clean_url(jenkins_url),
        job_name,
        build_id,
    )

    if os.path.isfile(jjson):
        # if have already run and local copy exists, read/return local copy
        braw = json_file_load(jjson)
    else:
        # make an API call to get the JSON, store locally
        burl = jenkins_job_build_url(jenkins_url, job_name, build_id)
        braw = jenkins_api_get(burl, headers)

        # if the build is still in progress the result field is null, so
        # don't save a copy or return the build, as its status is not final
        if not braw["result"]:
            return None

        # save to disk
        json_file_dump(jjson, braw)

    return braw


def get_all_jenkins_builds(jobs_dir, jenkins_url, job_name, build_ids, headers=[]):
    """
    Get a list of all jenkins build data, for completed builds
    """

    builds_list = []

    # download build data for all builds
    for build_id in build_ids:

        # use the function parameters, not the module-level globals that
        # happen to exist when this module is run as a script
        build = get_jenkins_build(jobs_dir, jenkins_url, job_name, build_id, headers)

        # may return None if build is in progress
        if build:
            builds_list.append(build)

    return builds_list


def clean_name(name):
    """
    Clean up a name string. Currently only replaces spaces with underscores
    """
    return name.replace(" ", "_")


def clean_url(url):
    """
    remove prefix and any non-path friendly characters from URL
    """
    return re.sub(r"\W", "_", re.sub(r"\w+://", "", url))
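

# Examples (made-up inputs):
#
#   clean_name("Example Product")             -> "Example_Product"
#   clean_url("https://jenkins.example.org")  -> "jenkins_example_org"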


def save_product_builds(product_doc, product_dir, builds):
    """
    save the product-specific build information, if it's applicable to this
    product based on the filters
    """

    # duplicate the scrape doc into final product data
    product_data = dict(product_doc)

    # used to hold groups of jobs
    groups = {}

    # each doc can have multiple job groups (usually version-specific)
    for jobgroup in product_doc["jenkins_jobs"]:

        groups[jobgroup["group"]] = {}

        # each job group can have multiple jobs
        for job in jobgroup["jobs"]:

            pbuilds = []

            # get the build data for the job
            for build in builds[job["name"]]:

                jpedata = jsonpath_extract(build, job["extract"])

                # filter builds
                save = True
                if "filter" in job:
                    for k, v in job["filter"].items():
                        # if data doesn't match the filter value given, don't save it
                        if jpedata[k] != v:
                            save = False

                if save:
                    pbuilds.append(jpedata)

            # allow job name to be overridden, for private jobs
            if "name_override" in job:
                groups[jobgroup["group"]][job["name_override"]] = pbuilds
            else:
                groups[jobgroup["group"]][job["name"]] = pbuilds

    product_data["groups"] = groups

    product_filename = "%s/%s/%s.json" % (
        product_dir,
        product_doc["onf_project"],
        clean_name(product_doc["product_name"]),
    )

    json_file_dump(product_filename, product_data)


# main function that calls other functions
if __name__ == "__main__":

    args = parse_collector_args()

    if not os.path.isdir(args.product_dir):
        logger.error("Output directory is not a directory: '%s'", args.product_dir)
        sys.exit(1)

    # only print log messages if debugging
    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.CRITICAL)

    # read in the credentials file, if that argument was passed
    credentials = {}
    if args.credentials:
        cred_file = yaml.safe_load(args.credentials)
        credentials = cred_file["credentials"]
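
    # A sketch of the credentials file layout implied by the lookups below
    # (names and values are placeholders):
    #
    #   credentials:
    #     my_private_jenkins:
    #       jenkins_api_user: "user"
    #       jenkins_api_token: "token"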

    # read in the Scrape File
    sfile = parse_scrape_file(args.scrape_file)

    # dict of job name -> build data
    builds = {}

    # Scrape File YAML may contain multiple documents
    for sdoc in sfile:

        # phase 1 - identify all the Jenkins jobs
        # each doc can have multiple job groups (usually version-specific)
        for jobgroup in sdoc["jenkins_jobs"]:

            api_headers = []

            if "credentials" in jobgroup:
                if jobgroup["credentials"] in credentials:
                    api_headers = [
                        basic_auth_header(
                            credentials[jobgroup["credentials"]]["jenkins_api_user"],
                            credentials[jobgroup["credentials"]]["jenkins_api_token"],
                        )
                    ]
                else:
                    logger.error(
                        "Credentials for '%s' not supplied", jobgroup["credentials"]
                    )
                    sys.exit(1)

            # each job group can have multiple jobs
            for job in jobgroup["jobs"]:

                # only download jobs that haven't been downloaded before
                if job["name"] not in builds:

                    # get the list of all build IDs for the job
                    build_ids = get_builds_for_job(
                        args.jobs_dir,
                        args.local,
                        jobgroup["jenkins_url"],
                        job["name"],
                        api_headers,
                    )

                    # get build info - either download or load from disk
                    builds[job["name"]] = get_all_jenkins_builds(
                        args.jobs_dir,
                        jobgroup["jenkins_url"],
                        job["name"],
                        build_ids,
                        api_headers,
                    )

        # phase 2 - create per-product (document) lists of build extracted data
        save_product_builds(sdoc, args.product_dir, builds)