#!/usr/bin/env python3

# SPDX-FileCopyrightText: © 2020 Open Networking Foundation <support@opennetworking.org>
# SPDX-License-Identifier: Apache-2.0

from __future__ import absolute_import

import argparse
import base64
import json
import logging
import os
import re
import sys
import urllib.request
import yaml

from jsonpath_ng.ext import parse as jpparse

# create shared logger
logging.basicConfig()
logger = logging.getLogger("sjsgc")

# global dict of jsonpath expressions -> compiled jsonpath parsers, as
# reparsing expressions in each loop results in 100x longer execution time
gjpaths = {}

# credentials global


def parse_collector_args():
    """
    parse CLI arguments
    """

    parser = argparse.ArgumentParser(description="Jenkins job results collector")

    # Positional args
    parser.add_argument(
        "scrape_file",
        default="scrape.yaml",
        type=argparse.FileType("r"),
        help="YAML file describing Jenkins job and data to scrape",
    )

    # Flags
    parser.add_argument(
        "--credentials",
        type=argparse.FileType("r"),
        help="Credentials to use for private jenkins jobs",
    )

    parser.add_argument(
        "--local", action="store_true", help="Prefer local copies of build lists"
    )

    parser.add_argument(
        "--product_dir", default="products", help="Directory to save per-product output"
    )

    parser.add_argument(
        "--jobs_dir", default="jobs", help="Directory to save raw Jenkins job output"
    )

    parser.add_argument(
        "--debug", action="store_true", help="Print additional debugging information"
    )

    return parser.parse_args()


def jenkins_job_list_url(server_url, job_name):
    """
    create a Jenkins JSON API URL for a job (list of builds)
    """

    url = "%s/job/%s/api/json" % (server_url, job_name)
    return url


def jenkins_job_build_url(server_url, job_name, build_number):
    """
    create a Jenkins JSON API URL for a specific build of a job
    """

    url = "%s/job/%s/%d/api/json" % (server_url, job_name, build_number)
    return url

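# For illustration only (the server and job names below are hypothetical),
# the two helpers above produce URLs of this shape:
#
#   jenkins_job_list_url("https://jenkins.example.org", "my_job")
#     -> "https://jenkins.example.org/job/my_job/api/json"
#   jenkins_job_build_url("https://jenkins.example.org", "my_job", 42)
#     -> "https://jenkins.example.org/job/my_job/42/api/json"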

def basic_auth_header(username, password):
    """
    returns a tuple containing an HTTP basic auth header
    """
    creds_str = "%s:%s" % (username, password)
    creds_b64 = base64.standard_b64encode(creds_str.encode("utf-8"))

    return ("Authorization", "Basic %s" % creds_b64.decode("utf-8"))

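# For illustration, with hypothetical credentials, basic_auth_header() returns
# a (name, value) tuple suitable for urllib's Request.add_header():
#
#   basic_auth_header("user", "token")
#     -> ("Authorization", "Basic dXNlcjp0b2tlbg==")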

def jenkins_api_get(url, headers=[]):
    """
    Get data from Jenkins JSON API endpoint, return data as a dict
    """

    request = urllib.request.Request(url)

    # add headers tuples
    for header in headers:
        request.add_header(*header)

    try:
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError:
        logger.exception("Server encountered an HTTPError at URL: '%s'", url)
    except urllib.error.URLError:
        logger.exception("An URLError occurred at URL: '%s'", url)
    else:
        # docs: https://docs.python.org/3/library/json.html
        jsondata = response.read()
        logger.debug("API response: %s", jsondata)

        try:
            data = json.loads(jsondata)
        except json.decoder.JSONDecodeError:
            logger.exception("Unable to decode JSON")
        else:
            logger.debug("JSON decoded: %s", data)

            return data

131
132def json_file_load(path):
133 """
134 Get data from local file, return data as a dict
135 """
136
137 with open(path) as jf:
138 try:
139 data = json.loads(jf.read())
140 except json.decoder.JSONDecodeError:
141 logger.exception("Unable to decode JSON from file: '%s'", path)
142
143 return data
144
145
def json_file_dump(path, data):
    """
    Write JSON file out to a path, creating directories in path as needed
    """

    # create directory if it doesn't already exist
    parent_dir = os.path.dirname(path)
    os.makedirs(parent_dir, exist_ok=True)

    # write file, pretty printed
    with open(path, "w") as jf:
        json.dump(data, jf, indent=2)


def parse_scrape_file(scrape_file):
    """
    Load and check the YAML scrape file, returning a list of one or more documents
    """

    yout = list(yaml.safe_load_all(scrape_file))  # safe_load_all returns a generator
    logger.debug("YAML decoded: %s", yout)

    def check_required_keys(to_check, req_keys):
        """
        check that all required keys are found in the dict to check
        """
        for rk in req_keys:
            if rk not in to_check:
                logger.error("Required key '%s' not found in: '%s'", rk, to_check)
                sys.exit(1)

    # check that required keys exist in each YAML document
    for ydoc in yout:
        check_required_keys(ydoc, ["jenkins_jobs", "product_name", "onf_project"])

        for group in ydoc["jenkins_jobs"]:
            check_required_keys(group, ["group", "jenkins_url", "jobs"])

            for job in group["jobs"]:
                check_required_keys(job, ["name", "extract"])

    return yout

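# A minimal sketch of a scrape file document (all values below are
# hypothetical). The required keys match the checks above; 'credentials',
# 'filter', and 'name_override' are optional keys used elsewhere in this
# script, and each 'extract' value is a jsonpath expression applied to the
# build JSON:
#
#   product_name: Example Product
#   onf_project: example
#   jenkins_jobs:
#     - group: Example Group
#       jenkins_url: https://jenkins.example.org
#       credentials: private_jenkins
#       jobs:
#         - name: example_job
#           extract:
#             result: result
#             duration: duration
#           filter:
#             result: SUCCESS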

def jsonpath_extract(json_in, extract_list):
    """
    Extract data from json using a mapping of names to jsonpath expressions
    """

    ret = {}

    for name, jpath in extract_list.items():

        # parsing jsonpath is expensive, so cache parsed jsonpath
        # expressions in a global dict
        if jpath not in gjpaths:
            gjpaths[jpath] = jpparse(jpath)

        jexpr = gjpaths[jpath]

        matches = [match.value for match in jexpr.find(json_in)]

        # If only a single match, unwrap from list
        if len(matches) == 1:
            ret[name] = matches[0]
        else:
            ret[name] = matches

    logger.debug("extracted data: %s", ret)

    return ret

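# For illustration, with a hypothetical extract mapping and build JSON,
# jsonpath_extract() returns named values; a single match is unwrapped from
# the list, multiple matches stay a list:
#
#   jsonpath_extract({"result": "SUCCESS", "duration": 300}, {"status": "result"})
#     -> {"status": "SUCCESS"}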

def get_builds_for_job(jobs_dir, local, jenkins_url, job_name, headers=[]):
    """
    Download list of builds from a Jenkins job, return list of build ids
    """

    # where to store jenkins JSON output with builds list
    jbuildlist = "%s/%s/%s/0_list.json" % (jobs_dir, clean_url(jenkins_url), job_name)

    if os.path.isfile(jbuildlist) and local:
        # if already downloaded and we want to use the local copy, load it
        jl = json_file_load(jbuildlist)
    else:
        # if not, query jenkins for the list of job builds
        jlu = jenkins_job_list_url(jenkins_url, job_name)
        jl = jenkins_api_get(jlu, headers)

        # save to disk
        json_file_dump(jbuildlist, jl)

    # JSONPath for list of builds in the job
    jexpr = jpparse("builds[*].number")

    # get a list of builds
    buildlist = [build.value for build in jexpr.find(jl)]

    return buildlist

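# The job list JSON from the Jenkins API typically includes a 'builds' array
# of objects, each carrying a 'number' field, e.g. (abridged, hypothetical):
#
#   {"builds": [{"number": 213, "url": "..."}, {"number": 212, "url": "..."}]}
#
# which the 'builds[*].number' jsonpath above reduces to [213, 212].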

def get_jenkins_build(jobs_dir, jenkins_url, job_name, build_id, headers=[]):
    """
    Download a single build and store it on disk, if job has completed
    """

    # path to store a copy of the JSON received from Jenkins
    jjson = "%s/%s/%s/%d_build.json" % (
        jobs_dir,
        clean_url(jenkins_url),
        job_name,
        build_id,
    )

    if os.path.isfile(jjson):
        # if we have already run and a local copy exists, read/return the local copy
        braw = json_file_load(jjson)
    else:
        # make an API call to get the JSON, store locally
        burl = jenkins_job_build_url(jenkins_url, job_name, build_id)
        braw = jenkins_api_get(burl, headers)

        # if the build is still in progress, the result field is null, so don't
        # return the build or save a copy, as the build status is not final.
        if not braw["result"]:
            return None

        # save to disk
        json_file_dump(jjson, braw)

    return braw


def get_all_jenkins_builds(jobs_dir, jenkins_url, job_name, build_ids, headers=[]):
    """
    Get a list of all jenkins build data, for completed builds
    """

    builds_list = []

    # download build data for all builds
    for build_id in build_ids:

        build = get_jenkins_build(
            jobs_dir, jenkins_url, job_name, build_id, headers,
        )

        # may return None if build is in progress
        if build:
            builds_list.append(build)

    return builds_list


def clean_name(name):
    """
    Clean up a name string. Currently only replaces spaces with underscores
    """
    return name.replace(" ", "_")


def clean_url(url):
    """
    remove prefix and any non-path friendly characters from URL
    """
    return re.sub(r"\W", "_", re.sub(r"\w+://", "", url))

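# For illustration, with a hypothetical product name and URL, the helpers
# above produce path-friendly strings:
#
#   clean_name("Example Product")            -> "Example_Product"
#   clean_url("https://jenkins.example.org") -> "jenkins_example_org"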

def save_product_builds(product_doc, product_dir, builds):
    """
    save the product-specific build information, if it's applicable to this
    product based on the filters
    """

    # duplicate the scrape doc into final product data
    product_data = dict(product_doc)

    # used to hold groups of jobs
    groups = {}

    # each doc can have multiple job groups (usually version-specific)
    for jobgroup in product_doc["jenkins_jobs"]:

        groups[jobgroup["group"]] = {}

        # each job group can have multiple jobs
        for job in jobgroup["jobs"]:

            pbuilds = []

            # get the build data for the job
            for build in builds[job["name"]]:

                jpedata = jsonpath_extract(build, job["extract"])

                # filter builds
                save = True
                if "filter" in job:
                    for k, v in job["filter"].items():
                        # if data doesn't match the filter value given, don't save it
                        if jpedata[k] != v:
                            save = False

                if save:
                    pbuilds.append(jpedata)

            # allow job name to be overridden, for private jobs
            if "name_override" in job:
                groups[jobgroup["group"]][job["name_override"]] = pbuilds
            else:
                groups[jobgroup["group"]][job["name"]] = pbuilds

    product_data["groups"] = groups

    product_filename = "%s/%s.json" % (
        product_dir,
        clean_name(product_doc["product_name"]),
    )

    json_file_dump(product_filename, product_data)


# main function that calls other functions
if __name__ == "__main__":

    args = parse_collector_args()

    if not os.path.isdir(args.product_dir):
        logger.error("Output directory is not a directory: '%s'", args.product_dir)
        sys.exit(1)

    # only print log messages if debugging
    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.CRITICAL)

    # read in credentials file if the argument is passed
    credentials = {}
    if args.credentials:
        cred_file = yaml.safe_load(args.credentials)
        credentials = cred_file["credentials"]
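
    # A sketch of the expected credentials file (values here are hypothetical);
    # the top-level 'credentials' key is read above, and each named entry must
    # provide the 'jenkins_api_user'/'jenkins_api_token' keys used below when a
    # job group names matching 'credentials':
    #
    #   credentials:
    #     private_jenkins:
    #       jenkins_api_user: user@example.org
    #       jenkins_api_token: abc123token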

    # read in the Scrape File
    sfile = parse_scrape_file(args.scrape_file)

    # dict of job name -> build data
    builds = {}

    # Scrape File YAML may contain multiple documents
    for sdoc in sfile:

        # phase 1 - identify all the Jenkins jobs
        # each doc can have multiple job groups (usually version-specific)
        for jobgroup in sdoc["jenkins_jobs"]:

            api_headers = []

            if "credentials" in jobgroup:
                if jobgroup["credentials"] in credentials:
                    api_headers = [
                        basic_auth_header(
                            credentials[jobgroup["credentials"]]["jenkins_api_user"],
                            credentials[jobgroup["credentials"]]["jenkins_api_token"],
                        )
                    ]
                else:
                    logger.error(
                        "Credentials for '%s' not supplied", jobgroup["credentials"]
                    )
                    sys.exit(1)

            # each job group can have multiple jobs
            for job in jobgroup["jobs"]:

                # only download jobs that haven't already been downloaded
                if job["name"] not in builds:

                    # get list of all build IDs for the job
                    build_ids = get_builds_for_job(
                        args.jobs_dir,
                        args.local,
                        jobgroup["jenkins_url"],
                        job["name"],
                        api_headers,
                    )

                    # get build info - either download or load from disk
                    builds[job["name"]] = get_all_jenkins_builds(
                        args.jobs_dir,
                        jobgroup["jenkins_url"],
                        job["name"],
                        build_ids,
                        api_headers,
                    )

        # phase 2 - create per-product (document) lists of build extracted data
        save_product_builds(sdoc, args.product_dir, builds)
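
# Example invocation (the script and file names here are placeholders, not
# mandated by the code): read scrape.yaml, prefer any locally cached build
# lists, store raw job JSON under the default 'jobs' directory, and write
# per-product summaries under the default 'products' directory:
#
#   python3 collector.py --credentials credentials.yaml --local scrape.yaml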