#!/usr/bin/env python3

# SPDX-FileCopyrightText: © 2020 Open Networking Foundation <support@opennetworking.org>
# SPDX-License-Identifier: Apache-2.0

from __future__ import absolute_import

import argparse
import base64
import json
import logging
import os
import re
import sys
import urllib.error
import urllib.request
import yaml

from jsonpath_ng.ext import parse as jpparse

# create shared logger
logging.basicConfig()
logger = logging.getLogger("sjsgc")

# global dict of jsonpath expressions -> compiled jsonpath parsers, as
# reparsing expressions in each loop results in 100x longer execution time
gjpaths = {}

# credentials dict, populated in __main__ when --credentials is passed


def parse_collector_args():
    """
    parse CLI arguments
    """

    parser = argparse.ArgumentParser(description="Jenkins job results collector")

    # Positional args
    parser.add_argument(
        "scrape_file",
        default="scrape.yaml",
        type=argparse.FileType("r"),
        help="YAML file describing Jenkins job and data to scrape",
    )

    # Flags
    parser.add_argument(
        "--credentials",
        type=argparse.FileType("r"),
        help="Credentials to use for private Jenkins jobs",
    )

    parser.add_argument(
        "--local", action="store_true", help="Prefer local copies of build lists"
    )

    parser.add_argument(
        "--product_dir", default="products", help="Directory to save per-product output"
    )

    parser.add_argument(
        "--jobs_dir", default="jobs", help="Directory to save raw Jenkins job output"
    )

    parser.add_argument(
        "--debug", action="store_true", help="Print additional debugging information"
    )

    return parser.parse_args()


def jenkins_job_list_url(server_url, job_name):
    """
    create a Jenkins JSON API URL for a job (list of builds)
    """

    url = "%s/job/%s/api/json" % (server_url, job_name)
    return url


def jenkins_job_build_url(server_url, job_name, build_number):
    """
    create a Jenkins JSON API URL for a specific build of a job
    """

    url = "%s/job/%s/%d/api/json" % (server_url, job_name, build_number)
    return url
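
# For example (hypothetical server and job names, not from this repo):
#   jenkins_job_list_url("https://jenkins.example.org", "my-job")
#   -> "https://jenkins.example.org/job/my-job/api/json"
#   jenkins_job_build_url("https://jenkins.example.org", "my-job", 42)
#   -> "https://jenkins.example.org/job/my-job/42/api/json"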


def basic_auth_header(username, password):
    """
    returns a tuple containing an HTTP basic auth header
    """
    creds_str = "%s:%s" % (username, password)
    creds_b64 = base64.standard_b64encode(creds_str.encode("utf-8"))

    return ("Authorization", "Basic %s" % creds_b64.decode("utf-8"))
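
# For example (hypothetical credentials):
#   basic_auth_header("admin", "token123")
#   -> ("Authorization", "Basic YWRtaW46dG9rZW4xMjM=")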


def jenkins_api_get(url, headers=[]):
    """
    Get data from Jenkins JSON API endpoint, return data as a dict
    """

    request = urllib.request.Request(url)

    # add headers tuples
    for header in headers:
        request.add_header(*header)

    try:
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError:
        logger.exception("Server encountered an HTTPError at URL: '%s'", url)
        sys.exit(1)
    except urllib.error.URLError:
        logger.exception("A URLError occurred at URL: '%s'", url)
        sys.exit(1)

    # docs: https://docs.python.org/3/library/json.html
    jsondata = response.read()
    logger.debug("API response: %s", jsondata)

    try:
        data = json.loads(jsondata)
    except json.decoder.JSONDecodeError:
        logger.exception("Unable to decode JSON")
        sys.exit(1)

    logger.debug("JSON decoded: %s", data)

    return data
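
# For example (hypothetical URL and credentials):
#   jenkins_api_get("https://jenkins.example.org/job/my-job/api/json",
#                   [basic_auth_header("admin", "token123")])
# returns the decoded response JSON as a dict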


def json_file_load(path):
    """
    Get data from local file, return data as a dict
    """

    with open(path) as jf:
        try:
            data = json.loads(jf.read())
        except json.decoder.JSONDecodeError:
            logger.exception("Unable to decode JSON from file: '%s'", path)
            sys.exit(1)

    return data


def json_file_dump(path, data):
    """
    Write JSON file out to a path, creating directories in path as needed
    """
    logger.debug("writing JSON file: %s", path)

    # create directory if it doesn't already exist
    parent_dir = os.path.dirname(path)
    os.makedirs(parent_dir, exist_ok=True)

    # write file, pretty printed
    with open(path, "w") as jf:
        json.dump(data, jf, indent=2)


def parse_scrape_file(scrape_file):
    """
    Load and check the YAML scrape file, returning a list of one or more documents
    """

    yout = list(yaml.safe_load_all(scrape_file))  # safe_load_all returns a generator
    logger.debug("YAML decoded: %s", yout)

    def check_required_keys(to_check, req_keys):
        """
        check that all required keys are found in the dict to check
        """
        for rk in req_keys:
            if rk not in to_check:
                logger.error("Required key '%s' not found in: '%s'", rk, to_check)
                sys.exit(1)

    # check that required keys exist in each YAML document
    for ydoc in yout:
        check_required_keys(ydoc, ["jenkins_jobs", "product_name", "onf_project"])

        for group in ydoc["jenkins_jobs"]:
            check_required_keys(group, ["group", "jenkins_url", "jobs"])

            for job in group["jobs"]:
                check_required_keys(job, ["name", "extract"])

    return yout
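
# A minimal scrape file document might look like this (hypothetical values,
# showing only the keys the checks above require):
#
#   product_name: Example Product
#   onf_project: example
#   jenkins_jobs:
#     - group: "1.0"
#       jenkins_url: "https://jenkins.example.org"
#       jobs:
#         - name: my-job
#           extract:
#             status: "result"
#
# Optional keys handled later in this script: "credentials" (per job group),
# "filter" and "name_override" (per job).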


def jsonpath_extract(json_in, extract_list):
    """
    Extract data from json using list of jsonpath expressions
    """

    ret = {}

    for name, jpath in extract_list.items():

        # parsing jsonpath is expensive, so cache compiled expressions in the
        # global dict of parsed jsonpath expressions
        if jpath not in gjpaths:
            gjpaths[jpath] = jpparse(jpath)

        jexpr = gjpaths[jpath]

        matches = [match.value for match in jexpr.find(json_in)]

        # If only a single match, unwrap it from the list
        if len(matches) == 1:
            ret[name] = matches[0]
        else:
            ret[name] = matches

    logger.debug("extracted data: %s", ret)

    return ret
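
# For example (hypothetical build data):
#   jsonpath_extract({"result": "SUCCESS", "number": 42},
#                    {"status": "result", "build": "number"})
#   -> {"status": "SUCCESS", "build": 42}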


def get_builds_for_job(jobs_dir, local, jenkins_url, job_name, headers=[]):
    """
    Download list of builds from a Jenkins job, return list of build ids
    """

    # where to store jenkins JSON output with builds list
    jbuildlist = "%s/%s/%s/0_list.json" % (jobs_dir, clean_url(jenkins_url), job_name)

    if os.path.isfile(jbuildlist) and local:
        # if already downloaded and want to use the local copy, load it
        jl = json_file_load(jbuildlist)
    else:
        # if not, query jenkins for the list of job builds
        jlu = jenkins_job_list_url(jenkins_url, job_name)
        jl = jenkins_api_get(jlu, headers)

        # save to disk
        json_file_dump(jbuildlist, jl)

    # JSONPath for list of builds in the job
    jexpr = jpparse("builds[*].number")

    # get a list of builds
    buildlist = [build.value for build in jexpr.find(jl)]

    return buildlist
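
# The job JSON from Jenkins includes a "builds" list; e.g. a trimmed
# (hypothetical) response of {"builds": [{"number": 23}, {"number": 22}]}
# yields buildlist == [23, 22]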


def get_jenkins_build(jobs_dir, jenkins_url, job_name, build_id, headers=[]):
    """
    Download a single build and store it on disk, if job has completed
    """

    # path to store a copy of the JSON received from Jenkins
    jjson = "%s/%s/%s/%d_build.json" % (
        jobs_dir,
        clean_url(jenkins_url),
        job_name,
        build_id,
    )

    if os.path.isfile(jjson):
        # if have already run and local copy exists, read/return local copy
        braw = json_file_load(jjson)
    else:
        # make an API call to get the JSON, store locally
        burl = jenkins_job_build_url(jenkins_url, job_name, build_id)
        braw = jenkins_api_get(burl, headers)

        # if the build is still in progress the result field is null, so don't
        # return the build or save a copy, as build status is not final.
        if not braw["result"]:
            return None

        # save to disk
        json_file_dump(jjson, braw)

    return braw


def get_all_jenkins_builds(jobs_dir, jenkins_url, job_name, build_ids, headers=[]):
    """
    Get a list of all jenkins build data, for completed builds
    """

    builds_list = []

    # download build data for all builds
    for build_id in build_ids:

        build = get_jenkins_build(jobs_dir, jenkins_url, job_name, build_id, headers)

        # may return None if build is in progress
        if build:
            builds_list.append(build)

    return builds_list


def clean_name(name):
    """
    Clean up a name string. Currently only replaces spaces with underscores
    """
    return name.replace(" ", "_")


def clean_url(url):
    """
    remove prefix and any non-path friendly characters from URL
    """
    return re.sub(r"\W", "_", re.sub(r"\w+://", "", url))
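
# For example (hypothetical inputs):
#   clean_name("Example Product 1.0") -> "Example_Product_1.0"
#   clean_url("https://jenkins.example.org/path") -> "jenkins_example_org_path"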


def save_product_builds(product_doc, product_dir, builds):
    """
    save the product-specific build information, if it's applicable to this
    product based on the filters
    """

    # duplicate the scrape doc into final product data
    product_data = dict(product_doc)

    # used to hold groups of jobs
    groups = {}

    # each doc can have multiple job groups (usually version-specific)
    for jobgroup in product_doc["jenkins_jobs"]:

        groups[jobgroup["group"]] = {}

        # each job group can have multiple jobs
        for job in jobgroup["jobs"]:

            pbuilds = []

            # get the build data for the job
            for build in builds[job["name"]]:

                jpedata = jsonpath_extract(build, job["extract"])

                # filter builds
                save = True
                if "filter" in job:
                    for k, v in job["filter"].items():
                        # if data doesn't match the filter value given, don't save it
                        if jpedata[k] != v:
                            save = False

                if save:
                    pbuilds.append(jpedata)

            # allow job name to be overridden, for private jobs
            if "name_override" in job:
                groups[jobgroup["group"]][job["name_override"]] = pbuilds
            else:
                groups[jobgroup["group"]][job["name"]] = pbuilds

    product_data["groups"] = groups

    product_filename = "%s/%s/%s.json" % (
        product_dir,
        product_doc["onf_project"],
        clean_name(product_doc["product_name"]),
    )

    json_file_dump(product_filename, product_data)
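
# With (hypothetical) product_dir="products", onf_project="example", and
# product_name="Example Product", this writes "products/example/Example_Product.json"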


# main function that calls other functions
if __name__ == "__main__":

    args = parse_collector_args()

    if not os.path.isdir(args.product_dir):
        logger.error("Output directory is not a directory: '%s'", args.product_dir)
        sys.exit(1)

    # only print log messages if debugging
    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.CRITICAL)

    # read in the credentials file if the argument was passed
    credentials = {}
    if args.credentials:
        cred_file = yaml.safe_load(args.credentials)
        credentials = cred_file["credentials"]

    # read in the scrape file
    sfile = parse_scrape_file(args.scrape_file)

    # dict of job name -> build data
    builds = {}

    # the scrape file YAML may contain multiple documents
    for sdoc in sfile:

        # phase 1 - identify all the Jenkins jobs
        # each doc can have multiple job groups (usually version-specific)
        for jobgroup in sdoc["jenkins_jobs"]:

            api_headers = []

            if "credentials" in jobgroup:
                if jobgroup["credentials"] in credentials:
                    api_headers = [
                        basic_auth_header(
                            credentials[jobgroup["credentials"]]["jenkins_api_user"],
                            credentials[jobgroup["credentials"]]["jenkins_api_token"],
                        )
                    ]
                else:
                    logger.error(
                        "Credentials for '%s' not supplied", jobgroup["credentials"]
                    )
                    sys.exit(1)

            # each job group can have multiple jobs
            for job in jobgroup["jobs"]:

                # only download jobs that haven't been downloaded before
                if job["name"] not in builds:

                    # get list of all build IDs for the job
                    build_ids = get_builds_for_job(
                        args.jobs_dir,
                        args.local,
                        jobgroup["jenkins_url"],
                        job["name"],
                        api_headers,
                    )

                    # get build info - either download or load from disk
                    builds[job["name"]] = get_all_jenkins_builds(
                        args.jobs_dir,
                        jobgroup["jenkins_url"],
                        job["name"],
                        build_ids,
                        api_headers,
                    )

        # phase 2 - create per-product (document) lists of build extracted data
        save_product_builds(sdoc, args.product_dir, builds)
445 save_product_builds(sdoc, args.product_dir, builds)