blob: e75e253f4e566df5118b8b4aa6b2505388a4a95e [file] [log] [blame]
Scott Baker43adf1b2014-03-19 21:54:55 -07001from bigquery_analytics import BigQueryAnalytics
Scott Baker050b1b82014-03-27 09:13:41 -07002import datetime
3import re
Scott Bakerc527fda2014-03-20 17:14:52 -07004import os
5import sys
Scott Baker43adf1b2014-03-19 21:54:55 -07006import json
Scott Baker78ab1012014-03-19 23:44:39 -07007import traceback
Scott Baker050b1b82014-03-27 09:13:41 -07008import urllib2
Scott Baker43adf1b2014-03-19 21:54:55 -07009
# Make the planetstack Django project importable: prefer the developer
# checkout when present, otherwise the production install location.
if os.path.exists("/home/smbaker/projects/vicci/plstackapi/planetstack"):
    sys.path.append("/home/smbaker/projects/vicci/plstackapi/planetstack")
else:
    sys.path.append("/opt/planetstack")

# Configure Django before importing any model classes.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "planetstack.settings")
from django import db
from django.db import connection
from core.models import Slice, Sliver, ServiceClass, Reservation, Tag, Network, User, Node, Image, Deployment, Site, NetworkTemplate, NetworkSlice, Service

# CPU-load thresholds used to scale the 0.0-1.0 "hotness" value computed
# in merge_datamodel_sites (below BLUE_LOAD -> 0.0, at RED_LOAD -> 1.0).
BLUE_LOAD=5000000
RED_LOAD=15000000
22
class PlanetStackAnalytics(BigQueryAnalytics):
    """BigQuery-backed analytics for PlanetStack monitoring events.

    Composes aggregate queries (avg/sum/count/computed fields) over the
    "vicci" dataset and formats the results for several consumers (JSON
    dicts/arrays, HTML tables, Google charts, the HPC dashboard).
    """

    def __init__(self, tableName="demoevents"):
        # tableName: BigQuery table holding the monitoring events.
        BigQueryAnalytics.__init__(self, tableName)
Scott Bakerc527fda2014-03-20 17:14:52 -070027 def service_to_sliceNames(self, serviceName):
28 service=Service.objects.get(name=serviceName)
29 try:
30 slices = service.slices.all()
31 except:
32 # BUG in data model -- Slice.service has related name 'service' and
33 # it should be 'slices'
34 slices = service.service.all()
35
36 return [slice.name for slice in slices]
37
Scott Baker050b1b82014-03-27 09:13:41 -070038 def compose_query(self, slice=None, site=None, node=None, service=None, timeBucket="60", avg=[], sum=[], count=[], computed=[], groupBy=["Time"], orderBy=["Time"], tableName="demoevents"):
Scott Baker43adf1b2014-03-19 21:54:55 -070039 tablePart = "%s.%s@-3600000--1" % ("vicci", tableName)
40
41 fields = []
42 fieldNames = []
43
Scott Baker050b1b82014-03-27 09:13:41 -070044 fields.append("SEC_TO_TIMESTAMP(INTEGER(TIMESTAMP_TO_SEC(time)/%s)*%s) as Time" % (str(timeBucket),str(timeBucket)))
45 #fields.append("INTEGER(TIMESTAMP_TO_SEC(time)/%s)*%s as Time" % (str(timeBucket),str(timeBucket)))
Scott Baker43adf1b2014-03-19 21:54:55 -070046
47 for fieldName in avg:
48 fields.append("AVG(%s) as avg_%s" % (fieldName, fieldName.replace("%","")))
49 fieldNames.append("avg_%s" % fieldName.replace("%",""))
50
51 for fieldName in sum:
52 fields.append("SUM(%s) as sum_%s" % (fieldName, fieldName.replace("%","")))
53 fieldNames.append("sum_%s" % fieldName.replace("%",""))
54
55 for fieldName in count:
56 fields.append("COUNT(distinct %s) as count_%s" % (fieldName, fieldName.replace("%","")))
57 fieldNames.append("count_%s" % fieldName.replace("%",""))
58
59 for fieldName in computed:
60 operator = "/"
61 parts = fieldName.split("/")
62 computedFieldName = "computed_" + parts[0].replace("%","")+"_div_"+parts[1].replace("%","")
63 if len(parts)==1:
64 operator = "*"
65 parts = computed.split("*")
66 computedFieldName = "computed_" + parts[0].replace("%","")+"_mult_"+parts[1].replace("%","")
67 fields.append("SUM(%s)%sSUM(%s) as %s" % (parts[0], operator, parts[1], computedFieldName))
68 fieldNames.append(computedFieldName)
69
Scott Bakerc527fda2014-03-20 17:14:52 -070070 for fieldName in groupBy:
Scott Baker050b1b82014-03-27 09:13:41 -070071 if (fieldName not in ["Time"]):
Scott Bakerc527fda2014-03-20 17:14:52 -070072 fields.append(fieldName)
73 fieldNames.append(fieldName)
74
Scott Baker43adf1b2014-03-19 21:54:55 -070075 fields = ", ".join(fields)
76
77 where = []
78
79 if slice:
80 where.append("%%slice='%s'" % slice)
81 if site:
82 where.append("%%site='%s'" % site)
83 if node:
84 where.append("%%hostname='%s'" % node)
Scott Bakerc527fda2014-03-20 17:14:52 -070085 if service:
86 sliceNames = self.service_to_sliceNames(service)
87 if sliceNames:
88 where.append("(" + " OR ".join(["%%slice='%s'" % sliceName for sliceName in sliceNames]) +")")
Scott Baker43adf1b2014-03-19 21:54:55 -070089
90 if where:
91 where = " WHERE " + " AND ".join(where)
92 else:
93 where =""
94
95 if groupBy:
Scott Bakerc527fda2014-03-20 17:14:52 -070096 groupBySub = " GROUP BY " + ",".join(groupBy + ["%hostname"])
Scott Baker43adf1b2014-03-19 21:54:55 -070097 groupBy = " GROUP BY " + ",".join(groupBy)
98 else:
Scott Bakerc527fda2014-03-20 17:14:52 -070099 groupBySub = " GROUP BY %hostname"
Scott Baker43adf1b2014-03-19 21:54:55 -0700100 groupBy = ""
101
102 if orderBy:
103 orderBy = " ORDER BY " + ",".join(orderBy)
104 else:
105 orderBy = ""
106
107 if computed:
108 subQuery = "SELECT %%hostname, %s FROM [%s]" % (fields, tablePart)
109 if where:
110 subQuery = subQuery + where
Scott Bakerc527fda2014-03-20 17:14:52 -0700111 subQuery = subQuery + groupBySub
112 #subQuery = subQuery + " GROUP BY %s,%%hostname" % timeField
Scott Baker43adf1b2014-03-19 21:54:55 -0700113
114 sumFields = []
115 for fieldName in fieldNames:
116 if fieldName.startswith("avg"):
117 sumFields.append("AVG(%s) as avg_%s"%(fieldName,fieldName))
Scott Bakerc527fda2014-03-20 17:14:52 -0700118 sumFields.append("MAX(%s) as max_%s"%(fieldName,fieldName))
119 elif (fieldName.startswith("count")) or (fieldName.startswith("sum")) or (fieldName.startswith("computed")):
Scott Baker43adf1b2014-03-19 21:54:55 -0700120 sumFields.append("SUM(%s) as sum_%s"%(fieldName,fieldName))
Scott Bakerc527fda2014-03-20 17:14:52 -0700121 else:
122 sumFields.append(fieldName)
Scott Baker43adf1b2014-03-19 21:54:55 -0700123
124 sumFields = ",".join(sumFields)
125
Scott Baker050b1b82014-03-27 09:13:41 -0700126 query = "SELECT %s, %s FROM (%s)" % ("Time", sumFields, subQuery)
Scott Baker43adf1b2014-03-19 21:54:55 -0700127 if groupBy:
128 query = query + groupBy
129 if orderBy:
130 query = query + orderBy
131 else:
132 query = "SELECT %s FROM [%s]" % (fields, tablePart)
133 if where:
134 query = query + " " + where
135 if groupBy:
136 query = query + groupBy
137 if orderBy:
138 query = query + orderBy
139
140 return query
141
142 def get_list_from_req(self, req, name, default=[]):
143 value = req.GET.get(name, None)
144 if not value:
145 return default
Scott Baker050b1b82014-03-27 09:13:41 -0700146 value=value.replace("@","%")
Scott Baker43adf1b2014-03-19 21:54:55 -0700147 return value.split(",")
148
Scott Baker050b1b82014-03-27 09:13:41 -0700149 def format_result(self, format, result, query, dataSourceUrl):
Scott Baker43adf1b2014-03-19 21:54:55 -0700150 if (format == "json_dicts"):
Scott Baker050b1b82014-03-27 09:13:41 -0700151 result = {"query": query, "rows": result, "dataSourceUrl": dataSourceUrl}
Scott Baker43adf1b2014-03-19 21:54:55 -0700152 return ("application/javascript", json.dumps(result))
153
154 elif (format == "json_arrays"):
155 new_result = []
156 for row in result:
157 new_row = []
158 for key in sorted(row.keys()):
159 new_row.append(row[key])
160 new_result.append(new_row)
161 new_result = {"query": query, "rows": new_result}
162 return ("application/javascript", json.dumps(new_result))
163
164 elif (format == "html_table"):
165 new_rows = []
166 for row in result:
167 new_row = []
168 for key in sorted(row.keys()):
169 new_row.append("<TD>%s</TD>" % str(row[key]))
170 new_rows.append("<TR>%s</TR>" % "".join(new_row))
171
172 new_result = "<TABLE>%s</TABLE>" % "\n".join(new_rows)
173
174 return ("text/html", new_result)
175
Scott Bakerc527fda2014-03-20 17:14:52 -0700176 elif (format == "json_hpcdash"):
177 new_rows = {}
178 for row in result:
179 new_row = {"lat": float(row.get("lat", 0)),
180 "long": float(row.get("long", 0)),
181 "health": 0,
182 "numNodes": int(row.get("numNodes",0)),
183 "numHPCSlivers": int(row.get("sum_count_hostname", 0)),
184 "siteUrl": row.get("url", ""),
185 "hot": float(row.get("hotness", 0.0)),
186 "load": int(float(row.get("max_avg_cpu", 0)))}
187 new_rows[row["site"]] = new_row
188 return ("application/javascript", json.dumps(new_rows))
189
190 def only_largest(self, rows, fieldName):
191 """ Given a fieldName, only return the set of rows that had the
192 maximum value of that fieldName.
193 """
194 maxVal = max( [int(row[fieldName]) for row in rows] )
195 new_rows = [row for row in rows if int(row[fieldName])==maxVal]
196 return new_rows
197
198 def merge_datamodel_sites(self, rows):
199 """ For a query that included "site" in its groupby, merge in the
200 opencloud site information.
201 """
202 for row in rows:
203 sitename = row["site"]
204 try:
205 model_site = Site.objects.get(name=sitename)
206 except:
207 # we didn't find it in the data model
208 continue
209
210 row["lat"] = float(model_site.location.latitude)
211 row["long"] = float(model_site.location.longitude)
212 row["url"] = model_site.site_url
213 row["numNodes"] = model_site.nodes.count()
214
215 if "max_avg_cpu" in row:
216 cpu=float(row["max_avg_cpu"])/100.0
217 row["hotness"] = max(0.0, ((cpu*RED_LOAD) - BLUE_LOAD)/(RED_LOAD-BLUE_LOAD))
218
Scott Baker43adf1b2014-03-19 21:54:55 -0700219 def process_request(self, req):
220 print req.GET
221
Scott Baker050b1b82014-03-27 09:13:41 -0700222 tqx = req.GET.get("tqx", None)
Scott Baker43adf1b2014-03-19 21:54:55 -0700223
224 slice = req.GET.get("slice", None)
225 site = req.GET.get("site", None)
226 node = req.GET.get("node", None)
Scott Bakerc527fda2014-03-20 17:14:52 -0700227 service = req.GET.get("service", None)
Scott Baker43adf1b2014-03-19 21:54:55 -0700228
229 format = req.GET.get("format", "json_dicts")
230
Scott Baker050b1b82014-03-27 09:13:41 -0700231 timeField = req.GET.get("timeBucket", "60")
Scott Baker43adf1b2014-03-19 21:54:55 -0700232 avg = self.get_list_from_req(req, "avg")
233 sum = self.get_list_from_req(req, "sum")
234 count = self.get_list_from_req(req, "count")
235 computed = self.get_list_from_req(req, "computed")
Scott Baker050b1b82014-03-27 09:13:41 -0700236 groupBy = self.get_list_from_req(req, "groupBy", ["Time"])
237 orderBy = self.get_list_from_req(req, "orderBy", ["Time"])
Scott Baker43adf1b2014-03-19 21:54:55 -0700238
239 maxRows = req.GET.get("maxRows", None)
Scott Bakerc527fda2014-03-20 17:14:52 -0700240 onlyLargest = req.GET.get("onlyLargest", None)
241 mergeDataModelSites = req.GET.get("mergeDataModelSites", None)
Scott Baker43adf1b2014-03-19 21:54:55 -0700242
Scott Bakerc527fda2014-03-20 17:14:52 -0700243 q = self.compose_query(slice, site, node, service, timeField, avg, sum, count, computed, groupBy, orderBy)
Scott Baker43adf1b2014-03-19 21:54:55 -0700244
245 print q
Scott Baker050b1b82014-03-27 09:13:41 -0700246 dataSourceUrl = "http://" + req.META["SERVER_NAME"] + ":" + req.META["SERVER_PORT"] + req.META["PATH_INFO"] + "?" + req.META["QUERY_STRING"].replace("format=","origFormat=").replace("%","%25") + "&format=charts";
Scott Baker43adf1b2014-03-19 21:54:55 -0700247
Scott Baker050b1b82014-03-27 09:13:41 -0700248 if (format=="dataSourceUrl"):
249 result = {"dataSourceUrl": dataSourceUrl}
250 return ("application/javascript", result)
251
252 elif (format=="raw"):
Scott Baker43adf1b2014-03-19 21:54:55 -0700253 result = self.run_query_raw(q)
Scott Baker050b1b82014-03-27 09:13:41 -0700254 result["dataSourceUrl"] = dataSourceUrl
255
256 result = json.dumps(result);
257
258 return ("application/javascript", result)
259
260 elif (format=="charts"):
261 bq_result = self.run_query_raw(q)
262
263 # cloudscrutiny code is probably better!
264 table = {}
265 table["cols"] = self.schema_to_cols(bq_result["schema"])
266 rows = []
267 for row in bq_result["rows"]:
268 rowcols = []
269 for (colnum,col) in enumerate(row["f"]):
270 if (colnum==0):
271 dt = datetime.datetime.fromtimestamp(float(col["v"]))
272 rowcols.append({"v": 'new Date("%s")' % dt.isoformat()})
273 else:
274 try:
275 rowcols.append({"v": float(col["v"])})
276 except:
277 rowcols.append({"v": col["v"]})
278 rows.append({"c": rowcols})
279 table["rows"] = rows
280
281 if tqx:
282 reqId = tqx.strip("reqId:")
283 else:
284 reqId = "0"
285
286 result = {"status": "okColumnChart", "reqId": reqId, "table": table, "version": "0.6"}
287
288 result = "google.visualization.Query.setResponse(" + json.dumps(result) + ");"
289
290 def unquote_it(x): return x.group()[1:-1].replace('\\"', '"')
291
292 p = re.compile(r'"new Date\(\\"[^"]*\\"\)"')
293 result=p.sub(unquote_it, result)
294
295 return ("application/javascript", result)
296
Scott Baker43adf1b2014-03-19 21:54:55 -0700297 else:
298 result = self.run_query(q)
299
Scott Bakerc527fda2014-03-20 17:14:52 -0700300 if onlyLargest:
301 result = self.only_largest(result, onlyLargest)
302
303 if mergeDataModelSites:
304 self.merge_datamodel_sites(result)
305
Scott Baker43adf1b2014-03-19 21:54:55 -0700306 if maxRows:
307 result = result[-int(maxRows):]
308
Scott Baker050b1b82014-03-27 09:13:41 -0700309 return self.format_result(format, result, q, dataSourceUrl)
Scott Baker43adf1b2014-03-19 21:54:55 -0700310
311
def DoPlanetStackAnalytics(request):
    """Django view entry point: delegate the request to a fresh
    PlanetStackAnalytics instance and return its (content_type, body)."""
    return PlanetStackAnalytics().process_request(request)
317
318def main():
319 bq = PlanetStackAnalytics()
320
Scott Baker050b1b82014-03-27 09:13:41 -0700321 q=bq.compose_query(avg=["%cpu","%bandwidth"], count=["%hostname"], slice="HyperCache")
Scott Baker43adf1b2014-03-19 21:54:55 -0700322 print q
323 bq.dump_table(bq.run_query(q))
324
325 q=bq.compose_query(computed=["%bytes_sent/%elapsed"])
326 print
327 print q
328 bq.dump_table(bq.run_query(q))
Scott Baker43adf1b2014-03-19 21:54:55 -0700329
Scott Baker050b1b82014-03-27 09:13:41 -0700330 q=bq.compose_query(timeBucket=60*60, avg=["%cpu"], count=["%hostname"], computed=["%bytes_sent/%elapsed"])
Scott Baker43adf1b2014-03-19 21:54:55 -0700331 print
332 print q
333 bq.dump_table(bq.run_query(q))
Scott Bakerc527fda2014-03-20 17:14:52 -0700334
Scott Baker050b1b82014-03-27 09:13:41 -0700335 q=bq.compose_query(avg=["%cpu"], count=["%hostname"], computed=["%bytes_sent/%elapsed"], service="HPC Service", groupBy=["Time","%site"])
Scott Bakerc527fda2014-03-20 17:14:52 -0700336 print
337 print q
338 result=bq.run_query(q)
Scott Baker050b1b82014-03-27 09:13:41 -0700339 result = bq.only_largest(result, "Time")
Scott Bakerc527fda2014-03-20 17:14:52 -0700340 bq.merge_datamodel_sites(result)
341 #bq.dump_table(result)
342 print bq.format_result("json_hpcdash", result, q)
Scott Baker43adf1b2014-03-19 21:54:55 -0700343
344if __name__ == "__main__":
345 main()
346
347
348
349
350