Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 1 | import re |
| 2 | import base64 |
| 3 | import requests |
| 4 | import urllib |
| 5 | import json |
| 6 | import httplib2 |
| 7 | import threading |
| 8 | import os |
| 9 | import time |
| 10 | import traceback |
| 11 | |
| 12 | from apiclient.discovery import build |
| 13 | from apiclient.errors import HttpError |
| 14 | from oauth2client.client import AccessTokenRefreshError |
| 15 | from oauth2client.client import OAuth2WebServerFlow |
| 16 | from oauth2client.client import flow_from_clientsecrets |
| 17 | from oauth2client.file import Storage |
| 18 | from oauth2client.tools import run_flow,run |
| 19 | |
| 20 | """ |
| 21 | yum -y install python-httplib2 |
| 22 | easy_install python_gflags |
| 23 | easy_install google_api_python_client |
| 24 | """ |
| 25 | |
| 26 | PROJECT_NUMBER = '549187599759' |
| 27 | |
Scott Baker | 78ab101 | 2014-03-19 23:44:39 -0700 | [diff] [blame] | 28 | try: |
| 29 | FLOW = flow_from_clientsecrets('/opt/planetstack/hpc_wizard/client_secrets.json', |
| 30 | scope='https://www.googleapis.com/auth/bigquery') |
| 31 | except: |
| 32 | print "exception while initializing bigquery flow" |
| 33 | traceback.print_exc() |
| 34 | FLOW = None |
Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 35 | |
| 36 | MINUTE_MS = 60*1000 |
| 37 | HOUR_MS = 60*60*1000 |
| 38 | |
Scott Baker | ba60d82 | 2014-03-27 09:12:28 -0700 | [diff] [blame] | 39 | # global to hold cached mappings |
| 40 | mappings = {} |
| 41 | reverse_mappings = {} |
| 42 | |
| 43 | class MappingException(Exception): |
| 44 | pass |
| 45 | |
Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 46 | class BigQueryAnalytics: |
| 47 | def __init__(self, table = "demoevents"): |
| 48 | self.projectName = "vicci" |
| 49 | self.tableName = table |
Scott Baker | ba60d82 | 2014-03-27 09:12:28 -0700 | [diff] [blame] | 50 | |
| 51 | def reload_mapping(self): |
| 52 | global mappings, reverse_mappings |
| 53 | mappings[self.tableName] = json.loads(self.fetch_mapping(table=self.tableName)) |
| 54 | reverse_mappings[self.tableName] = {v:k for k, v in mappings[self.tableName].items()} |
Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 55 | |
| 56 | def fetch_mapping(self, m=0, table="events"): |
| 57 | req = 'http://cloud-scrutiny.appspot.com/command?action=get_allocations&multiplexer=%d&table=%s'% (m,table) |
| 58 | resp = requests.get(req) |
| 59 | if (resp.status_code==200): |
| 60 | return resp.text |
| 61 | else: |
| 62 | raise Exception('Error accessing register allocations: %d'%resp.status_code) |
| 63 | |
| 64 | def run_query_raw(self, query): |
| 65 | p = re.compile('%[a-zA-z_]*') |
Scott Baker | ba60d82 | 2014-03-27 09:12:28 -0700 | [diff] [blame] | 66 | |
| 67 | try: |
| 68 | query = p.sub(self.remap, query) |
| 69 | except MappingException: |
| 70 | self.reload_mapping() |
| 71 | query = p.sub(self.remap, query) |
Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 72 | |
| 73 | storage = Storage('/opt/planetstack/hpc_wizard/bigquery_credentials.dat') |
| 74 | credentials = storage.get() |
| 75 | |
| 76 | if credentials is None or credentials.invalid: |
| 77 | credentials = run(FLOW, storage) |
| 78 | |
| 79 | http = httplib2.Http() |
| 80 | http = credentials.authorize(http) |
| 81 | |
| 82 | service = build('bigquery', 'v2', http=http) |
| 83 | |
Scott Baker | 0817209 | 2014-03-20 15:07:06 -0700 | [diff] [blame] | 84 | body = {"query": query, |
| 85 | "timeoutMs": 30000} |
Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 86 | response = service.jobs().query(projectId=PROJECT_NUMBER, body=body).execute() |
| 87 | |
| 88 | return response |
| 89 | |
| 90 | def translate_schema(self, response): |
| 91 | for field in response["schema"]["fields"]: |
Scott Baker | ba60d82 | 2014-03-27 09:12:28 -0700 | [diff] [blame] | 92 | field["name"] = reverse_mappings[self.tableName].get(field["name"], field["name"]) |
Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 93 | |
| 94 | def run_query(self, query): |
| 95 | response = self.run_query_raw(query) |
| 96 | |
| 97 | fieldNames = [] |
| 98 | for field in response["schema"]["fields"]: |
| 99 | fieldNames.append(field["name"]) |
| 100 | |
| 101 | result = [] |
| 102 | if "rows" in response: |
| 103 | for row in response["rows"]: |
| 104 | this_result = {} |
| 105 | for (i,column) in enumerate(row["f"]): |
Scott Baker | ba60d82 | 2014-03-27 09:12:28 -0700 | [diff] [blame] | 106 | this_result[reverse_mappings[self.tableName].get(fieldNames[i],fieldNames[i])] = column["v"] |
Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 107 | result.append(this_result) |
| 108 | |
| 109 | return result |
| 110 | |
| 111 | def remap(self, match): |
Scott Baker | ba60d82 | 2014-03-27 09:12:28 -0700 | [diff] [blame] | 112 | if not self.tableName in mappings: |
| 113 | raise MappingException("no mapping for table %s" % self.tableName) |
| 114 | |
| 115 | mapping = mappings[self.tableName] |
| 116 | |
Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 117 | token = match.group()[1:] |
Scott Baker | ba60d82 | 2014-03-27 09:12:28 -0700 | [diff] [blame] | 118 | if token in mapping: |
| 119 | return mapping[token] |
Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 120 | else: |
Scott Baker | ba60d82 | 2014-03-27 09:12:28 -0700 | [diff] [blame] | 121 | raise MappingException('unknown token %s' % token) |
Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 122 | |
| 123 | def dump_table(self, rows, keys=None): |
| 124 | if not keys: |
| 125 | keys = rows[0].keys() |
| 126 | |
| 127 | lens = {} |
| 128 | for key in keys: |
| 129 | lens[key] = len(key) |
| 130 | |
| 131 | for row in rows: |
| 132 | for key in keys: |
| 133 | thislen = len(str(row.get(key,""))) |
| 134 | lens[key] = max(lens.get(key,0), thislen) |
| 135 | |
| 136 | for key in keys: |
| 137 | print "%*s" % (lens[key], key), |
| 138 | print |
| 139 | |
| 140 | for row in rows: |
| 141 | for key in keys: |
| 142 | print "%*s" % (lens[key], str(row.get(key,""))), |
| 143 | print |
| 144 | |
Scott Baker | ba60d82 | 2014-03-27 09:12:28 -0700 | [diff] [blame] | 145 | def schema_to_cols(self, schema): |
| 146 | fields = schema["fields"] |
| 147 | |
| 148 | colTypes = {"STRING": "string", "INTEGER": "number", "FLOAT": "number", "TIMESTAMP": "date"} |
| 149 | |
| 150 | cols = [] |
| 151 | i=0 |
| 152 | for field in fields: |
| 153 | col = {"type": colTypes[field["type"]], |
| 154 | "id": "Col%d" % i, |
| 155 | "label": reverse_mappings[self.tableName].get(field["name"],field["name"])} |
| 156 | cols.append(col) |
| 157 | i=i+1 |
| 158 | |
| 159 | return cols |
| 160 | |
Scott Baker | 43adf1b | 2014-03-19 21:54:55 -0700 | [diff] [blame] | 161 | def main(): |
| 162 | bq = BigQueryAnalytics() |
| 163 | |
| 164 | rows = bq.run_query("select %hostname,SUM(%bytes_sent) from [vicci.demoevents] group by %hostname") |
| 165 | |
| 166 | bq.dump_table(rows) |
| 167 | |
| 168 | if __name__ == "__main__": |
| 169 | main() |