# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import socket
import sys
import re

import requests

from infra_libs.ts_mon.common import interface
from infra_libs.ts_mon.common import monitors
from infra_libs.ts_mon.common import standard_metrics
from infra_libs.ts_mon.common import targets


def load_machine_config(filename):
  """Load the machine-wide JSON configuration file.

  Args:
    filename (str): path to the JSON config file.

  Returns:
    The decoded JSON content, or an empty dict when the file does not exist.

  Raises:
    Exception: whatever opening or decoding the file raised; the failure is
        logged and then re-raised unchanged.
  """
  if not os.path.exists(filename):
    logging.info('Configuration file does not exist, ignoring: %s', filename)
    return {}

  try:
    with open(filename) as fh:
      return json.load(fh)
  except Exception:
    logging.error('Configuration file couldn\'t be read: %s', filename)
    raise


def _default_region(fqdn):
  """Guess the default region for this machine.

  The GCE metadata server is queried first; when it answers with an OK status
  the last slash-separated component of the returned zone is used. Otherwise
  the second dot-separated component of `fqdn` is used.

  Args:
    fqdn (str): fully qualified domain name of this machine.

  Returns:
    str: the region, or '' when `fqdn` has no second component.
  """
  # Check if we're running in a GCE instance.
  try:
    r = requests.get(
        'http://metadata.google.internal/computeMetadata/v1/instance/zone',
        headers={'Metadata-Flavor': 'Google'},
        timeout=1.0)
  except requests.exceptions.RequestException:
    # Best effort: any request failure falls through to the fqdn heuristic.
    pass
  else:
    if r.status_code == requests.codes.ok:
      # The zone is the last slash-separated component.
      return r.text.split('/')[-1]

  try:
    return fqdn.split('.')[1]  # [chrome|golo]
  except IndexError:
    return ''


def _default_network(host):
  """Extract the network number from a host name.

  Args:
    host (str): short host name (no domain part).

  Returns:
    str: the digits captured from the host name, or '' when the name does not
        match the expected pattern.
  """
  try:
    # Regular expression that matches the vast majority of our host names.
    # Matches everything of the form 'masterN', 'masterNa', and 'foo-xN'.
    return re.match(r'^([\w-]*?-[acm]|master)(\d+)a?$', host).group(2)  # N
  except AttributeError:
    # re.match returned None: the host name does not follow the pattern.
    return ''


def add_argparse_options(parser):
  """Add monitoring related flags to a process' argument parser.

  Defaults for the hostname, region and network flags are derived from the
  local machine's fully qualified domain name at the time of the call.

  Args:
    parser (argparse.ArgumentParser): the parser for the main process.
  """
  if sys.platform == 'win32':  # pragma: no cover
    default_config_file = 'C:\\chrome-infra\\ts-mon.json'
  else:  # pragma: no cover
    default_config_file = '/etc/chrome-infra/ts-mon.json'

  # All flags are registered on a dedicated group so they are listed together
  # in --help output.
  group = parser.add_argument_group('Timeseries Monitoring Options')
  group.add_argument(
      '--ts-mon-config-file',
      default=default_config_file,
      help='path to a JSON config file that contains suitable values for '
           '"endpoint" and "credentials" for this machine. This config file is '
           'intended to be shared by all processes on the machine, as the '
           'values depend on the machine\'s position in the network, IP '
           'whitelisting and deployment of credentials. (default: %(default)s)')
  group.add_argument(
      '--ts-mon-endpoint',
      help='url (file:// or https://) to post monitoring metrics to. If set, '
           'overrides the value in --ts-mon-config-file')
  group.add_argument(
      '--ts-mon-credentials',
      help='path to a pkcs8 json credential file. If set, overrides the value '
           'in --ts-mon-config-file')
  group.add_argument(
      '--ts-mon-ca-certs',
      help='path to file containing root CA certificates for SSL server '
           'certificate validation. If not set, a CA cert file bundled with '
           'httplib2 is used.')
  group.add_argument(
      '--ts-mon-flush',
      choices=('manual', 'auto'), default='auto',
      help=('metric push behavior: manual (only send when flush() is called), '
            'or auto (send automatically every --ts-mon-flush-interval-secs '
            'seconds). (default: %(default)s)'))
  group.add_argument(
      '--ts-mon-flush-interval-secs',
      type=int,
      default=60,
      help=('automatically push metrics on this interval if '
            '--ts-mon-flush=auto.'))
  group.add_argument(
      '--ts-mon-autogen-hostname',
      action="store_true",
      help=('Indicate that the hostname is autogenerated. '
            'This option must be set on autoscaled GCE VMs, Kubernetes pods, '
            'or any other hosts with dynamically generated names.'))

  group.add_argument(
      '--ts-mon-target-type',
      choices=('device', 'task'),
      default='device',
      help='the type of target that is being monitored ("device" or "task").'
           ' (default: %(default)s)')

  fqdn = socket.getfqdn().lower()  # foo-[a|m]N.[chrome|golo].chromium.org
  host = fqdn.split('.')[0]  # foo-[a|m]N
  region = _default_region(fqdn)
  network = _default_network(host)

  group.add_argument(
      '--ts-mon-device-hostname',
      default=host,
      help='name of this device, (default: %(default)s)')
  group.add_argument(
      '--ts-mon-device-region',
      default=region,
      help='name of the region this devices lives in. (default: %(default)s)')
  group.add_argument(
      '--ts-mon-device-role',
      default='default',
      help='Role of the device. (default: %(default)s)')
  group.add_argument(
      '--ts-mon-device-network',
      default=network,
      help='name of the network this device is connected to. '
           '(default: %(default)s)')

  group.add_argument(
      '--ts-mon-task-service-name',
      help='name of the service being monitored')
  group.add_argument(
      '--ts-mon-task-job-name',
      help='name of this job instance of the task')
  group.add_argument(
      '--ts-mon-task-region',
      default=region,
      help='name of the region in which this task is running '
           '(default: %(default)s)')
  group.add_argument(
      '--ts-mon-task-hostname',
      default=host,
      help='name of the host on which this task is running '
           '(default: %(default)s)')
  group.add_argument(
      '--ts-mon-task-number', type=int, default=0,
      help='number (e.g. for replication) of this instance of this task '
           '(default: %(default)s)')

  group.add_argument(
      '--ts-mon-metric-name-prefix',
      default='/chrome/infra/',
      help='metric name prefix for all metrics (default: %(default)s)')

  group.add_argument(
      '--ts-mon-use-new-proto',
      default=True, action='store_true',
      help='deprecated and ignored')


def _exit_with_argument_error(message):
  """Write `message` to stderr and exit the process with status 2.

  Reimplements ArgumentParser.error, since the parser is not available when
  the parsed arguments are processed. Uses sys.stderr.write rather than the
  print statement so that it behaves the same on Python 2 and Python 3.

  Args:
    message (str): the error text, without a trailing newline.

  Raises:
    SystemExit: always, with exit code 2.
  """
  sys.stderr.write(message + '\n')
  sys.exit(2)


def process_argparse_options(args):
  """Process command line arguments to initialize the global monitor.

  Also initializes the default target.

  Starts a background thread to automatically flush monitoring metrics if not
  disabled by command line arguments.

  Args:
    args (argparse.Namespace): the result of parsing the command line arguments

  Raises:
    SystemExit: with code 2 when the target type is "task" and the service
        name or job name is missing.
  """
  # Parse the config file if it exists.
  config = load_machine_config(args.ts_mon_config_file)
  endpoint = config.get('endpoint', '')
  credentials = config.get('credentials', '')
  autogen_hostname = config.get('autogen_hostname', False)

  # Command-line args override the values in the config file.
  if args.ts_mon_endpoint is not None:
    endpoint = args.ts_mon_endpoint
  if args.ts_mon_credentials is not None:
    credentials = args.ts_mon_credentials

  # The autogen marker can come from either the flag or the config file.
  use_autogen_prefix = args.ts_mon_autogen_hostname or autogen_hostname

  if args.ts_mon_target_type == 'device':
    hostname = args.ts_mon_device_hostname
    if use_autogen_prefix:
      hostname = 'autogen:' + hostname
    interface.state.target = targets.DeviceTarget(
        args.ts_mon_device_region,
        args.ts_mon_device_role,
        args.ts_mon_device_network,
        hostname)
  elif args.ts_mon_target_type == 'task':
    if not args.ts_mon_task_service_name:
      _exit_with_argument_error(
          'Argument --ts-mon-task-service-name must be '
          'provided when the target type is "task".')
    if not args.ts_mon_task_job_name:
      _exit_with_argument_error(
          'Argument --ts-mon-task-job-name must be provided '
          'when the target type is "task".')
    hostname = args.ts_mon_task_hostname
    if use_autogen_prefix:
      hostname = 'autogen:' + hostname
    interface.state.target = targets.TaskTarget(
        args.ts_mon_task_service_name,
        args.ts_mon_task_job_name,
        args.ts_mon_task_region,
        hostname,
        args.ts_mon_task_number)

  interface.state.metric_name_prefix = args.ts_mon_metric_name_prefix
  # Start from a NullMonitor; it stays in place unless the endpoint selects
  # one of the supported schemes below.
  interface.state.global_monitor = monitors.NullMonitor()

  if endpoint.startswith('file://'):
    interface.state.global_monitor = monitors.DebugMonitor(
        endpoint[len('file://'):])
  elif endpoint.startswith('https://'):
    interface.state.global_monitor = monitors.HttpsMonitor(
        endpoint, monitors.CredentialFactory.from_string(credentials),
        ca_certs=args.ts_mon_ca_certs)
  elif endpoint.lower() == 'none' or not endpoint:
    logging.info('ts_mon monitoring has been explicitly disabled')
  else:
    logging.error('ts_mon monitoring is disabled because the endpoint provided'
                  ' is invalid or not supported: %s', endpoint)

  interface.state.flush_mode = args.ts_mon_flush

  if args.ts_mon_flush == 'auto':
    interface.state.flush_thread = interface._FlushThread(
        args.ts_mon_flush_interval_secs)
    interface.state.flush_thread.start()

  standard_metrics.init()