/*
 * Copyright 2020 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

syntax = "proto3";

package fcp.client;

option java_package = "com.google.intelligence.fcp.client";
option java_multiple_files = true;

/**
 * Enumerations of timer and counter identifiers.
 *
 * For monitoring, certain timers and counters are logged as integer
 * histograms. This allows for computing aggregate histograms on the cloud and
 * determining distributions of latencies for blocks of code, resource usage,
 * etc.
 *
 * An illustrative, non-normative sketch of how a logged sample might reference
 * these identifiers follows the enum at the end of this file.
 */
enum HistogramCounters {
  HISTOGRAM_COUNTER_UNDEFINED = 0;

  /**
   * How long it takes to run a plan on device, excluding downloading the plan
   * and reporting results.
   */
  TRAINING_RUN_PHASE_LATENCY = 1;

  /**
   * The end time of running training for a whole plan, excluding downloading
   * the plan and reporting results, relative to the start of the training
   * session.
   */
  TRAINING_RUN_PHASE_END_TIME = 2;

  /** How long running a "restore state op" takes. */
  TRAINING_RESTORE_STATE_LATENCY = 3;

  /**
   * How long it takes to run training for a whole client execution (which may
   * involve running multiple epochs). This includes connecting to and fetching
   * examples from the example store, as well as training over them.
   */
  TRAINING_RUN_CLIENT_EXECUTION_LATENCY = 4;

  /** How long running an "init op" takes. */
  TRAINING_INIT_OP_LATENCY = 5;

  /** How long running a "before op" takes. */
  TRAINING_BEFORE_OP_LATENCY = 6;

  /** How long running an "after op" takes. */
  TRAINING_AFTER_OP_LATENCY = 7;

  /**
   * How long it takes to run training for a whole epoch. This includes
   * connecting to and fetching examples from the example store, as well as
   * training over them.
   */
  TRAINING_RUN_EPOCH_LATENCY = 8;

  /**
   * How long it takes to gather enough examples for a mini batch.
   * This counter may be an average across minibatches and epochs.
   */
  TRAINING_GATHER_MINI_BATCH_LATENCY = 9;

  /**
   * How long it takes to run training on a mini batch.
   * This counter may be an average across minibatches and epochs.
   */
  TRAINING_RUN_MINI_BATCH_LATENCY = 10;

  /**
   * How long it takes the TensorFlow session to terminate after it's been
   * interrupted.
   */
  TRAINING_INTERRUPT_TERMINATION_LATENCY = 11;

  /** How long it takes to commit the opstats message to the database. */
  TRAINING_OPSTATS_COMMIT_LATENCY = 12;

  /**
   * The number of examples encountered during overall training, across all
   * client executions.
   */
  TRAINING_OVERALL_EXAMPLE_COUNT = 100001;

  /**
   * The sum of the size (in bytes) of all the examples encountered during
   * overall training, across all client executions.
   */
  TRAINING_OVERALL_EXAMPLE_SIZE = 100002;

  /**
   * The number of examples encountered in a client execution, across all
   * epochs.
   */
  TRAINING_CLIENT_EXECUTION_EXAMPLE_COUNT = 100003;

  /**
   * The sum of the size (in bytes) of all the examples encountered in a client
   * execution, across all epochs.
   */
  TRAINING_CLIENT_EXECUTION_EXAMPLE_SIZE = 100004;

  /**
   * The number of examples encountered in an epoch.
   * This counter may be an average from multiple epochs.
   */
  TRAINING_EPOCH_EXAMPLE_COUNT = 100005;

  /**
   * The sum of the size (in bytes) of all the examples encountered in an
   * epoch. This counter may be an average from multiple epochs.
   */
  TRAINING_EPOCH_EXAMPLE_SIZE = 100006;

  /**
   * The number of examples in a mini batch.
   * This counter may be an average from multiple minibatches.
   */
  TRAINING_MINI_BATCH_EXAMPLE_COUNT = 100007;

  /**
   * The sum of the size (in bytes) of all the examples in a mini batch.
   * This counter may be an average from multiple minibatches.
   */
  TRAINING_MINI_BATCH_EXAMPLE_SIZE = 100008;

  /** The size (in bytes) of the OpStatsDb file. */
  OPSTATS_DB_SIZE_BYTES = 100009;

  /** The number of entries in OpStatsDb. */
  OPSTATS_DB_NUM_ENTRIES = 100010;

  /** The number of entries pruned from OpStatsDb due to exceeding max size. */
  OPSTATS_NUM_PRUNED_ENTRIES = 100011;

  /**
   * The tenure (in hours) of the oldest entry which has been pruned from the
   * OpStatsDb due to exceeding max size.
   */
  OPSTATS_OLDEST_PRUNED_ENTRY_TENURE_HOURS = 100012;

  /** How long checking in/downloading a plan takes (for FL plans only). */
  TRAINING_FL_CHECKIN_LATENCY = 200001;

  /**
   * The end time of reporting results to the server, relative to the start
   * of the training session.
   */
  TRAINING_FL_REPORT_RESULTS_END_TIME = 200002;

  /** How long reporting results to the server takes. */
  TRAINING_FL_REPORT_RESULTS_LATENCY = 200003;

  /**
   * The end time of checking in/downloading a plan from the server, relative
   * to the start of the training session.
   */
  TRAINING_FL_CHECKIN_END_TIME = 200004;

  /** How long checking in/downloading an eligibility eval plan takes. */
  TRAINING_FL_ELIGIBILITY_EVAL_CHECKIN_LATENCY = 200005;
}
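
/**
 * Purely illustrative, non-normative sketch: this message is hypothetical and
 * not part of the FCP client API. It only shows how a single logged sample
 * could reference one of the identifiers above, pairing the counter id with
 * the integer value (a latency in milliseconds, a size in bytes, or a count)
 * that would be bucketed into the corresponding histogram.
 */
message IllustrativeHistogramSample {
  // Which timer or counter the sample belongs to.
  HistogramCounters counter = 1;

  // The sampled integer value to be aggregated into the histogram.
  int64 value = 2;
}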