1// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.monitoring.v3;
18
19import "google/api/resource.proto";
20import "google/protobuf/duration.proto";
21import "google/type/calendar_period.proto";
22
// Per-language code generation options: the target namespace or package for
// C#, Go, Java, PHP, and Ruby, plus the Java outer class name and the
// one-file-per-type setting for Java.
option csharp_namespace = "Google.Cloud.Monitoring.V3";
option go_package = "cloud.google.com/go/monitoring/apiv3/v2/monitoringpb;monitoringpb";
option java_multiple_files = true;
option java_outer_classname = "ServiceMonitoringProto";
option java_package = "com.google.monitoring.v3";
option php_namespace = "Google\\Cloud\\Monitoring\\V3";
option ruby_package = "Google::Cloud::Monitoring::V3";
30
// A `Service` is a discrete, autonomous, network-accessible unit designed to
// address a single concern
// ([Wikipedia](https://en.wikipedia.org/wiki/Service-orientation)). Within
// Cloud Monitoring, a `Service` is the root resource beneath which the
// operational aspects of that service can be reached.
message Service {
  option (google.api.resource) = {
    type: "monitoring.googleapis.com/Service"
    pattern: "projects/{project}/services/{service}"
    pattern: "organizations/{organization}/services/{service}"
    pattern: "folders/{folder}/services/{service}"
    pattern: "*"
  };

  // Custom view of service telemetry. For now this is a placeholder until the
  // design is finalized.
  message Custom {}

  // An App Engine service. See https://cloud.google.com/appengine for more
  // information.
  message AppEngine {
    // ID of the App Engine module that backs this service. It matches the
    // `module_id` resource label of the `gae_app` monitored resource:
    // https://cloud.google.com/monitoring/api/resources#tag_gae_app
    string module_id = 1;
  }

  // A Cloud Endpoints service. See https://cloud.google.com/endpoints for more
  // information.
  message CloudEndpoints {
    // Name of the Cloud Endpoints service that backs this service. It matches
    // the `service` resource label of the `api` monitored resource:
    // https://cloud.google.com/monitoring/api/resources#tag_api
    string service = 1;
  }

  // An Istio service scoped to a single Kubernetes cluster (see
  // https://istio.io for more information). Services from clusters running OSS
  // Istio are ingested as this type.
  message ClusterIstio {
    // Location of the Kubernetes cluster where this Istio service is defined.
    // It matches the `location` resource label of `k8s_cluster` resources.
    string location = 1;

    // Name of the Kubernetes cluster where this Istio service is defined. It
    // matches the `cluster_name` resource label of `k8s_cluster` resources.
    string cluster_name = 2;

    // Namespace of the Istio service that backs this service. It matches the
    // `destination_service_namespace` metric label of Istio metrics.
    string service_namespace = 3;

    // Name of the Istio service that backs this service. It matches the
    // `destination_service_name` metric label of Istio metrics.
    string service_name = 4;
  }

  // An Istio service scoped to an Istio mesh. Services from Anthos clusters
  // running ASM < 1.6.8 are ingested as this type.
  message MeshIstio {
    // Identifier of the mesh where this Istio service is defined. It matches
    // the `mesh_uid` metric label of Istio metrics.
    string mesh_uid = 1;

    // Namespace of the Istio service that backs this service. It matches the
    // `destination_service_namespace` metric label of Istio metrics.
    string service_namespace = 3;

    // Name of the Istio service that backs this service. It matches the
    // `destination_service_name` metric label of Istio metrics.
    string service_name = 4;
  }

  // A canonical service scoped to an Istio mesh. Services from Anthos clusters
  // running ASM >= 1.6.8 are ingested as this type.
  message IstioCanonicalService {
    // Identifier of the Istio mesh where this canonical service is defined. It
    // matches the `mesh_uid` metric label of
    // [Istio metrics](https://cloud.google.com/monitoring/api/metrics_istio).
    string mesh_uid = 1;

    // Namespace of the canonical service that backs this service. It matches
    // the `destination_canonical_service_namespace` metric label of
    // [Istio metrics](https://cloud.google.com/monitoring/api/metrics_istio).
    string canonical_service_namespace = 3;

    // Name of the canonical service that backs this service. It matches the
    // `destination_canonical_service_name` metric label of
    // [Istio metrics](https://cloud.google.com/monitoring/api/metrics_istio).
    string canonical_service = 4;
  }

  // Settings that describe how to query telemetry for a `Service`.
  message Telemetry {
    // Full name of the resource that defines this service, formatted as
    // described in https://cloud.google.com/apis/design/resource_names.
    string resource_name = 1;
  }

  // Resource name of this `Service`, in the format:
  //
  //     projects/[PROJECT_ID_OR_NUMBER]/services/[SERVICE_ID]
  string name = 1;

  // Name shown in UI elements that list this `Service`.
  string display_name = 2;

  // REQUIRED. The service-identifying atoms that specify the underlying
  // service.
  oneof identifier {
    // Custom service type.
    Custom custom = 6;

    // Type for App Engine services.
    AppEngine app_engine = 7;

    // Type for Cloud Endpoints services.
    CloudEndpoints cloud_endpoints = 8;

    // Type for Istio services that live in a Kubernetes cluster.
    ClusterIstio cluster_istio = 9;

    // Type for Istio services scoped to an Istio mesh.
    MeshIstio mesh_istio = 10;

    // Type for canonical services scoped to an Istio mesh. Metrics for Istio
    // are
    // [documented here](https://istio.io/latest/docs/reference/config/metrics/).
    IstioCanonicalService istio_canonical_service = 11;
  }

  // Settings that describe how to query telemetry for this `Service`.
  Telemetry telemetry = 13;

  // Labels used to annotate the service. A label key must begin with a letter.
  // Keys and values may consist of lowercase letters, numbers, underscores,
  // and dashes; each is limited to 63 characters and must be smaller than 128
  // bytes. At most 64 label entries may be stored. A label that has no
  // semantic value may use the empty string as its value.
  map<string, string> user_labels = 14;
}
176
// A Service-Level Objective (SLO) expresses a desired level of good service.
// It combines a service-level indicator (SLI), a performance goal, and the
// period over which the objective is evaluated against that goal. An SLO may
// use SLIs defined in a number of different ways. Typical SLOs include "99% of
// requests in each rolling week have latency below 200 milliseconds" or
// "99.5% of requests in each calendar month return successfully."
message ServiceLevelObjective {
  option (google.api.resource) = {
    type: "monitoring.googleapis.com/ServiceLevelObjective"
    pattern: "projects/{project}/services/{service}/serviceLevelObjectives/{service_level_objective}"
    pattern: "organizations/{organization}/services/{service}/serviceLevelObjectives/{service_level_objective}"
    pattern: "folders/{folder}/services/{service}/serviceLevelObjectives/{service_level_objective}"
    pattern: "*"
    history: ORIGINALLY_SINGLE_PATTERN
  };

  // `ServiceLevelObjective.View` selects the form of `ServiceLevelObjective`
  // returned by the `GetServiceLevelObjective`, `ListServiceLevelObjectives`,
  // and `ListServiceLevelObjectiveVersions` RPCs.
  enum View {
    // Equivalent to `FULL`.
    VIEW_UNSPECIFIED = 0;

    // Return the embedded `ServiceLevelIndicator` in the form in which it was
    // defined: an indicator defined with a `BasicSli` is returned as that
    // `BasicSli`.
    FULL = 2;

    // For a `ServiceLevelIndicator` articulated as a `BasicSli`, return the
    // indicator with its mode of computation fully spelled out as a
    // `RequestBasedSli` instead. A `ServiceLevelIndicator` that uses
    // `RequestBasedSli` or `WindowsBasedSli` is returned as it was provided.
    EXPLICIT = 1;
  }

  // Resource name of this `ServiceLevelObjective`, in the format:
  //
  //     projects/[PROJECT_ID_OR_NUMBER]/services/[SERVICE_ID]/serviceLevelObjectives/[SLO_NAME]
  string name = 1;

  // Name shown in UI elements that list this SLO.
  string display_name = 11;

  // Definition of good service, used to measure and calculate the quality of
  // the `Service`'s performance with respect to one aspect of service quality.
  ServiceLevelIndicator service_level_indicator = 3;

  // Fraction of service that must be good for this objective to be met.
  // `0 < goal <= 0.999`.
  double goal = 4;

  // Time period over which the objective is evaluated.
  oneof period {
    // Rolling time period, meaning "in the past `<rolling_period>`". It must
    // be an integer multiple of 1 day and no larger than 30 days.
    google.protobuf.Duration rolling_period = 5;

    // Calendar period, meaning "since the start of the current
    // `<calendar_period>`". Only `DAY`, `WEEK`, `FORTNIGHT`, and `MONTH` are
    // supported at this time.
    google.type.CalendarPeriod calendar_period = 6;
  }

  // Labels used to annotate the service-level objective. A label key must
  // begin with a letter. Keys and values may consist of lowercase letters,
  // numbers, underscores, and dashes; each is limited to 63 characters and
  // must be smaller than 128 bytes. At most 64 label entries may be stored. A
  // label that has no semantic value may use the empty string as its value.
  map<string, string> user_labels = 12;
}
249
// A Service-Level Indicator (SLI) describes the "performance" of a service.
// Some services have a well-defined SLI; for those, the SLI can be described
// simply by referencing the well-known SLI and supplying the parameters it
// needs. Alternatively, a "custom" SLI can be defined with a query to the
// underlying metric store. An SLI is defined as `good_service /
// total_service` over any queried time interval. The value of performance
// always lies in the range `0 <= performance <= 1`. A custom SLI describes how
// to compute this ratio: by dividing values from a pair of time series, by
// cutting a `Distribution` into good and bad counts, or by counting time
// windows in which the service complies with a criterion. For separation of
// concerns, a single Service-Level Indicator measures performance for only one
// aspect of service quality, such as the fraction of successful queries or of
// fast-enough queries.
message ServiceLevelIndicator {
  // Service-level indicators are grouped by whether the "unit" of service
  // being measured is based on counts of good requests or on counts of good
  // time windows.
  oneof type {
    // Basic SLI on a well-known service type.
    BasicSli basic_sli = 4;

    // Request-based SLIs.
    RequestBasedSli request_based = 1;

    // Windows-based SLIs.
    WindowsBasedSli windows_based = 2;
  }
}
278
// An SLI that measures performance on a well-known service type. Performance
// is computed from pre-defined metrics. The type of the `service_resource`
// determines which metrics are used, and the `service_resource.labels` and
// `metric_labels` are used to build a monitoring filter that narrows that
// metric down to only the data relevant to this service.
message BasicSli {
  // Future parameters for the availability SLI.
  message AvailabilityCriteria {}

  // Parameters for a latency threshold SLI.
  message LatencyCriteria {
    // Good service is the count of requests made to this service that return
    // in no more than `threshold`.
    google.protobuf.Duration threshold = 3;
  }

  // OPTIONAL: The set of RPCs this SLI applies to. Telemetry from other
  // methods is not used to calculate performance for this SLI. When omitted,
  // the SLI applies to all of the Service's methods. Setting this field for a
  // service type that doesn't support breaking down by method results in an
  // error.
  repeated string method = 7;

  // OPTIONAL: The set of locations this SLI applies to. Telemetry from other
  // locations is not used to calculate performance for this SLI. When omitted,
  // the SLI applies to all locations in which the Service has activity.
  // Setting this field for a service type that doesn't support breaking down
  // by location results in an error.
  repeated string location = 8;

  // OPTIONAL: The set of API versions this SLI applies to. Telemetry from
  // other API versions is not used to calculate performance for this SLI. When
  // omitted, the SLI applies to all API versions. Setting this field for a
  // service type that doesn't support breaking down by version results in an
  // error.
  repeated string version = 9;

  // The basis on which this SLI is evaluated: availability or latency.
  oneof sli_criteria {
    // Good service is the count of requests made to this service that return
    // successfully.
    AvailabilityCriteria availability = 2;

    // Good service is the count of requests made to this service that are fast
    // enough with respect to `latency.threshold`.
    LatencyCriteria latency = 3;
  }
}
330
// A range of numerical values bounded by `min` and `max`.
message Range {
  // Minimum of the range.
  double min = 1;

  // Maximum of the range.
  double max = 2;
}
339
// Service-level indicators for which atomic units of service are counted
// directly.
message RequestBasedSli {
  // How the ratio of `good_service` to `total_service` is computed.
  oneof method {
    // Use `good_total_ratio` when the ratio of `good_service` to
    // `total_service` is computed from two `TimeSeries`.
    TimeSeriesRatio good_total_ratio = 1;

    // Use `distribution_cut` when `good_service` is a count of the values
    // aggregated in a `Distribution` that fall into a good range. In that
    // case `total_service` is the total count of all values aggregated in the
    // `Distribution`.
    DistributionCut distribution_cut = 3;
  }
}
356
// A `TimeSeriesRatio` names two `TimeSeries` from which the
// `good_service / total_service` ratio is computed. Each specified
// `TimeSeries` must have `ValueType = DOUBLE` or `ValueType = INT64`, and
// `MetricKind = DELTA` or `MetricKind = CUMULATIVE`. Exactly two of good, bad,
// and total must be specified; the relationship
// `good_service + bad_service = total_service` is assumed.
message TimeSeriesRatio {
  // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
  // that selects a `TimeSeries` quantifying the good service provided. The
  // series must have `ValueType = DOUBLE` or `ValueType = INT64`, and
  // `MetricKind = DELTA` or `MetricKind = CUMULATIVE`.
  string good_service_filter = 4;

  // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
  // that selects a `TimeSeries` quantifying bad service: demanded service that
  // either was not provided or was of inadequate quality. The series must have
  // `ValueType = DOUBLE` or `ValueType = INT64`, and `MetricKind = DELTA` or
  // `MetricKind = CUMULATIVE`.
  string bad_service_filter = 5;

  // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
  // that selects a `TimeSeries` quantifying the total demanded service. The
  // series must have `ValueType = DOUBLE` or `ValueType = INT64`, and
  // `MetricKind = DELTA` or `MetricKind = CUMULATIVE`.
  string total_service_filter = 6;
}
383
// A `DistributionCut` defines a `TimeSeries` and the thresholds used to
// measure good service and total service. The `TimeSeries` must have
// `ValueType = DISTRIBUTION` and `MetricKind = DELTA` or
// `MetricKind = CUMULATIVE`. The computed `good_service` is the estimated
// count of values in the `Distribution` that fall within the specified `min`
// and `max`.
message DistributionCut {
  // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
  // that selects a `TimeSeries` aggregating values. The series must have
  // `ValueType = DISTRIBUTION` and `MetricKind = DELTA` or
  // `MetricKind = CUMULATIVE`.
  string distribution_filter = 4;

  // Range of values regarded as "good." To express a one-sided range, set one
  // of the bounds to an infinite value.
  Range range = 5;
}
399
// A `WindowsBasedSli` defines `good_service` as the number of time windows in
// which the provided service was of good quality. The criteria that decide
// whether service was good are embedded in the `window_criterion`.
message WindowsBasedSli {
  // A `PerformanceThreshold` applies when a window counts as good because it
  // has a sufficiently high `performance`.
  message PerformanceThreshold {
    // How performance over a window is computed: with either a request-based
    // SLI or a basic SLI.
    oneof type {
      // `RequestBasedSli` evaluated to judge window quality.
      RequestBasedSli performance = 1;

      // `BasicSli` evaluated to judge window quality.
      BasicSli basic_sli_performance = 3;
    }

    // A window is counted as good when its `performance >= threshold`.
    double threshold = 2;
  }

  // A `MetricRange` applies when a window counts as good because the value x
  // of a single `TimeSeries` satisfies `range.min <= x <= range.max`. The
  // provided `TimeSeries` must have `ValueType = INT64` or
  // `ValueType = DOUBLE` and `MetricKind = GAUGE`.
  message MetricRange {
    // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
    // that selects the `TimeSeries` used to evaluate window quality.
    string time_series = 1;

    // Range of values regarded as "good." To express a one-sided range, set
    // one of the bounds to an infinite value.
    Range range = 4;
  }

  // Criterion used to evaluate whether a window is good.
  oneof window_criterion {
    // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
    // that selects a `TimeSeries` with `ValueType = BOOL`. The window is good
    // if any `true` values appear in the window.
    string good_bad_metric_filter = 5;

    // The window is good if its `performance` is high enough.
    PerformanceThreshold good_total_ratio_threshold = 2;

    // The window is good if the metric's value, averaged across returned
    // streams, is in a good range.
    MetricRange metric_mean_in_range = 6;

    // The window is good if the metric's value, summed across returned
    // streams, is in a good range.
    MetricRange metric_sum_in_range = 7;
  }

  // Duration over which window quality is evaluated. It must be an integer
  // fraction of a day and at least `60s`.
  google.protobuf.Duration window_period = 4;
}
458