// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.monitoring.v3;

import "google/api/resource.proto";
import "google/protobuf/duration.proto";
import "google/type/calendar_period.proto";

option csharp_namespace = "Google.Cloud.Monitoring.V3";
option go_package = "cloud.google.com/go/monitoring/apiv3/v2/monitoringpb;monitoringpb";
option java_multiple_files = true;
option java_outer_classname = "ServiceMonitoringProto";
option java_package = "com.google.monitoring.v3";
option php_namespace = "Google\\Cloud\\Monitoring\\V3";
option ruby_package = "Google::Cloud::Monitoring::V3";

// A `Service` is a discrete, autonomous, and network-accessible unit, designed
// to solve an individual concern
// ([Wikipedia](https://en.wikipedia.org/wiki/Service-orientation)). In
// Cloud Monitoring, a `Service` acts as the root resource under which
// operational aspects of the service are accessible.
message Service {
  option (google.api.resource) = {
    type: "monitoring.googleapis.com/Service"
    pattern: "projects/{project}/services/{service}"
    pattern: "organizations/{organization}/services/{service}"
    pattern: "folders/{folder}/services/{service}"
    pattern: "*"
  };

  // Custom view of service telemetry. Currently a placeholder pending final
  // design.
  message Custom {}

  // App Engine service. Learn more at https://cloud.google.com/appengine.
  message AppEngine {
    // The ID of the App Engine module underlying this service. Corresponds to
    // the `module_id` resource label in the `gae_app` monitored resource:
    // https://cloud.google.com/monitoring/api/resources#tag_gae_app
    string module_id = 1;
  }

  // Cloud Endpoints service. Learn more at https://cloud.google.com/endpoints.
  message CloudEndpoints {
    // The name of the Cloud Endpoints service underlying this service.
    // Corresponds to the `service` resource label in the `api` monitored
    // resource: https://cloud.google.com/monitoring/api/resources#tag_api
    string service = 1;
  }

  // Istio service scoped to a single Kubernetes cluster. Learn more at
  // https://istio.io. Clusters running OSS Istio will have their services
  // ingested as this type.
  message ClusterIstio {
    // The location of the Kubernetes cluster in which this Istio service is
    // defined. Corresponds to the `location` resource label in `k8s_cluster`
    // resources.
    string location = 1;

    // The name of the Kubernetes cluster in which this Istio service is
    // defined. Corresponds to the `cluster_name` resource label in
    // `k8s_cluster` resources.
    string cluster_name = 2;

    // The namespace of the Istio service underlying this service. Corresponds
    // to the `destination_service_namespace` metric label in Istio metrics.
    string service_namespace = 3;

    // The name of the Istio service underlying this service. Corresponds to the
    // `destination_service_name` metric label in Istio metrics.
    string service_name = 4;
  }

  // Istio service scoped to an Istio mesh. Anthos clusters running ASM < 1.6.8
  // will have their services ingested as this type.
  message MeshIstio {
    // Identifier for the mesh in which this Istio service is defined.
    // Corresponds to the `mesh_uid` metric label in Istio metrics.
    string mesh_uid = 1;

    // The namespace of the Istio service underlying this service. Corresponds
    // to the `destination_service_namespace` metric label in Istio metrics.
    string service_namespace = 3;

    // The name of the Istio service underlying this service. Corresponds to the
    // `destination_service_name` metric label in Istio metrics.
    string service_name = 4;
  }

  // Canonical service scoped to an Istio mesh. Anthos clusters running ASM >=
  // 1.6.8 will have their services ingested as this type.
  message IstioCanonicalService {
    // Identifier for the Istio mesh in which this canonical service is defined.
    // Corresponds to the `mesh_uid` metric label in
    // [Istio metrics](https://cloud.google.com/monitoring/api/metrics_istio).
    string mesh_uid = 1;

    // The namespace of the canonical service underlying this service.
    // Corresponds to the `destination_canonical_service_namespace` metric
    // label in [Istio
    // metrics](https://cloud.google.com/monitoring/api/metrics_istio).
    string canonical_service_namespace = 3;

    // The name of the canonical service underlying this service.
    // Corresponds to the `destination_canonical_service_name` metric label in
    // [Istio metrics](https://cloud.google.com/monitoring/api/metrics_istio).
    string canonical_service = 4;
  }

  // Configuration for how to query telemetry on a Service.
  message Telemetry {
    // The full name of the resource that defines this service. Formatted as
    // described in https://cloud.google.com/apis/design/resource_names.
    string resource_name = 1;
  }

  // Resource name for this Service. The format is:
  //
  //     projects/[PROJECT_ID_OR_NUMBER]/services/[SERVICE_ID]
  string name = 1;

  // Name used for UI elements listing this Service.
  string display_name = 2;

  // REQUIRED. Service-identifying atoms specifying the underlying service.
  oneof identifier {
    // Custom service type.
    Custom custom = 6;

    // Type used for App Engine services.
    AppEngine app_engine = 7;

    // Type used for Cloud Endpoints services.
    CloudEndpoints cloud_endpoints = 8;

    // Type used for Istio services that live in a Kubernetes cluster.
    ClusterIstio cluster_istio = 9;

    // Type used for Istio services scoped to an Istio mesh.
    MeshIstio mesh_istio = 10;

    // Type used for canonical services scoped to an Istio mesh.
    // Metrics for Istio are
    // [documented here](https://istio.io/latest/docs/reference/config/metrics/).
    IstioCanonicalService istio_canonical_service = 11;
  }

  // Configuration for how to query telemetry on a Service.
  Telemetry telemetry = 13;

  // Labels which have been used to annotate the service. Label keys must start
  // with a letter. Label keys and values may contain lowercase letters,
  // numbers, underscores, and dashes. Label keys and values have a maximum
  // length of 63 characters, and must be less than 128 bytes in size. Up to 64
  // label entries may be stored. For labels which do not have a semantic value,
  // the empty string may be supplied for the label value.
  map<string, string> user_labels = 14;
}

// A Service-Level Objective (SLO) describes a level of desired good service. It
// consists of a service-level indicator (SLI), a performance goal, and a period
// over which the objective is to be evaluated against that goal. The SLO can
// use SLIs defined in a number of different manners. Typical SLOs might include
// "99% of requests in each rolling week have latency below 200 milliseconds" or
// "99.5% of requests in each calendar month return successfully."
message ServiceLevelObjective {
  option (google.api.resource) = {
    type: "monitoring.googleapis.com/ServiceLevelObjective"
    pattern: "projects/{project}/services/{service}/serviceLevelObjectives/{service_level_objective}"
    pattern: "organizations/{organization}/services/{service}/serviceLevelObjectives/{service_level_objective}"
    pattern: "folders/{folder}/services/{service}/serviceLevelObjectives/{service_level_objective}"
    pattern: "*"
    history: ORIGINALLY_SINGLE_PATTERN
  };

  // `ServiceLevelObjective.View` determines what form of
  // `ServiceLevelObjective` is returned from `GetServiceLevelObjective`,
  // `ListServiceLevelObjectives`, and `ListServiceLevelObjectiveVersions` RPCs.
  //
  // NOTE: the values are declared out of numeric order (`FULL = 2` precedes
  // `EXPLICIT = 1`). The numbers are the wire contract and must not be swapped
  // to "tidy up" the declaration order.
  enum View {
    // Same as FULL.
    VIEW_UNSPECIFIED = 0;

    // Return the embedded `ServiceLevelIndicator` in the form in which it was
    // defined. If it was defined using a `BasicSli`, return that `BasicSli`.
    FULL = 2;

    // For `ServiceLevelIndicator`s using `BasicSli` articulation, instead
    // return the `ServiceLevelIndicator` with its mode of computation fully
    // spelled out as a `RequestBasedSli`. For `ServiceLevelIndicator`s using
    // `RequestBasedSli` or `WindowsBasedSli`, return the
    // `ServiceLevelIndicator` as it was provided.
    EXPLICIT = 1;
  }

  // Resource name for this `ServiceLevelObjective`. The format is:
  //
  //     projects/[PROJECT_ID_OR_NUMBER]/services/[SERVICE_ID]/serviceLevelObjectives/[SLO_NAME]
  string name = 1;

  // Name used for UI elements listing this SLO.
  string display_name = 11;

  // The definition of good service, used to measure and calculate the quality
  // of the `Service`'s performance with respect to a single aspect of service
  // quality.
  ServiceLevelIndicator service_level_indicator = 3;

  // The fraction of service that must be good in order for this objective to be
  // met. `0 < goal <= 0.999`.
  double goal = 4;

  // The time period over which the objective will be evaluated.
  oneof period {
    // A rolling time period, semantically "in the past `<rolling_period>`".
    // Must be an integer multiple of 1 day no larger than 30 days.
    google.protobuf.Duration rolling_period = 5;

    // A calendar period, semantically "since the start of the current
    // `<calendar_period>`". At this time, only `DAY`, `WEEK`, `FORTNIGHT`, and
    // `MONTH` are supported.
    google.type.CalendarPeriod calendar_period = 6;
  }

  // Labels which have been used to annotate the service-level objective. Label
  // keys must start with a letter. Label keys and values may contain lowercase
  // letters, numbers, underscores, and dashes. Label keys and values have a
  // maximum length of 63 characters, and must be less than 128 bytes in size.
  // Up to 64 label entries may be stored. For labels which do not have a
  // semantic value, the empty string may be supplied for the label value.
  map<string, string> user_labels = 12;
}

// A Service-Level Indicator (SLI) describes the "performance" of a service. For
// some services, the SLI is well-defined. In such cases, the SLI can be
// described easily by referencing the well-known SLI and providing the needed
// parameters. Alternatively, a "custom" SLI can be defined with a query to the
// underlying metric store. An SLI is defined to be `good_service /
// total_service` over any queried time interval. The value of performance
// always falls into the range `0 <= performance <= 1`. A custom SLI describes
// how to compute this ratio, whether this is by dividing values from a pair of
// time series, cutting a `Distribution` into good and bad counts, or counting
// time windows in which the service complies with a criterion.
// For separation of concerns, a single Service-Level Indicator measures
// performance for only one aspect of service quality, such as fraction of
// successful queries or fast-enough queries.
message ServiceLevelIndicator {
  // Service level indicators can be grouped by whether the "unit" of service
  // being measured is based on counts of good requests or on counts of good
  // time windows.
  oneof type {
    // Basic SLI on a well-known service type.
    BasicSli basic_sli = 4;

    // Request-based SLIs.
    RequestBasedSli request_based = 1;

    // Windows-based SLIs.
    WindowsBasedSli windows_based = 2;
  }
}

// An SLI measuring performance on a well-known service type. Performance will
// be computed on the basis of pre-defined metrics. The type of the
// `service_resource` determines the metrics to use and the
// `service_resource.labels` and `metric_labels` are used to construct a
// monitoring filter to filter that metric down to just the data relevant to
// this service.
message BasicSli {
  // Future parameters for the availability SLI.
  message AvailabilityCriteria {}

  // Parameters for a latency threshold SLI.
  message LatencyCriteria {
    // Good service is defined to be the count of requests made to this service
    // that return in no more than `threshold`.
    google.protobuf.Duration threshold = 3;
  }

  // OPTIONAL: The set of RPCs to which this SLI is relevant. Telemetry from
  // other methods will not be used to calculate performance for this SLI. If
  // omitted, this SLI applies to all the Service's methods. For service types
  // that don't support breaking down by method, setting this field will result
  // in an error.
  repeated string method = 7;

  // OPTIONAL: The set of locations to which this SLI is relevant. Telemetry
  // from other locations will not be used to calculate performance for this
  // SLI. If omitted, this SLI applies to all locations in which the Service has
  // activity. For service types that don't support breaking down by location,
  // setting this field will result in an error.
  repeated string location = 8;

  // OPTIONAL: The set of API versions to which this SLI is relevant. Telemetry
  // from other API versions will not be used to calculate performance for this
  // SLI. If omitted, this SLI applies to all API versions. For service types
  // that don't support breaking down by version, setting this field will result
  // in an error.
  repeated string version = 9;

  // This SLI can be evaluated on the basis of availability or latency.
  oneof sli_criteria {
    // Good service is defined to be the count of requests made to this service
    // that return successfully.
    AvailabilityCriteria availability = 2;

    // Good service is defined to be the count of requests made to this service
    // that are fast enough with respect to `latency.threshold`.
    LatencyCriteria latency = 3;
  }
}

// Range of numerical values within `min` and `max`.
message Range {
  // Range minimum.
  double min = 1;

  // Range maximum.
  double max = 2;
}

// Service Level Indicators for which atomic units of service are counted
// directly.
message RequestBasedSli {
  // The means to compute a ratio of `good_service` to `total_service`.
  oneof method {
    // `good_total_ratio` is used when the ratio of `good_service` to
    // `total_service` is computed from two `TimeSeries`.
    TimeSeriesRatio good_total_ratio = 1;

    // `distribution_cut` is used when `good_service` is a count of values
    // aggregated in a `Distribution` that fall into a good range. The
    // `total_service` is the total count of all values aggregated in the
    // `Distribution`.
    DistributionCut distribution_cut = 3;
  }
}

// A `TimeSeriesRatio` specifies two `TimeSeries` to use for computing the
// `good_service / total_service` ratio. The specified `TimeSeries` must have
// `ValueType = DOUBLE` or `ValueType = INT64` and must have `MetricKind =
// DELTA` or `MetricKind = CUMULATIVE`. The `TimeSeriesRatio` must specify
// exactly two of good, bad, and total, and the relationship `good_service +
// bad_service = total_service` will be assumed.
message TimeSeriesRatio {
  // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
  // specifying a `TimeSeries` quantifying good service provided. Must have
  // `ValueType = DOUBLE` or `ValueType = INT64` and must have `MetricKind =
  // DELTA` or `MetricKind = CUMULATIVE`.
  string good_service_filter = 4;

  // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
  // specifying a `TimeSeries` quantifying bad service, either demanded service
  // that was not provided or demanded service that was of inadequate quality.
  // Must have `ValueType = DOUBLE` or `ValueType = INT64` and must have
  // `MetricKind = DELTA` or `MetricKind = CUMULATIVE`.
  string bad_service_filter = 5;

  // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
  // specifying a `TimeSeries` quantifying total demanded service. Must have
  // `ValueType = DOUBLE` or `ValueType = INT64` and must have `MetricKind =
  // DELTA` or `MetricKind = CUMULATIVE`.
  string total_service_filter = 6;
}

// A `DistributionCut` defines a `TimeSeries` and thresholds used for measuring
// good service and total service. The `TimeSeries` must have `ValueType =
// DISTRIBUTION` and `MetricKind = DELTA` or `MetricKind = CUMULATIVE`. The
// computed `good_service` will be the estimated count of values in the
// `Distribution` that fall within the specified `min` and `max`.
message DistributionCut {
  // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
  // specifying a `TimeSeries` aggregating values. Must have `ValueType =
  // DISTRIBUTION` and `MetricKind = DELTA` or `MetricKind = CUMULATIVE`.
  string distribution_filter = 4;

  // Range of values considered "good." For a one-sided range, set one bound to
  // an infinite value.
  Range range = 5;
}

// A `WindowsBasedSli` defines `good_service` as the count of time windows for
// which the provided service was of good quality. Criteria for determining
// if service was good are embedded in the `window_criterion`.
message WindowsBasedSli {
  // A `PerformanceThreshold` is used when each window is good when that window
  // has a sufficiently high `performance`.
  message PerformanceThreshold {
    // The means, either a request-based SLI or a basic SLI, by which to compute
    // performance over a window.
    oneof type {
      // `RequestBasedSli` to evaluate to judge window quality.
      RequestBasedSli performance = 1;

      // `BasicSli` to evaluate to judge window quality.
      BasicSli basic_sli_performance = 3;
    }

    // If window `performance >= threshold`, the window is counted as good.
    double threshold = 2;
  }

  // A `MetricRange` is used when each window is good when the value x of a
  // single `TimeSeries` satisfies `range.min <= x <= range.max`. The provided
  // `TimeSeries` must have `ValueType = INT64` or `ValueType = DOUBLE` and
  // `MetricKind = GAUGE`.
  message MetricRange {
    // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
    // specifying the `TimeSeries` to use for evaluating window quality.
    string time_series = 1;

    // Range of values considered "good." For a one-sided range, set one bound
    // to an infinite value.
    Range range = 4;
  }

  // The criterion to use for evaluating window goodness.
  oneof window_criterion {
    // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
    // specifying a `TimeSeries` with `ValueType = BOOL`. The window is good if
    // any `true` values appear in the window.
    string good_bad_metric_filter = 5;

    // A window is good if its `performance` is high enough.
    PerformanceThreshold good_total_ratio_threshold = 2;

    // A window is good if the metric's value is in a good range, averaged
    // across returned streams.
    MetricRange metric_mean_in_range = 6;

    // A window is good if the metric's value is in a good range, summed across
    // returned streams.
    MetricRange metric_sum_in_range = 7;
  }

  // Duration over which window quality is evaluated. Must be an integer
  // fraction of a day and at least `60s`.
  google.protobuf.Duration window_period = 4;
}