// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.automl.v1beta1;

// Per-language code generation options. These are part of the published API
// surface for generated clients and must not be changed.
option go_package = "cloud.google.com/go/automl/apiv1beta1/automlpb;automlpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.automl.v1beta1";
option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";
option ruby_package = "Google::Cloud::AutoML::V1beta1";

// The data statistics of a series of values that share the same DataType.
message DataStats {
  // The data statistics specific to a DataType. At most one of these fields
  // is set.
  oneof stats {
    // The statistics for FLOAT64 DataType.
    Float64Stats float64_stats = 3;

    // The statistics for STRING DataType.
    StringStats string_stats = 4;

    // The statistics for TIMESTAMP DataType.
    TimestampStats timestamp_stats = 5;

    // The statistics for ARRAY DataType.
    ArrayStats array_stats = 6;

    // The statistics for STRUCT DataType.
    StructStats struct_stats = 7;

    // The statistics for CATEGORY DataType.
    CategoryStats category_stats = 8;
  }

  // The number of distinct values.
  int64 distinct_value_count = 1;

  // The number of values that are null.
  int64 null_value_count = 2;

  // The number of values that are valid.
  int64 valid_value_count = 9;
}

// The data statistics of a series of FLOAT64 values.
message Float64Stats {
  // A bucket of a histogram.
  message HistogramBucket {
    // The minimum value of the bucket, inclusive.
    double min = 1;

    // The maximum value of the bucket, exclusive unless max = `"Infinity"`, in
    // which case it's inclusive.
    double max = 2;

    // The number of data values that are in the bucket, i.e. are between
    // min and max values.
    int64 count = 3;
  }

  // The mean of the series.
  double mean = 1;

  // The standard deviation of the series.
  double standard_deviation = 2;

  // The k-quantile values of the data series of n values, ordered from
  // index 0 to index k.
  // The value at index i is, approximately, the i*n/k-th smallest value in the
  // series; for i = 0 and i = k these are, respectively, the min and max
  // values.
  repeated double quantiles = 3;

  // Histogram buckets of the data series. Sorted by the min value of the
  // bucket, ascendingly, and the number of the buckets is dynamically
  // generated. The buckets are non-overlapping and completely cover whole
  // FLOAT64 range with min of first bucket being `"-Infinity"`, and max of
  // the last one being `"Infinity"`.
  repeated HistogramBucket histogram_buckets = 4;
}

// The data statistics of a series of STRING values.
message StringStats {
  // The statistics of a unigram.
  message UnigramStats {
    // The unigram.
    string value = 1;

    // The number of occurrences of this unigram in the series.
    int64 count = 2;
  }

  // The statistics of the top 20 unigrams, ordered by
  // [count][google.cloud.automl.v1beta1.StringStats.UnigramStats.count].
  repeated UnigramStats top_unigram_stats = 1;
}

// The data statistics of a series of TIMESTAMP values.
message TimestampStats {
  // Stats split by a granularity that is defined in context.
  message GranularStats {
    // A map from granularity key to example count for that key
    // (e.g. for hour_of_day, `13` means 1pm; for month_of_year, `5` means
    // May).
    map<int32, int64> buckets = 1;
  }

  // The string key is the pre-defined granularity. Currently supported:
  // hour_of_day, day_of_week, month_of_year.
  // Granularities finer than the granularity of timestamp data are not
  // populated (e.g. if timestamps are at day granularity, then hour_of_day
  // is not populated).
  map<string, GranularStats> granular_stats = 1;
}

// The data statistics of a series of ARRAY values.
message ArrayStats {
  // Stats of all the values of all arrays, as if they were a single long
  // series of data. The type depends on the element type of the array.
  DataStats member_stats = 2;
}

// The data statistics of a series of STRUCT values.
message StructStats {
  // Map from a field name of the struct to data stats aggregated over series
  // of all data in that field across all the structs.
  map<string, DataStats> field_stats = 1;
}

// The data statistics of a series of CATEGORY values.
message CategoryStats {
  // The statistics of a single CATEGORY value.
  message SingleCategoryStats {
    // The CATEGORY value.
    string value = 1;

    // The number of occurrences of this value in the series.
    int64 count = 2;
  }

  // The statistics of the top 20 CATEGORY values, ordered by
  // [count][google.cloud.automl.v1beta1.CategoryStats.SingleCategoryStats.count].
  repeated SingleCategoryStats top_category_stats = 1;
}

// Correlation statistics between two series of DataType values. The series
// may have differing DataType-s, but within a single series the DataType must
// be the same.
message CorrelationStats {
  // The correlation value using the Cramer's V measure.
  double cramers_v = 1;
}
