// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.automl.v1beta1;

option go_package = "cloud.google.com/go/automl/apiv1beta1/automlpb;automlpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.automl.v1beta1";
option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";
option ruby_package = "Google::Cloud::AutoML::V1beta1";

// The data statistics of a series of values that share the same DataType.
message DataStats {
  // The data statistics specific to a DataType. Being a oneof, at most one
  // of the members below is set at a time.
  oneof stats {
    // The statistics for FLOAT64 DataType.
    Float64Stats float64_stats = 3;

    // The statistics for STRING DataType.
    StringStats string_stats = 4;

    // The statistics for TIMESTAMP DataType.
    TimestampStats timestamp_stats = 5;

    // The statistics for ARRAY DataType.
    ArrayStats array_stats = 6;

    // The statistics for STRUCT DataType.
    StructStats struct_stats = 7;

    // The statistics for CATEGORY DataType.
    CategoryStats category_stats = 8;
  }

  // The number of distinct values.
  int64 distinct_value_count = 1;

  // The number of values that are null.
  int64 null_value_count = 2;

  // The number of values that are valid.
  int64 valid_value_count = 9;
}

// The data statistics of a series of FLOAT64 values.
message Float64Stats {
  // A bucket of a histogram.
  message HistogramBucket {
    // The minimum value of the bucket, inclusive.
    double min = 1;

    // The maximum value of the bucket, exclusive unless max = `"Infinity"`,
    // in which case it's inclusive.
    double max = 2;

    // The number of data values that are in the bucket, i.e. are between
    // min and max values.
    int64 count = 3;
  }

  // The mean of the series.
  double mean = 1;

  // The standard deviation of the series.
  double standard_deviation = 2;

  // The k-quantile values of the data series of n values, ordered from
  // index 0 to index k. The value at index i is, approximately, the
  // i*n/k-th smallest value in the series; for i = 0 and i = k these are,
  // respectively, the min and max values.
  repeated double quantiles = 3;

  // Histogram buckets of the data series, sorted in ascending order by the
  // min value of each bucket. The number of buckets is dynamically
  // generated. The buckets are non-overlapping and completely cover the
  // whole FLOAT64 range, with the min of the first bucket being
  // `"-Infinity"` and the max of the last one being `"Infinity"`.
  repeated HistogramBucket histogram_buckets = 4;
}

// The data statistics of a series of STRING values.
message StringStats {
  // The statistics of a unigram.
  message UnigramStats {
    // The unigram.
    string value = 1;

    // The number of occurrences of this unigram in the series.
    int64 count = 2;
  }

  // The statistics of the top 20 unigrams, ordered by
  // [count][google.cloud.automl.v1beta1.StringStats.UnigramStats.count].
  repeated UnigramStats top_unigram_stats = 1;
}

// The data statistics of a series of TIMESTAMP values.
message TimestampStats {
  // Stats split by a granularity that is defined by the context in which
  // this message is used.
  message GranularStats {
    // A map from granularity key to example count for that key
    // (e.g. for hour_of_day `13` means 1pm, and for month_of_year `5`
    // means May).
    map<int32, int64> buckets = 1;
  }

  // The string key is the pre-defined granularity. Currently supported:
  // hour_of_day, day_of_week, month_of_year.
  // Granularities finer than the granularity of the timestamp data are not
  // populated (e.g. if timestamps are at day granularity, then hour_of_day
  // is not populated).
  map<string, GranularStats> granular_stats = 1;
}

// The data statistics of a series of ARRAY values.
message ArrayStats {
  // Stats of all the values of all arrays, as if they were a single long
  // series of data. The type depends on the element type of the array.
  DataStats member_stats = 2;
}

// The data statistics of a series of STRUCT values.
message StructStats {
  // Map from a field name of the struct to data stats aggregated over the
  // series of all data in that field across all the structs.
  map<string, DataStats> field_stats = 1;
}

// The data statistics of a series of CATEGORY values.
message CategoryStats {
  // The statistics of a single CATEGORY value.
  message SingleCategoryStats {
    // The CATEGORY value.
    string value = 1;

    // The number of occurrences of this value in the series.
    int64 count = 2;
  }

  // The statistics of the top 20 CATEGORY values, ordered by
  // [count][google.cloud.automl.v1beta1.CategoryStats.SingleCategoryStats.count].
  repeated SingleCategoryStats top_category_stats = 1;
}

// The correlation statistics between two series of DataType values. The
// series may have differing DataType-s, but within a single series the
// DataType must be the same.
message CorrelationStats {
  // The correlation value using the Cramer's V measure.
  double cramers_v = 1;
}