// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/cloud/aiplatform/v1/job_state.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/money.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "DataLabelingJobProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// A DataLabelingJob triggers a human labeling job on unlabeled data taken
// from the Dataset listed in `datasets`.
message DataLabelingJob {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/DataLabelingJob"
    pattern: "projects/{project}/locations/{location}/dataLabelingJobs/{data_labeling_job}"
  };

  // Output only. Resource name of the DataLabelingJob.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The user-defined display name of the DataLabelingJob.
  // The name can be up to 128 characters long and can consist of any UTF-8
  // characters.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset resource names. Currently, labeling is only supported
  // from a single Dataset. Format:
  // `projects/{project}/locations/{location}/datasets/{dataset}`
  repeated string datasets = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Dataset"
    }
  ];

  // Labels to assign to annotations generated by this DataLabelingJob.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  // System reserved label keys are prefixed with "aiplatform.googleapis.com/"
  // and are immutable.
  map<string, string> annotation_labels = 12;

  // Required. Number of labelers to work on each DataItem.
  int32 labeler_count = 4 [(google.api.field_behavior) = REQUIRED];

  // Required. The Google Cloud Storage location of the instruction PDF. This
  // PDF is shared with labelers and provides a detailed description of how to
  // label DataItems in Datasets.
  string instruction_uri = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. Points to a YAML file stored on Google Cloud Storage describing
  // the config for a specific type of DataLabelingJob. The schema files that
  // can be used here are found in the
  // https://storage.googleapis.com/google-cloud-aiplatform bucket in the
  // /schema/datalabelingjob/inputs/ folder.
  string inputs_schema_uri = 6 [(google.api.field_behavior) = REQUIRED];

  // Required. Input config parameters for the DataLabelingJob.
  google.protobuf.Value inputs = 7 [(google.api.field_behavior) = REQUIRED];

  // Output only. The detailed state of the job.
  JobState state = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Current labeling job progress percentage scaled in interval
  // [0, 100], indicating the percentage of DataItems that have been finished.
  int32 labeling_progress = 13 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Estimated cost (in US dollars) that the DataLabelingJob has
  // incurred to date.
  google.type.Money current_spend = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp when this DataLabelingJob was created.
  google.protobuf.Timestamp create_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp when this DataLabelingJob was updated most recently.
  google.protobuf.Timestamp update_time = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. DataLabelingJob errors. It is only populated when the job's
  // state is `JOB_STATE_FAILED` or `JOB_STATE_CANCELLED`.
  google.rpc.Status error = 22 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The labels with user-defined metadata to organize your DataLabelingJobs.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  // System reserved label keys are prefixed with "aiplatform.googleapis.com/"
  // and are immutable. The following system labels exist for each
  // DataLabelingJob:
  //
  // * "aiplatform.googleapis.com/schema": output only, its value is the
  //   [inputs_schema][google.cloud.aiplatform.v1.DataLabelingJob.inputs_schema_uri]'s
  //   title.
  map<string, string> labels = 11;

  // The SpecialistPools' resource names associated with this job.
  repeated string specialist_pools = 16;

  // Customer-managed encryption key spec for a DataLabelingJob. If set, this
  // DataLabelingJob will be secured by this key.
  //
  // Note: Annotations created in the DataLabelingJob are associated with
  // the EncryptionSpec of the Dataset they are exported to.
  EncryptionSpec encryption_spec = 20;

  // Parameters that configure the active learning pipeline. Active learning
  // will label the data incrementally via several iterations. For every
  // iteration, it will select a batch of data based on the sampling strategy.
  ActiveLearningConfig active_learning_config = 21;
}

// Parameters that configure the active learning pipeline. Active learning
// labels the data incrementally over several iterations. In every iteration,
// it selects a batch of data based on the sampling strategy.
message ActiveLearningConfig {
  // Required. Max human labeling DataItems. The remainder will be labeled by
  // machine.
  oneof human_labeling_budget {
    // Max number of human labeled DataItems.
    int64 max_data_item_count = 1;

    // Max percent of total DataItems for human labeling.
    int32 max_data_item_percentage = 2;
  }

  // Active learning data sampling config. For every active learning labeling
  // iteration, it will select a batch of data based on the sampling strategy.
  SampleConfig sample_config = 3;

  // CMLE training config. For every active learning labeling iteration, the
  // system will train a machine learning model on CMLE. The trained model will
  // be used by the data sampling algorithm to select DataItems.
  TrainingConfig training_config = 4;
}

// Active learning data sampling config. For every active learning labeling
// iteration, it will select a batch of data based on the sampling strategy.
message SampleConfig {
  // Sample strategy decides which subset of DataItems should be selected for
  // human labeling in every batch.
  enum SampleStrategy {
    // Default will be treated as UNCERTAINTY.
    SAMPLE_STRATEGY_UNSPECIFIED = 0;

    // Sample the most uncertain data to label.
    UNCERTAINTY = 1;
  }

  // Decides the sample size for the initial batch.
  // initial_batch_sample_percentage is used by default.
  oneof initial_batch_sample_size {
    // The percentage of data needed to be labeled in the first batch.
    int32 initial_batch_sample_percentage = 1;
  }

  // Decides the sample size for the following batches.
  // following_batch_sample_percentage is used by default.
  oneof following_batch_sample_size {
    // The percentage of data needed to be labeled in each following batch
    // (except the first batch).
    int32 following_batch_sample_percentage = 3;
  }

  // Field to choose the sampling strategy. The sampling strategy decides which
  // data should be selected for human labeling in every batch.
  SampleStrategy sample_strategy = 5;
}

// CMLE training config. For every active learning labeling iteration, the
// system will train a machine learning model on CMLE. The trained model will
// be used by the data sampling algorithm to select DataItems.
message TrainingConfig {
  // The timeout for the CMLE training job, expressed in milli-hours,
  // i.e. a value of 1,000 in this field means 1 hour.
  int64 timeout_training_milli_hours = 1;
}