// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1beta3;

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiDocumentSchema";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";

// Metadata for document summarization.
message SummaryOptions {
  // The length options for a summary.
  enum Length {
    // Default value; no length was specified.
    LENGTH_UNSPECIFIED = 0;

    // A brief summary of one or two sentences.
    BRIEF = 1;

    // A paragraph-length summary.
    MODERATE = 2;

    // The longest option available.
    COMPREHENSIVE = 3;
  }

  // The format options for a summary.
  enum Format {
    // Default value; no format was specified.
    FORMAT_UNSPECIFIED = 0;

    // Format the output in paragraphs.
    PARAGRAPH = 1;

    // Format the output in bullets.
    BULLETS = 2;
  }

  // How long the summary should be.
  Length length = 1;

  // The format the summary should be in.
  Format format = 2;
}

// Metadata for how this field value is extracted.
message FieldExtractionMetadata {
  // Summary options config.
  SummaryOptions summary_options = 2;
}

// Metadata about a property.
message PropertyMetadata {
  // Whether the property should be considered as "inactive".
  bool inactive = 3;

  // Field extraction metadata on the property.
  FieldExtractionMetadata field_extraction_metadata = 9;
}

// Metadata about an entity type.
message EntityTypeMetadata {
  // Whether the entity type should be considered inactive.
  bool inactive = 5;
}

// The schema defines the output of the processed document by a processor.
message DocumentSchema {
  // EntityType is the wrapper of a label of the corresponding model with
  // detailed attributes and limitations for entity-based processors. Multiple
  // types can also compose a dependency tree to represent nested types.
  message EntityType {
    // Defines a list of enum values.
    message EnumValues {
      // The individual values that this enum values type can include.
      repeated string values = 1;
    }

    // Defines properties that can be part of the entity type.
    message Property {
      // Types of occurrences of the entity type in the document. This
      // represents the number of instances, not mentions, of an entity.
      // For example, a bank statement might only have one
      // `account_number`, but this account number can be mentioned in several
      // places on the document. In this case, the `account_number` is
      // considered a `REQUIRED_ONCE` entity type. If, on the other hand, we
      // expect a bank statement to contain the status of multiple different
      // accounts for the customers, the occurrence type is set to
      // `REQUIRED_MULTIPLE`.
      enum OccurrenceType {
        // Unspecified occurrence type.
        OCCURRENCE_TYPE_UNSPECIFIED = 0;

        // There will be zero or one instance of this entity type. The same
        // entity instance may be mentioned multiple times.
        OPTIONAL_ONCE = 1;

        // The entity type will appear zero or multiple times.
        OPTIONAL_MULTIPLE = 2;

        // The entity type will only appear exactly once. The same
        // entity instance may be mentioned multiple times.
        REQUIRED_ONCE = 3;

        // The entity type will appear once or more times.
        REQUIRED_MULTIPLE = 4;
      }

      // The name of the property. Follows the same guidelines as the
      // EntityType name.
      string name = 1;

      // User defined name for the property.
      string display_name = 6;

      // A reference to the value type of the property. This type is subject
      // to the same conventions as the `EntityType.base_types` field.
      string value_type = 2;

      // Occurrence type limits the number of instances an entity type appears
      // in the document.
      OccurrenceType occurrence_type = 3;

      // Any additional metadata about the property can be added here.
      PropertyMetadata property_metadata = 5;
    }

    // Where the possible values of this entity type come from. Only
    // `enum_values` is declared as a member of this oneof here.
    oneof value_source {
      // If specified, lists all the possible values for this entity. This
      // should not be more than a handful of values. If the number of values
      // is >10 or could change frequently, use the
      // `EntityType.value_ontology` field and specify a list of all possible
      // values in a value ontology file.
      EnumValues enum_values = 14;
    }

    // User defined name for the type.
    string display_name = 13;

    // Name of the type. It must be unique within the schema file and
    // cannot be a "Common Type". The following naming conventions are used:
    //
    // - Use `snake_casing`.
    // - Name matching is case-sensitive.
    // - Maximum 64 characters.
    // - Must start with a letter.
    // - Allowed characters: ASCII letters `[a-z0-9_-]`. (For backward
    //   compatibility internal infrastructure and tooling can handle any ascii
    //   character.)
    // - The `/` is sometimes used to denote a property of a type. For example
    //   `line_item/amount`. This convention is deprecated, but will still be
    //   honored for backward compatibility.
    string name = 1;

    // The entity type that this type is derived from. For now, one and only
    // one should be set.
    repeated string base_types = 2;

    // Describes the nested structure, or composition, of an entity.
    repeated Property properties = 6;

    // Metadata for the entity type.
    EntityTypeMetadata entity_type_metadata = 11;
  }

  // Metadata for global schema behavior.
  message Metadata {
    // If true, a `document` entity type can be applied to subdocument
    // (splitting). Otherwise, it can only be applied to the entire document
    // (classification).
    bool document_splitter = 1;

    // If true, on a given page, there can be multiple `document` annotations
    // covering it.
    bool document_allow_multiple_labels = 2;

    // If set, all the nested entities must be prefixed with the parents.
    bool prefixed_naming_on_properties = 6;

    // If set, we will skip the naming format validation in the schema. So the
    // string values in `DocumentSchema.EntityType.name` and
    // `DocumentSchema.EntityType.Property.name` will not be checked.
    bool skip_naming_validation = 7;
  }

  // Display name to show to users.
  string display_name = 1;

  // Description of the schema.
  string description = 2;

  // Entity types of the schema.
  repeated EntityType entity_types = 3;

  // Metadata of the schema.
  Metadata metadata = 4;
}