// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1;

import "google/protobuf/field_mask.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1";
option go_package = "cloud.google.com/go/documentai/apiv1/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentIoProto";
option java_package = "com.google.cloud.documentai.v1";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1";
option ruby_package = "Google::Cloud::DocumentAI::V1";

// Payload message of raw document content (bytes).
message RawDocument {
  // Inline document content.
  bytes content = 1;

  // An IANA MIME type (RFC6838) indicating the nature and format of the
  // [content][google.cloud.documentai.v1.RawDocument.content].
  string mime_type = 2;
}

// Specifies a document stored on Cloud Storage.
message GcsDocument {
  // The Cloud Storage object URI.
  string gcs_uri = 1;

  // An IANA MIME type (RFC6838) of the content.
  string mime_type = 2;
}

// Specifies a set of documents on Cloud Storage.
message GcsDocuments {
  // The list of documents.
  repeated GcsDocument documents = 1;
}

// Specifies all documents on Cloud Storage with a common prefix.
message GcsPrefix {
  // The URI prefix.
  string gcs_uri_prefix = 1;
}

// The common config to specify a set of documents used as input.
message BatchDocumentsInputConfig {
  // The source. As a oneof, at most one of the members below is set at a
  // time; setting one clears the other.
  oneof source {
    // The set of documents that match the specified Cloud Storage `gcs_prefix`.
    GcsPrefix gcs_prefix = 1;

    // The set of documents individually specified on Cloud Storage.
    GcsDocuments gcs_documents = 2;
  }
}

// Config that controls the output of documents. All documents will be written
// as a JSON file.
message DocumentOutputConfig {
  // The configuration used when outputting documents.
  message GcsOutputConfig {
    // The sharding config for the output document.
    message ShardingConfig {
      // The number of pages per shard.
      int32 pages_per_shard = 1;

      // The number of overlapping pages between consecutive shards.
      int32 pages_overlap = 2;
    }

    // The Cloud Storage URI (a directory) of the output.
    string gcs_uri = 1;

    // Specifies which fields to include in the output documents.
    // Only top-level document fields and fields of `pages` are supported, so
    // each path must be in the form of `{document_field_name}` or
    // `pages.{page_field_name}`.
    google.protobuf.FieldMask field_mask = 2;

    // Specifies the sharding config for the output document.
    ShardingConfig sharding_config = 3;
  }

  // The destination of the results.
  oneof destination {
    // Output config to write the results to Cloud Storage.
    GcsOutputConfig gcs_output_config = 1;
  }
}