1// Copyright 2022 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
syntax = "proto3";

package google.cloud.documentai.v1;

// Provides google.protobuf.FieldMask, used by
// DocumentOutputConfig.GcsOutputConfig.field_mask.
import "google/protobuf/field_mask.proto";

// Per-language code generation options.
option csharp_namespace = "Google.Cloud.DocumentAI.V1";
option go_package = "cloud.google.com/go/documentai/apiv1/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentIoProto";
option java_package = "com.google.cloud.documentai.v1";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1";
option ruby_package = "Google::Cloud::DocumentAI::V1";

// Payload message carrying the raw bytes of a document together with the
// MIME type that describes them.
message RawDocument {
  // Inline document content.
  bytes content = 1;

  // An IANA MIME type (RFC6838) indicating the nature and format of the
  // [content][google.cloud.documentai.v1.RawDocument.content].
  string mime_type = 2;
}

// Specifies a single document stored on Cloud Storage.
message GcsDocument {
  // The URI of the Cloud Storage object.
  string gcs_uri = 1;

  // An IANA MIME type (RFC6838) of the content.
  string mime_type = 2;
}

// Specifies a set of documents on Cloud Storage, each listed as its own
// GcsDocument entry.
message GcsDocuments {
  // The list of documents.
  repeated GcsDocument documents = 1;
}

// Specifies all documents on Cloud Storage whose URIs share a common prefix.
message GcsPrefix {
  // The URI prefix.
  string gcs_uri_prefix = 1;
}

// The common config to specify a set of documents used as input.
message BatchDocumentsInputConfig {
  // The source of the input documents. Because this is a `oneof`, at most one
  // of the fields below is set at any time; setting one clears the other.
  oneof source {
    // The set of documents that match the specified Cloud Storage `gcs_prefix`.
    GcsPrefix gcs_prefix = 1;

    // The set of documents individually specified on Cloud Storage.
    GcsDocuments gcs_documents = 2;
  }
}

// Config that controls the output of documents. All documents will be written
// as a JSON file.
message DocumentOutputConfig {
  // The configuration used when outputting documents to Cloud Storage.
  message GcsOutputConfig {
    // The sharding config for the output document.
    message ShardingConfig {
      // The number of pages per shard.
      int32 pages_per_shard = 1;

      // The number of overlapping pages between consecutive shards.
      int32 pages_overlap = 2;
    }

    // The Cloud Storage URI (a directory) of the output.
    string gcs_uri = 1;

    // Specifies which fields to include in the output documents.
    // Only top-level document fields and pages fields are supported, so each
    // path must be in the form of `{document_field_name}` or
    // `pages.{page_field_name}`.
    google.protobuf.FieldMask field_mask = 2;

    // Specifies the sharding config for the output document.
    ShardingConfig sharding_config = 3;
  }

  // The destination of the results. Cloud Storage is the only destination
  // declared in this `oneof`.
  oneof destination {
    // Output config to write the results to Cloud Storage.
    GcsOutputConfig gcs_output_config = 1;
  }
}
