1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "page.h"
18 
19 #include <stddef.h>
20 #include <stdint.h>
21 
22 #include <algorithm>
23 #include <limits>
24 #include <span>
25 #include <string>
26 #include <vector>
27 
28 #include "cpp/fpdf_scopers.h"
29 #include "form_filler.h"
30 #include "form_widget_info.h"
31 #include "fpdf_annot.h"
32 #include "fpdf_doc.h"
33 #include "fpdf_text.h"
34 #include "fpdfview.h"
35 #include "logging.h"
36 #include "normalize.h"
37 #include "rect.h"
38 #include "utf.h"
39 #include "utils/annot_hider.h"
40 #include "utils/text.h"
41 
42 #define LOG_TAG "page"
43 
44 using std::vector;
45 
46 namespace pdfClient {
47 
// Byte distance between consecutive pixels; used as the per-pixel stride by
// Page::InPlaceSwapRedBlueChannels.
static const int kBytesPerPixel = 4;

// Rectangle with all edges at zero; the value invalid_rect_ holds when nothing
// has been invalidated.
static const Rectangle_i kEmptyIntRectangle = IntRect(0, 0, 0, 0);

// The acceptable fatness / inaccuracy of a user's finger in points.
static const int kFingerTolerance = 10;

// Values of the |render_mode| argument that Page::Render recognizes.
static const int RENDER_MODE_FOR_DISPLAY = 1;
static const int RENDER_MODE_FOR_PRINT = 2;
57 
// Wraps page |page_num| of |doc|. The page handle is loaded right away with
// FPDF_LoadPage; |form_filler| is stored as given and the invalidated
// rectangle starts out empty.
Page::Page(FPDF_DOCUMENT doc, int page_num, FormFiller* form_filler)
    : document_(doc),
      page_(FPDF_LoadPage(doc, page_num)),
      form_filler_(form_filler),
      invalid_rect_(kEmptyIntRectangle),
      page_num_(page_num) {}
64 
// Move construction uses the compiler-generated member-wise move.
Page::Page(Page&& p) = default;
66 
~Page()67 Page::~Page() {}
68 
Width() const69 int Page::Width() const {
70     return FPDF_GetPageWidth(page_.get());
71 }
72 
Height() const73 int Page::Height() const {
74     return FPDF_GetPageHeight(page_.get());
75 }
76 
Dimensions() const77 Rectangle_i Page::Dimensions() const {
78     return IntRect(0, 0, Width(), Height());
79 }
80 
Render(FPDF_BITMAP bitmap,FS_MATRIX transform,int clip_left,int clip_top,int clip_right,int clip_bottom,int render_mode,int show_annot_types,bool render_form_fields)81 void Page::Render(FPDF_BITMAP bitmap, FS_MATRIX transform, int clip_left, int clip_top,
82                   int clip_right, int clip_bottom, int render_mode, int show_annot_types,
83                   bool render_form_fields) {
84     std::unordered_set<int> types;
85     for (auto renderFlag_annot : renderFlagsAnnotsMap) {
86         if ((renderFlag_annot.first & show_annot_types) != 0) {
87             for (int annot_type : renderFlag_annot.second) {
88                 types.insert(annot_type);
89             }
90         }
91     }
92     if (render_form_fields) types.insert(FPDF_ANNOT_WIDGET);
93     pdfClient_utils::AnnotHider annot_hider(page_.get(), types);
94     int renderFlags = FPDF_REVERSE_BYTE_ORDER;
95     if (render_mode == RENDER_MODE_FOR_DISPLAY) {
96         renderFlags |= FPDF_LCD_TEXT | FPDF_ANNOT;
97     } else if (render_mode == RENDER_MODE_FOR_PRINT) {
98         renderFlags |= FPDF_PRINTING;
99     }
100 
101     FS_RECTF clip = {(float)clip_left, (float)clip_top, (float)clip_right, (float)clip_bottom};
102     FPDF_RenderPageBitmapWithMatrix(bitmap, page_.get(), &transform, &clip, renderFlags);
103 
104     if (render_form_fields) {
105         form_filler_->RenderTile(page_.get(), bitmap, transform, clip, renderFlags);
106     }
107 }
108 
ApplyPageTransform(const Point_d & input) const109 Point_i Page::ApplyPageTransform(const Point_d& input) const {
110     Point_i output;
111     FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
112                       &output.y);
113     return output;
114 }
115 
ApplyPageTransform(const Rectangle_d & input) const116 Rectangle_i Page::ApplyPageTransform(const Rectangle_d& input) const {
117     return ApplyPageTransform(OuterIntRect(input));
118 }
119 
// Converts a rectangle in page coordinates to device coordinates by
// transforming its (left, top) and (right, bottom) corners, then clips the
// result to the page's Dimensions().
Rectangle_i Page::ApplyPageTransform(const Rectangle_i& input) const {
    // Value-initialized so that a failed conversion (for example when the page
    // handle is null) yields (0, 0) corners instead of indeterminate values.
    Point_i output1{};
    Point_i output2{};
    FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.left, input.top, &output1.x,
                      &output1.y);
    FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.right, input.bottom,
                      &output2.x, &output2.y);

    Rectangle_i output = IntRect(output1, output2);
    // Constrain output within the page.
    output = Intersect(output, Dimensions());
    return output;
}
132 
UnapplyPageTransform(const Point_i & input) const133 Point_d Page::UnapplyPageTransform(const Point_i& input) const {
134     Point_d output;
135     FPDF_DeviceToPage(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
136                       &output.y);
137     return output;
138 }
139 
NumChars()140 int Page::NumChars() {
141     return FPDFText_CountChars(text_page());
142 }
143 
GetUnicode(int char_index)144 uint32_t Page::GetUnicode(int char_index) {
145     return FPDFText_GetUnicode(text_page(), char_index);
146 }
147 
GetTextUtf8()148 std::string Page::GetTextUtf8() {
149     return GetTextUtf8(first_printable_char_index(), last_printable_char_index() + 1);
150 }
151 
GetTextUtf8(const int start_index,const int stop_index)152 std::string Page::GetTextUtf8(const int start_index, const int stop_index) {
153     std::string result;
154     for (int i = start_index; i < stop_index; i++) {
155         AppendpdfClientCodepointAsUtf8(GetUnicode(i), &result);
156     }
157     return result;
158 }
159 
GetAltTextUtf8(vector<std::string> * result) const160 void Page::GetAltTextUtf8(vector<std::string>* result) const {
161     ::pdfClient_utils::GetAltText(page_.get(), result);
162 }
163 
FindMatchesUtf8(std::string_view utf8,vector<TextRange> * matches)164 int Page::FindMatchesUtf8(std::string_view utf8, vector<TextRange>* matches) {
165     std::u32string query(Utf8ToUtf32(utf8));
166     // Normalize characters of string for searching - ignore case and accents.
167     NormalizeStringForSearch(&query);
168     TextRange match;
169     int page_start = first_printable_char_index();
170     int page_stop = last_printable_char_index() + 1;
171     int num_matches = 0;
172     while (FindMatch(query, page_start, page_stop, &match)) {
173         if (matches != nullptr) {
174             matches->push_back(match);
175         }
176         num_matches++;
177         page_start = match.second;
178     }
179     return num_matches;
180 }
181 
BoundsOfMatchesUtf8(std::string_view utf8,vector<Rectangle_i> * rects,vector<int> * match_to_rect,vector<int> * char_indexes)182 int Page::BoundsOfMatchesUtf8(std::string_view utf8, vector<Rectangle_i>* rects,
183                               vector<int>* match_to_rect, vector<int>* char_indexes) {
184     vector<TextRange> matches;
185     int num_matches = FindMatchesUtf8(utf8, &matches);
186     int num_rects = 0;
187     int num_matches_with_rects = 0;
188     for (int i = 0; i < num_matches; i++) {
189         int start = matches[i].first, stop = matches[i].second;
190         int num_rects_for_match = GetTextBounds(start, stop, rects);
191         if (num_rects_for_match == 0) {
192             continue;
193         }
194         if (match_to_rect != nullptr) {
195             match_to_rect->push_back(num_rects);
196         }
197         if (char_indexes != nullptr) {
198             char_indexes->push_back(start);
199         }
200         num_rects += num_rects_for_match;
201         num_matches_with_rects++;
202     }
203     return num_matches_with_rects;
204 }
205 
GetTextBounds(const int start_index,const int stop_index,vector<Rectangle_i> * rects)206 int Page::GetTextBounds(const int start_index, const int stop_index, vector<Rectangle_i>* rects) {
207     int num_rects = 0;
208     Rectangle_d rect = DoubleRect(0, 0, 0, 0);
209     for (int index = start_index; index < stop_index; index++) {
210         double x1, x2, y1, y2;
211         // This call doesn't apply the page transform - have to apply later.
212         FPDFText_GetCharBox(text_page(), index, &x1, &x2, &y1, &y2);
213         if (x1 != x2 && y1 != y2) {
214             if (IsEmpty(rect)) {
215                 rect = DoubleRect(x1, y1, x2, y2);
216             } else {
217                 rect = Union(rect, DoubleRect(x1, y1, x2, y2));
218             }
219         }
220         // Starting a new line - push current rect, start a new rect.
221         if (IsLineBreak(GetUnicode(index))) {
222             if (!IsEmpty(rect)) {
223                 num_rects++;
224                 rects->push_back(ApplyPageTransform(rect));
225             }
226             rect = DoubleRect(0, 0, 0, 0);
227         }
228     }
229     // Push the last current rect.
230     if (!IsEmpty(rect)) {
231         num_rects++;
232         rects->push_back(ApplyPageTransform(rect));
233     }
234     return num_rects;
235 }
236 
SelectWordAt(const Point_i & point,SelectionBoundary * start,SelectionBoundary * stop)237 bool Page::SelectWordAt(const Point_i& point, SelectionBoundary* start, SelectionBoundary* stop) {
238     Point_d char_point = UnapplyPageTransform(point);
239     int char_index = FPDFText_GetCharIndexAtPos(text_page(), char_point.x, char_point.y,
240                                                 kFingerTolerance, kFingerTolerance);
241     if (char_index < 0 || IsWordBreak(GetUnicode(char_index))) {
242         return false;  // No word at the given point to select.
243     }
244     start->index = GetWordStartIndex(char_index);
245     stop->index = GetWordStopIndex(char_index);
246     ConstrainBoundary(start);
247     ConstrainBoundary(stop);
248     return true;
249 }
250 
ConstrainBoundary(SelectionBoundary * boundary)251 void Page::ConstrainBoundary(SelectionBoundary* boundary) {
252     if (boundary->index < 0) {
253         // Index is not specified - find the nearest index to the given point.
254         *boundary = GetBoundaryAtPoint(boundary->point);
255     } else {
256         // Index is specified - find the point at that index.
257         int index = std::max(boundary->index, first_printable_char_index());
258         index = std::min(index, last_printable_char_index() + 1);
259         *boundary = GetBoundaryAtIndex(index);
260     }
261 }
262 
GetFontSize(int index)263 int Page::GetFontSize(int index) {
264     return FPDFText_GetFontSize(text_page(), index);
265 }
266 
GetLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const267 int Page::GetLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
268                        vector<std::string>* urls) const {
269     return GetAnnotatedLinksUtf8(rects, link_to_rect, urls) +
270            GetInferredLinksUtf8(rects, link_to_rect, urls);
271 }
272 
GetGotoLinks() const273 vector<GotoLink> Page::GetGotoLinks() const {
274     vector<GotoLink> links;
275 
276     FPDF_LINK link = nullptr;
277     int pos = 0;
278     while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
279         if (!IsGotoLink(link)) {
280             continue;
281         }
282         // Get the bounds of the actual link
283         vector<Rectangle_i> goto_link_rects;
284         Rectangle_i rect = GetRect(link);
285         goto_link_rects.push_back(rect);
286 
287         GotoLinkDest* goto_link_dest = new GotoLinkDest();
288 
289         // Get and parse the destination
290         FPDF_DEST fpdf_dest = FPDFLink_GetDest(document_, link);
291         int dest_page_index = FPDFDest_GetDestPageIndex(document_, fpdf_dest);
292         if (dest_page_index < 0) {
293             LOGE("Goto Link has invalid destination page index");
294             continue;
295         }
296         goto_link_dest->set_page_number(dest_page_index);
297 
298         FPDF_BOOL has_x_coord;
299         FPDF_BOOL has_y_coord;
300         FPDF_BOOL has_zoom;
301         FS_FLOAT x;
302         FS_FLOAT y;
303         FS_FLOAT zoom;
304         FPDF_BOOL success = FPDFDest_GetLocationInPage(fpdf_dest, &has_x_coord, &has_y_coord,
305                                                        &has_zoom, &x, &y, &zoom);
306 
307         if (!success) {
308             continue;
309         }
310         if (has_x_coord) {
311             auto point = DoublePoint(x, 0);
312             auto tPoint = ApplyPageTransform(point);
313             goto_link_dest->set_x(tPoint.x);
314         }
315         if (has_y_coord) {
316             auto point = DoublePoint(0, y);
317             auto tPoint = ApplyPageTransform(point);
318             goto_link_dest->set_y(tPoint.y);
319         }
320         if (has_zoom) {
321             goto_link_dest->set_zoom(zoom);
322         }
323 
324         GotoLink goto_link = GotoLink{goto_link_rects, *goto_link_dest};
325 
326         // Ensure that links are within page bounds
327         if (goto_link_dest->x >= 0 && goto_link_dest->y >= 0) {
328             links.push_back(goto_link);
329         } else {
330             LOGE("Goto Link out of bound (x=%f, y=%f). Page width=%d, height =%d",
331                  goto_link_dest->x, goto_link_dest->y, Width(), Height());
332         }
333     }
334     return links;
335 }
336 
InitializeFormFilling()337 void Page::InitializeFormFilling() {
338     form_filler_->NotifyAfterPageLoad(page_.get());
339 }
340 
TerminateFormFilling()341 void Page::TerminateFormFilling() {
342     form_filler_->NotifyBeforePageClose(page_.get());
343 }
344 
GetFormWidgetInfo(Point_i point)345 FormWidgetInfo Page::GetFormWidgetInfo(Point_i point) {
346     Point_d page_point = UnapplyPageTransform(point);
347     FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), page_point);
348     if (result.FoundWidget()) {
349         // widget_rect is in page coords, transform to device coords before
350         // returning to user.
351         Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
352         result.set_widget_rect(transformed_widget_rect);
353     }
354 
355     // Consume any rectangle that was invalidated by this action. Some
356     // info-gathering actions may cause temporary invalidation without
357     // actually doing anything that we need to redraw for.
358     ConsumeInvalidRect();
359     return result;
360 }
361 
GetFormWidgetInfo(int annotation_index)362 FormWidgetInfo Page::GetFormWidgetInfo(int annotation_index) {
363     FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), annotation_index);
364     if (result.FoundWidget()) {
365         // widget_rect is in page coords; transform to device coords before
366         // returning to user.
367         Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
368         result.set_widget_rect(transformed_widget_rect);
369     }
370 
371     // Consume any rectangle that was invalidated by this action. Some
372     // info-gathering actions may cause temporary invalidation without
373     // actually doing anything that we need to redraw for.
374     ConsumeInvalidRect();
375     return result;
376 }
377 
GetFormWidgetInfos(const std::unordered_set<int> & type_ids,std::vector<FormWidgetInfo> * widget_infos)378 void Page::GetFormWidgetInfos(const std::unordered_set<int>& type_ids,
379                               std::vector<FormWidgetInfo>* widget_infos) {
380     form_filler_->GetFormWidgetInfos(page_.get(), type_ids, widget_infos);
381     for (FormWidgetInfo& widget_info : *widget_infos) {
382         // widget_rect is in page coords; transform to device coords before
383         // returning to user.
384         Rectangle_i transformed_widget_rect = ApplyPageTransform(widget_info.widget_rect());
385         widget_info.set_widget_rect(transformed_widget_rect);
386     }
387 
388     // Consume any rectangles that were invalidated by this action. Some
389     // info-gathering actions may cause temporary invalidation without
390     // actually doing anything that we need to redraw for.
391     ConsumeInvalidRect();
392 }
393 
ClickOnPoint(Point_i point)394 bool Page::ClickOnPoint(Point_i point) {
395     Point_d page_point = UnapplyPageTransform(point);
396     return form_filler_->ClickOnPoint(page_.get(), page_point);
397 }
SetFormFieldText(int annotation_index,std::string_view text)398 bool Page::SetFormFieldText(int annotation_index, std::string_view text) {
399     return form_filler_->SetText(page_.get(), annotation_index, text);
400 }
401 
SetChoiceSelection(int annotation_index,std::span<const int> selected_indices)402 bool Page::SetChoiceSelection(int annotation_index, std::span<const int> selected_indices) {
403     return form_filler_->SetChoiceSelection(page_.get(), annotation_index, selected_indices);
404 }
NotifyInvalidRect(Rectangle_i rect)405 void Page::NotifyInvalidRect(Rectangle_i rect) {
406     if (rect.left < 0 || rect.top < 0 || rect.right < 0 || rect.bottom < 0 || IsEmpty(rect)) {
407         return;
408     }
409 
410     Rectangle_i device_rect = ApplyPageTransform(rect);
411     // If invalid_rect_ is currently empty, avoid unioning so we don't extend
412     // |rect|'s top left corner to (0,0) for no reason.
413     if (IsEmpty(invalid_rect_)) {
414         invalid_rect_ = device_rect;
415         return;
416     }
417 
418     invalid_rect_ = Union(invalid_rect_, device_rect);
419 }
420 
HasInvalidRect()421 bool Page::HasInvalidRect() {
422     return !IsEmpty(invalid_rect_);
423 }
424 
ConsumeInvalidRect()425 Rectangle_i Page::ConsumeInvalidRect() {
426     Rectangle_i copy = invalid_rect_;
427     invalid_rect_ = kEmptyIntRectangle;
428     return copy;
429 }
430 
page()431 void* Page::page() {
432     return page_.get();
433 }
434 
text_page()435 FPDF_TEXTPAGE Page::text_page() {
436     EnsureTextPageInitialized();
437     return text_page_.get();
438 }
439 
first_printable_char_index()440 int Page::first_printable_char_index() {
441     EnsureTextPageInitialized();
442     return first_printable_char_index_;
443 }
444 
last_printable_char_index()445 int Page::last_printable_char_index() {
446     EnsureTextPageInitialized();
447     return last_printable_char_index_;
448 }
449 
EnsureTextPageInitialized()450 void Page::EnsureTextPageInitialized() {
451     if (text_page_) {
452         return;
453     }
454     if (!page_.get()) {
455         // Page should never be null but a partner has an unexplained bug b/376796346
456         LOGE("Null page (err=%lu). for (page_num=%d)", FPDF_GetLastError(), page_num_);
457         // since the text_page_ would not have a page to load from
458         return;
459     }
460 
461     text_page_.reset(FPDFText_LoadPage(page_.get()));
462     if (!text_page_) {
463         // This will get into infinite recursion if not returned - b/376796346
464         LOGE("Failed to load text (err=%lu). for (page_num=%d)", FPDF_GetLastError(), page_num_);
465         return;
466     }
467 
468     int num_chars = NumChars();
469 
470     int i;
471     for (i = 0; i < num_chars && IsWordBreak(GetUnicode(i)); i++) {
472     }
473     first_printable_char_index_ = i;
474 
475     for (i = num_chars - 1; i >= first_printable_char_index_ && IsWordBreak(GetUnicode(i)); i--) {
476     }
477     last_printable_char_index_ = i;
478 }
479 
InPlaceSwapRedBlueChannels(void * pixels,const int num_pixels) const480 void Page::InPlaceSwapRedBlueChannels(void* pixels, const int num_pixels) const {
481     uint8_t* channels = static_cast<uint8_t*>(pixels);
482     uint8_t* channel1 = channels;
483     uint8_t* channel3 = channels + 2;
484 
485     for (int i = 0; i < num_pixels; ++i, channel1 += kBytesPerPixel, channel3 += kBytesPerPixel) {
486         std::swap(*channel1, *channel3);
487     }
488 }
489 
FindMatch(const std::u32string & query,const int page_start,const int page_stop,TextRange * match)490 bool Page::FindMatch(const std::u32string& query, const int page_start, const int page_stop,
491                      TextRange* match) {
492     if (query.empty()) {
493         return false;
494     }
495 
496     int max_match_start = page_stop - query.length();
497     for (int m = page_start; m <= max_match_start; m++) {
498         if (IsMatch(query, m, page_stop, match)) {
499             return true;
500         }
501     }
502     return false;
503 }
504 
IsMatch(const std::u32string & query,const int match_start,const int page_stop,TextRange * match)505 bool Page::IsMatch(const std::u32string& query, const int match_start, const int page_stop,
506                    TextRange* match) {
507     int page_index = match_start;
508     size_t query_index = 0;
509     uint32_t page_char = 0, prev_char = 0;
510     while (query_index < query.length()) {
511         prev_char = page_char;
512         page_char = GetUnicode(page_index);
513 
514         if (NormalizeForSearch(page_char) == query[query_index]) {
515             // This codepoint matches (ignoring case and accents). Move to next.
516             query_index++;
517             page_index++;
518         } else if (IsSkippableForSearch(page_char, prev_char) && query_index > 0) {
519             // Don't increment query index - skip over skippable character.
520             page_index++;
521             if ((page_stop - page_index) < (query.length() - query_index)) {
522                 return false;  // Not enough room for query string before page_stop.
523             }
524         } else {
525             return false;
526         }
527     }
528     // Update match to contain page indices of match start and match stop.
529     match->first = match_start;
530     match->second = page_index;
531     return true;
532 }
533 
GetBoundaryAtIndex(const int index)534 SelectionBoundary Page::GetBoundaryAtIndex(const int index) {
535     return GetBoundaryAtIndex(index, IsRtlAtIndex(index));
536 }
537 
IsRtlAtIndex(const int index)538 bool Page::IsRtlAtIndex(const int index) {
539     int start_index = GetWordStartIndex(index);
540     int stop_index = GetWordStopIndex(index);
541     int word_length = stop_index - start_index;
542     if (word_length <= 1) {
543         // Can't tell directionality from a single character, guess LTR.
544         return false;
545     }
546     Rectangle_i start_bounds = GetCharBounds(start_index);
547     Rectangle_i stop_bounds = GetCharBounds(stop_index - 1);
548     return start_bounds.Center().x > stop_bounds.Center().x;
549 }
550 
GetBoundaryAtIndex(const int index,bool is_rtl)551 SelectionBoundary Page::GetBoundaryAtIndex(const int index, bool is_rtl) {
552     // Normally we align the boundary on the start edge of next character:
553     int char_index = index;
554     bool use_end_edge = false;
555 
556     // Printable characters have well defined bounding boxes, word-breaks (spaces
557     // and newlines) may not - so we use the end edge of the previous printable
558     // character instead if the next character is not printable.
559     if (index == NumChars() || IsWordBreak(GetUnicode(index))) {
560         char_index = index - 1;
561         use_end_edge = true;
562     }
563     bool use_right_edge = use_end_edge ^ is_rtl;
564 
565     SelectionBoundary boundary(index, 0, 0, is_rtl);
566     Rectangle_i char_bounds = GetCharBounds(char_index);
567     boundary.point.x = use_right_edge ? char_bounds.right : char_bounds.left;
568     // Use the baseline (not the bottom) of the char as the y-value.
569     boundary.point.y = GetCharOrigin(char_index).y;
570     return boundary;
571 }
572 
GetBoundaryAtPoint(const Point_i & point)573 SelectionBoundary Page::GetBoundaryAtPoint(const Point_i& point) {
574     SelectionBoundary best_boundary(0, point.x, point.y, false);
575     int best_distance_sq = std::numeric_limits<int>::max();
576 
577     bool prev_char_is_word_char = false;
578     bool is_rtl = false;
579     for (int index = first_printable_char_index(); index <= last_printable_char_index() + 1;
580          index++) {
581         bool cur_char_is_word_char =
582                 (index <= last_printable_char_index()) && !IsWordBreak(GetUnicode(index));
583         // Starting a new word:
584         if (cur_char_is_word_char && !prev_char_is_word_char) {
585             // Finding out RTL involves looking at each end of the word,
586             // so we only do it at the start of each word:
587             is_rtl = IsRtlAtIndex(index);
588         }
589         if (cur_char_is_word_char || prev_char_is_word_char) {
590             SelectionBoundary boundary = GetBoundaryAtIndex(index, is_rtl);
591             int dx = boundary.point.x - point.x;
592             int dy = boundary.point.y - point.y;
593             int distance_sq = dx * dx + dy * dy;
594             if (distance_sq < best_distance_sq) {
595                 best_boundary = boundary;
596                 best_distance_sq = distance_sq;
597             }
598         }
599         prev_char_is_word_char = cur_char_is_word_char;
600     }
601     return best_boundary;
602 }
603 
GetWordStartIndex(const int index)604 int Page::GetWordStartIndex(const int index) {
605     int start_index = index;
606     while (start_index > 0 && !IsWordBreak(GetUnicode(start_index - 1))) {
607         --start_index;  // Move start_index to the start of the word.
608     }
609     return start_index;
610 }
611 
GetWordStopIndex(const int index)612 int Page::GetWordStopIndex(const int index) {
613     int stop_index = index;
614     int num_chars = NumChars();
615     while (stop_index < num_chars && !IsWordBreak(GetUnicode(stop_index))) {
616         ++stop_index;  // Move stop_index to the end of the word.
617     }
618     return stop_index;
619 }
620 
GetRawCharBounds(const int char_index)621 Rectangle_d Page::GetRawCharBounds(const int char_index) {
622     double x1, x2, y1, y2;
623     FPDFText_GetCharBox(text_page(), char_index, &x1, &x2, &y1, &y2);
624     return DoubleRect(x1, y1, x2, y2);
625 }
626 
GetCharBounds(const int char_index)627 Rectangle_i Page::GetCharBounds(const int char_index) {
628     return ApplyPageTransform(GetRawCharBounds(char_index));
629 }
630 
GetCharOrigin(const int char_index)631 Point_i Page::GetCharOrigin(const int char_index) {
632     double x = 0.0, y = 0.0;
633     FPDFText_GetCharOrigin(text_page(), char_index, &x, &y);
634     return ApplyPageTransform(DoublePoint(x, y));
635 }
636 
GetAnnotatedLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const637 int Page::GetAnnotatedLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
638                                 vector<std::string>* urls) const {
639     FPDF_LINK link = nullptr;
640     int pos = 0;
641     int num_links_with_rect = 0;
642     while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
643         if (!IsUrlLink(link)) {
644             continue;
645         }
646 
647         std::string url = GetUrlUtf8(link);
648         Rectangle_i rect = GetRect(link);
649         if (IsEmpty(rect)) {
650             continue;
651         }
652 
653         link_to_rect->push_back(rects->size());
654         rects->push_back(rect);
655         urls->push_back(url);
656         num_links_with_rect++;
657     }
658     return num_links_with_rect;
659 }
660 
GetInferredLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const661 int Page::GetInferredLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
662                                vector<std::string>* urls) const {
663     // TODO(b/312730882): Infer links by looking for http:// and similar and for
664     // email addresses to use as mailto: links. There are some pdfClient methods for
665     // doing this, but these have some bugs which need patching or working around.
666     return 0;
667 }
668 
GetUrlUtf8(FPDF_LINK link) const669 std::string Page::GetUrlUtf8(FPDF_LINK link) const {
670     FPDF_ACTION action = FPDFLink_GetAction(link);
671     // Allocate a string big enough to hold the URL.
672     std::string url(FPDFAction_GetURIPath(document_, action, nullptr, 0), '\0');
673     // Then write the URL to it.
674     FPDFAction_GetURIPath(document_, action, &url[0], url.length());
675     EraseTrailingNulls(&url);
676     return url;
677 }
678 
GetRect(FPDF_LINK link) const679 Rectangle_i Page::GetRect(FPDF_LINK link) const {
680     FS_RECTF r;
681     if (!FPDFLink_GetAnnotRect(link, &r)) {
682         return Rectangle_i();
683     }
684 
685     Rectangle_d rect_d = DoubleRect(r.left, r.top, r.right, r.bottom);
686     return ApplyPageTransform(rect_d);
687 }
688 
IsGotoLink(FPDF_LINK link) const689 bool Page::IsGotoLink(FPDF_LINK link) const {
690     FPDF_ACTION action = FPDFLink_GetAction(link);
691     return action != nullptr && FPDFAction_GetType(action) == PDFACTION_GOTO;
692 }
693 
IsUrlLink(FPDF_LINK link) const694 bool Page::IsUrlLink(FPDF_LINK link) const {
695     FPDF_ACTION action = FPDFLink_GetAction(link);
696     return action != nullptr && FPDFAction_GetType(action) == PDFACTION_URI;
697 }
698 
699 }  // namespace pdfClient