1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "page.h"
18
19 #include <stddef.h>
20 #include <stdint.h>
21
22 #include <algorithm>
23 #include <limits>
24 #include <span>
25 #include <string>
26 #include <vector>
27
28 #include "cpp/fpdf_scopers.h"
29 #include "form_filler.h"
30 #include "form_widget_info.h"
31 #include "fpdf_annot.h"
32 #include "fpdf_doc.h"
33 #include "fpdf_text.h"
34 #include "fpdfview.h"
35 #include "logging.h"
36 #include "normalize.h"
37 #include "rect.h"
38 #include "utf.h"
39 #include "utils/annot_hider.h"
40 #include "utils/text.h"
41
42 #define LOG_TAG "page"
43
44 using std::vector;
45
46 namespace pdfClient {
47
48 static const int kBytesPerPixel = 4;
49
50 static const Rectangle_i kEmptyIntRectangle = IntRect(0, 0, 0, 0);
51
52 // The acceptable fatness / inaccuracy of a user's finger in points.
53 static const int kFingerTolerance = 10;
54
55 static const int RENDER_MODE_FOR_DISPLAY = 1;
56 static const int RENDER_MODE_FOR_PRINT = 2;
57
Page(FPDF_DOCUMENT doc,int page_num,FormFiller * form_filler)58 Page::Page(FPDF_DOCUMENT doc, int page_num, FormFiller* form_filler)
59 : document_(doc),
60 page_(FPDF_LoadPage(doc, page_num)),
61 form_filler_(form_filler),
62 invalid_rect_(kEmptyIntRectangle),
63 page_num_(page_num) {}
64
65 Page::Page(Page&& p) = default;
66
~Page()67 Page::~Page() {}
68
Width() const69 int Page::Width() const {
70 return FPDF_GetPageWidth(page_.get());
71 }
72
Height() const73 int Page::Height() const {
74 return FPDF_GetPageHeight(page_.get());
75 }
76
Dimensions() const77 Rectangle_i Page::Dimensions() const {
78 return IntRect(0, 0, Width(), Height());
79 }
80
Render(FPDF_BITMAP bitmap,FS_MATRIX transform,int clip_left,int clip_top,int clip_right,int clip_bottom,int render_mode,int show_annot_types,bool render_form_fields)81 void Page::Render(FPDF_BITMAP bitmap, FS_MATRIX transform, int clip_left, int clip_top,
82 int clip_right, int clip_bottom, int render_mode, int show_annot_types,
83 bool render_form_fields) {
84 std::unordered_set<int> types;
85 for (auto renderFlag_annot : renderFlagsAnnotsMap) {
86 if ((renderFlag_annot.first & show_annot_types) != 0) {
87 for (int annot_type : renderFlag_annot.second) {
88 types.insert(annot_type);
89 }
90 }
91 }
92 if (render_form_fields) types.insert(FPDF_ANNOT_WIDGET);
93 pdfClient_utils::AnnotHider annot_hider(page_.get(), types);
94 int renderFlags = FPDF_REVERSE_BYTE_ORDER;
95 if (render_mode == RENDER_MODE_FOR_DISPLAY) {
96 renderFlags |= FPDF_LCD_TEXT | FPDF_ANNOT;
97 } else if (render_mode == RENDER_MODE_FOR_PRINT) {
98 renderFlags |= FPDF_PRINTING;
99 }
100
101 FS_RECTF clip = {(float)clip_left, (float)clip_top, (float)clip_right, (float)clip_bottom};
102 FPDF_RenderPageBitmapWithMatrix(bitmap, page_.get(), &transform, &clip, renderFlags);
103
104 if (render_form_fields) {
105 form_filler_->RenderTile(page_.get(), bitmap, transform, clip, renderFlags);
106 }
107 }
108
ApplyPageTransform(const Point_d & input) const109 Point_i Page::ApplyPageTransform(const Point_d& input) const {
110 Point_i output;
111 FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
112 &output.y);
113 return output;
114 }
115
ApplyPageTransform(const Rectangle_d & input) const116 Rectangle_i Page::ApplyPageTransform(const Rectangle_d& input) const {
117 return ApplyPageTransform(OuterIntRect(input));
118 }
119
// Converts the (left, top) and (right, bottom) corners of |input| from page to
// device coordinates, builds a rectangle from the two converted points and
// clips it to Dimensions().
Rectangle_i Page::ApplyPageTransform(const Rectangle_i& input) const {
    Point_i first_corner;
    FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.left, input.top,
                      &first_corner.x, &first_corner.y);

    Point_i second_corner;
    FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.right, input.bottom,
                      &second_corner.x, &second_corner.y);

    // Constrain the result within the page.
    const Rectangle_i unclipped = IntRect(first_corner, second_corner);
    return Intersect(unclipped, Dimensions());
}
132
UnapplyPageTransform(const Point_i & input) const133 Point_d Page::UnapplyPageTransform(const Point_i& input) const {
134 Point_d output;
135 FPDF_DeviceToPage(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
136 &output.y);
137 return output;
138 }
139
NumChars()140 int Page::NumChars() {
141 return FPDFText_CountChars(text_page());
142 }
143
GetUnicode(int char_index)144 uint32_t Page::GetUnicode(int char_index) {
145 return FPDFText_GetUnicode(text_page(), char_index);
146 }
147
GetTextUtf8()148 std::string Page::GetTextUtf8() {
149 return GetTextUtf8(first_printable_char_index(), last_printable_char_index() + 1);
150 }
151
GetTextUtf8(const int start_index,const int stop_index)152 std::string Page::GetTextUtf8(const int start_index, const int stop_index) {
153 std::string result;
154 for (int i = start_index; i < stop_index; i++) {
155 AppendpdfClientCodepointAsUtf8(GetUnicode(i), &result);
156 }
157 return result;
158 }
159
GetAltTextUtf8(vector<std::string> * result) const160 void Page::GetAltTextUtf8(vector<std::string>* result) const {
161 ::pdfClient_utils::GetAltText(page_.get(), result);
162 }
163
FindMatchesUtf8(std::string_view utf8,vector<TextRange> * matches)164 int Page::FindMatchesUtf8(std::string_view utf8, vector<TextRange>* matches) {
165 std::u32string query(Utf8ToUtf32(utf8));
166 // Normalize characters of string for searching - ignore case and accents.
167 NormalizeStringForSearch(&query);
168 TextRange match;
169 int page_start = first_printable_char_index();
170 int page_stop = last_printable_char_index() + 1;
171 int num_matches = 0;
172 while (FindMatch(query, page_start, page_stop, &match)) {
173 if (matches != nullptr) {
174 matches->push_back(match);
175 }
176 num_matches++;
177 page_start = match.second;
178 }
179 return num_matches;
180 }
181
BoundsOfMatchesUtf8(std::string_view utf8,vector<Rectangle_i> * rects,vector<int> * match_to_rect,vector<int> * char_indexes)182 int Page::BoundsOfMatchesUtf8(std::string_view utf8, vector<Rectangle_i>* rects,
183 vector<int>* match_to_rect, vector<int>* char_indexes) {
184 vector<TextRange> matches;
185 int num_matches = FindMatchesUtf8(utf8, &matches);
186 int num_rects = 0;
187 int num_matches_with_rects = 0;
188 for (int i = 0; i < num_matches; i++) {
189 int start = matches[i].first, stop = matches[i].second;
190 int num_rects_for_match = GetTextBounds(start, stop, rects);
191 if (num_rects_for_match == 0) {
192 continue;
193 }
194 if (match_to_rect != nullptr) {
195 match_to_rect->push_back(num_rects);
196 }
197 if (char_indexes != nullptr) {
198 char_indexes->push_back(start);
199 }
200 num_rects += num_rects_for_match;
201 num_matches_with_rects++;
202 }
203 return num_matches_with_rects;
204 }
205
GetTextBounds(const int start_index,const int stop_index,vector<Rectangle_i> * rects)206 int Page::GetTextBounds(const int start_index, const int stop_index, vector<Rectangle_i>* rects) {
207 int num_rects = 0;
208 Rectangle_d rect = DoubleRect(0, 0, 0, 0);
209 for (int index = start_index; index < stop_index; index++) {
210 double x1, x2, y1, y2;
211 // This call doesn't apply the page transform - have to apply later.
212 FPDFText_GetCharBox(text_page(), index, &x1, &x2, &y1, &y2);
213 if (x1 != x2 && y1 != y2) {
214 if (IsEmpty(rect)) {
215 rect = DoubleRect(x1, y1, x2, y2);
216 } else {
217 rect = Union(rect, DoubleRect(x1, y1, x2, y2));
218 }
219 }
220 // Starting a new line - push current rect, start a new rect.
221 if (IsLineBreak(GetUnicode(index))) {
222 if (!IsEmpty(rect)) {
223 num_rects++;
224 rects->push_back(ApplyPageTransform(rect));
225 }
226 rect = DoubleRect(0, 0, 0, 0);
227 }
228 }
229 // Push the last current rect.
230 if (!IsEmpty(rect)) {
231 num_rects++;
232 rects->push_back(ApplyPageTransform(rect));
233 }
234 return num_rects;
235 }
236
SelectWordAt(const Point_i & point,SelectionBoundary * start,SelectionBoundary * stop)237 bool Page::SelectWordAt(const Point_i& point, SelectionBoundary* start, SelectionBoundary* stop) {
238 Point_d char_point = UnapplyPageTransform(point);
239 int char_index = FPDFText_GetCharIndexAtPos(text_page(), char_point.x, char_point.y,
240 kFingerTolerance, kFingerTolerance);
241 if (char_index < 0 || IsWordBreak(GetUnicode(char_index))) {
242 return false; // No word at the given point to select.
243 }
244 start->index = GetWordStartIndex(char_index);
245 stop->index = GetWordStopIndex(char_index);
246 ConstrainBoundary(start);
247 ConstrainBoundary(stop);
248 return true;
249 }
250
ConstrainBoundary(SelectionBoundary * boundary)251 void Page::ConstrainBoundary(SelectionBoundary* boundary) {
252 if (boundary->index < 0) {
253 // Index is not specified - find the nearest index to the given point.
254 *boundary = GetBoundaryAtPoint(boundary->point);
255 } else {
256 // Index is specified - find the point at that index.
257 int index = std::max(boundary->index, first_printable_char_index());
258 index = std::min(index, last_printable_char_index() + 1);
259 *boundary = GetBoundaryAtIndex(index);
260 }
261 }
262
GetFontSize(int index)263 int Page::GetFontSize(int index) {
264 return FPDFText_GetFontSize(text_page(), index);
265 }
266
GetLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const267 int Page::GetLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
268 vector<std::string>* urls) const {
269 return GetAnnotatedLinksUtf8(rects, link_to_rect, urls) +
270 GetInferredLinksUtf8(rects, link_to_rect, urls);
271 }
272
GetGotoLinks() const273 vector<GotoLink> Page::GetGotoLinks() const {
274 vector<GotoLink> links;
275
276 FPDF_LINK link = nullptr;
277 int pos = 0;
278 while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
279 if (!IsGotoLink(link)) {
280 continue;
281 }
282 // Get the bounds of the actual link
283 vector<Rectangle_i> goto_link_rects;
284 Rectangle_i rect = GetRect(link);
285 goto_link_rects.push_back(rect);
286
287 GotoLinkDest* goto_link_dest = new GotoLinkDest();
288
289 // Get and parse the destination
290 FPDF_DEST fpdf_dest = FPDFLink_GetDest(document_, link);
291 int dest_page_index = FPDFDest_GetDestPageIndex(document_, fpdf_dest);
292 if (dest_page_index < 0) {
293 LOGE("Goto Link has invalid destination page index");
294 continue;
295 }
296 goto_link_dest->set_page_number(dest_page_index);
297
298 FPDF_BOOL has_x_coord;
299 FPDF_BOOL has_y_coord;
300 FPDF_BOOL has_zoom;
301 FS_FLOAT x;
302 FS_FLOAT y;
303 FS_FLOAT zoom;
304 FPDF_BOOL success = FPDFDest_GetLocationInPage(fpdf_dest, &has_x_coord, &has_y_coord,
305 &has_zoom, &x, &y, &zoom);
306
307 if (!success) {
308 continue;
309 }
310 if (has_x_coord) {
311 auto point = DoublePoint(x, 0);
312 auto tPoint = ApplyPageTransform(point);
313 goto_link_dest->set_x(tPoint.x);
314 }
315 if (has_y_coord) {
316 auto point = DoublePoint(0, y);
317 auto tPoint = ApplyPageTransform(point);
318 goto_link_dest->set_y(tPoint.y);
319 }
320 if (has_zoom) {
321 goto_link_dest->set_zoom(zoom);
322 }
323
324 GotoLink goto_link = GotoLink{goto_link_rects, *goto_link_dest};
325
326 // Ensure that links are within page bounds
327 if (goto_link_dest->x >= 0 && goto_link_dest->y >= 0) {
328 links.push_back(goto_link);
329 } else {
330 LOGE("Goto Link out of bound (x=%f, y=%f). Page width=%d, height =%d",
331 goto_link_dest->x, goto_link_dest->y, Width(), Height());
332 }
333 }
334 return links;
335 }
336
InitializeFormFilling()337 void Page::InitializeFormFilling() {
338 form_filler_->NotifyAfterPageLoad(page_.get());
339 }
340
TerminateFormFilling()341 void Page::TerminateFormFilling() {
342 form_filler_->NotifyBeforePageClose(page_.get());
343 }
344
GetFormWidgetInfo(Point_i point)345 FormWidgetInfo Page::GetFormWidgetInfo(Point_i point) {
346 Point_d page_point = UnapplyPageTransform(point);
347 FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), page_point);
348 if (result.FoundWidget()) {
349 // widget_rect is in page coords, transform to device coords before
350 // returning to user.
351 Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
352 result.set_widget_rect(transformed_widget_rect);
353 }
354
355 // Consume any rectangle that was invalidated by this action. Some
356 // info-gathering actions may cause temporary invalidation without
357 // actually doing anything that we need to redraw for.
358 ConsumeInvalidRect();
359 return result;
360 }
361
GetFormWidgetInfo(int annotation_index)362 FormWidgetInfo Page::GetFormWidgetInfo(int annotation_index) {
363 FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), annotation_index);
364 if (result.FoundWidget()) {
365 // widget_rect is in page coords; transform to device coords before
366 // returning to user.
367 Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
368 result.set_widget_rect(transformed_widget_rect);
369 }
370
371 // Consume any rectangle that was invalidated by this action. Some
372 // info-gathering actions may cause temporary invalidation without
373 // actually doing anything that we need to redraw for.
374 ConsumeInvalidRect();
375 return result;
376 }
377
GetFormWidgetInfos(const std::unordered_set<int> & type_ids,std::vector<FormWidgetInfo> * widget_infos)378 void Page::GetFormWidgetInfos(const std::unordered_set<int>& type_ids,
379 std::vector<FormWidgetInfo>* widget_infos) {
380 form_filler_->GetFormWidgetInfos(page_.get(), type_ids, widget_infos);
381 for (FormWidgetInfo& widget_info : *widget_infos) {
382 // widget_rect is in page coords; transform to device coords before
383 // returning to user.
384 Rectangle_i transformed_widget_rect = ApplyPageTransform(widget_info.widget_rect());
385 widget_info.set_widget_rect(transformed_widget_rect);
386 }
387
388 // Consume any rectangles that were invalidated by this action. Some
389 // info-gathering actions may cause temporary invalidation without
390 // actually doing anything that we need to redraw for.
391 ConsumeInvalidRect();
392 }
393
ClickOnPoint(Point_i point)394 bool Page::ClickOnPoint(Point_i point) {
395 Point_d page_point = UnapplyPageTransform(point);
396 return form_filler_->ClickOnPoint(page_.get(), page_point);
397 }
SetFormFieldText(int annotation_index,std::string_view text)398 bool Page::SetFormFieldText(int annotation_index, std::string_view text) {
399 return form_filler_->SetText(page_.get(), annotation_index, text);
400 }
401
SetChoiceSelection(int annotation_index,std::span<const int> selected_indices)402 bool Page::SetChoiceSelection(int annotation_index, std::span<const int> selected_indices) {
403 return form_filler_->SetChoiceSelection(page_.get(), annotation_index, selected_indices);
404 }
NotifyInvalidRect(Rectangle_i rect)405 void Page::NotifyInvalidRect(Rectangle_i rect) {
406 if (rect.left < 0 || rect.top < 0 || rect.right < 0 || rect.bottom < 0 || IsEmpty(rect)) {
407 return;
408 }
409
410 Rectangle_i device_rect = ApplyPageTransform(rect);
411 // If invalid_rect_ is currently empty, avoid unioning so we don't extend
412 // |rect|'s top left corner to (0,0) for no reason.
413 if (IsEmpty(invalid_rect_)) {
414 invalid_rect_ = device_rect;
415 return;
416 }
417
418 invalid_rect_ = Union(invalid_rect_, device_rect);
419 }
420
HasInvalidRect()421 bool Page::HasInvalidRect() {
422 return !IsEmpty(invalid_rect_);
423 }
424
ConsumeInvalidRect()425 Rectangle_i Page::ConsumeInvalidRect() {
426 Rectangle_i copy = invalid_rect_;
427 invalid_rect_ = kEmptyIntRectangle;
428 return copy;
429 }
430
page()431 void* Page::page() {
432 return page_.get();
433 }
434
text_page()435 FPDF_TEXTPAGE Page::text_page() {
436 EnsureTextPageInitialized();
437 return text_page_.get();
438 }
439
first_printable_char_index()440 int Page::first_printable_char_index() {
441 EnsureTextPageInitialized();
442 return first_printable_char_index_;
443 }
444
last_printable_char_index()445 int Page::last_printable_char_index() {
446 EnsureTextPageInitialized();
447 return last_printable_char_index_;
448 }
449
EnsureTextPageInitialized()450 void Page::EnsureTextPageInitialized() {
451 if (text_page_) {
452 return;
453 }
454 if (!page_.get()) {
455 // Page should never be null but a partner has an unexplained bug b/376796346
456 LOGE("Null page (err=%lu). for (page_num=%d)", FPDF_GetLastError(), page_num_);
457 // since the text_page_ would not have a page to load from
458 return;
459 }
460
461 text_page_.reset(FPDFText_LoadPage(page_.get()));
462 if (!text_page_) {
463 // This will get into infinite recursion if not returned - b/376796346
464 LOGE("Failed to load text (err=%lu). for (page_num=%d)", FPDF_GetLastError(), page_num_);
465 return;
466 }
467
468 int num_chars = NumChars();
469
470 int i;
471 for (i = 0; i < num_chars && IsWordBreak(GetUnicode(i)); i++) {
472 }
473 first_printable_char_index_ = i;
474
475 for (i = num_chars - 1; i >= first_printable_char_index_ && IsWordBreak(GetUnicode(i)); i--) {
476 }
477 last_printable_char_index_ = i;
478 }
479
InPlaceSwapRedBlueChannels(void * pixels,const int num_pixels) const480 void Page::InPlaceSwapRedBlueChannels(void* pixels, const int num_pixels) const {
481 uint8_t* channels = static_cast<uint8_t*>(pixels);
482 uint8_t* channel1 = channels;
483 uint8_t* channel3 = channels + 2;
484
485 for (int i = 0; i < num_pixels; ++i, channel1 += kBytesPerPixel, channel3 += kBytesPerPixel) {
486 std::swap(*channel1, *channel3);
487 }
488 }
489
FindMatch(const std::u32string & query,const int page_start,const int page_stop,TextRange * match)490 bool Page::FindMatch(const std::u32string& query, const int page_start, const int page_stop,
491 TextRange* match) {
492 if (query.empty()) {
493 return false;
494 }
495
496 int max_match_start = page_stop - query.length();
497 for (int m = page_start; m <= max_match_start; m++) {
498 if (IsMatch(query, m, page_stop, match)) {
499 return true;
500 }
501 }
502 return false;
503 }
504
IsMatch(const std::u32string & query,const int match_start,const int page_stop,TextRange * match)505 bool Page::IsMatch(const std::u32string& query, const int match_start, const int page_stop,
506 TextRange* match) {
507 int page_index = match_start;
508 size_t query_index = 0;
509 uint32_t page_char = 0, prev_char = 0;
510 while (query_index < query.length()) {
511 prev_char = page_char;
512 page_char = GetUnicode(page_index);
513
514 if (NormalizeForSearch(page_char) == query[query_index]) {
515 // This codepoint matches (ignoring case and accents). Move to next.
516 query_index++;
517 page_index++;
518 } else if (IsSkippableForSearch(page_char, prev_char) && query_index > 0) {
519 // Don't increment query index - skip over skippable character.
520 page_index++;
521 if ((page_stop - page_index) < (query.length() - query_index)) {
522 return false; // Not enough room for query string before page_stop.
523 }
524 } else {
525 return false;
526 }
527 }
528 // Update match to contain page indices of match start and match stop.
529 match->first = match_start;
530 match->second = page_index;
531 return true;
532 }
533
GetBoundaryAtIndex(const int index)534 SelectionBoundary Page::GetBoundaryAtIndex(const int index) {
535 return GetBoundaryAtIndex(index, IsRtlAtIndex(index));
536 }
537
IsRtlAtIndex(const int index)538 bool Page::IsRtlAtIndex(const int index) {
539 int start_index = GetWordStartIndex(index);
540 int stop_index = GetWordStopIndex(index);
541 int word_length = stop_index - start_index;
542 if (word_length <= 1) {
543 // Can't tell directionality from a single character, guess LTR.
544 return false;
545 }
546 Rectangle_i start_bounds = GetCharBounds(start_index);
547 Rectangle_i stop_bounds = GetCharBounds(stop_index - 1);
548 return start_bounds.Center().x > stop_bounds.Center().x;
549 }
550
GetBoundaryAtIndex(const int index,bool is_rtl)551 SelectionBoundary Page::GetBoundaryAtIndex(const int index, bool is_rtl) {
552 // Normally we align the boundary on the start edge of next character:
553 int char_index = index;
554 bool use_end_edge = false;
555
556 // Printable characters have well defined bounding boxes, word-breaks (spaces
557 // and newlines) may not - so we use the end edge of the previous printable
558 // character instead if the next character is not printable.
559 if (index == NumChars() || IsWordBreak(GetUnicode(index))) {
560 char_index = index - 1;
561 use_end_edge = true;
562 }
563 bool use_right_edge = use_end_edge ^ is_rtl;
564
565 SelectionBoundary boundary(index, 0, 0, is_rtl);
566 Rectangle_i char_bounds = GetCharBounds(char_index);
567 boundary.point.x = use_right_edge ? char_bounds.right : char_bounds.left;
568 // Use the baseline (not the bottom) of the char as the y-value.
569 boundary.point.y = GetCharOrigin(char_index).y;
570 return boundary;
571 }
572
GetBoundaryAtPoint(const Point_i & point)573 SelectionBoundary Page::GetBoundaryAtPoint(const Point_i& point) {
574 SelectionBoundary best_boundary(0, point.x, point.y, false);
575 int best_distance_sq = std::numeric_limits<int>::max();
576
577 bool prev_char_is_word_char = false;
578 bool is_rtl = false;
579 for (int index = first_printable_char_index(); index <= last_printable_char_index() + 1;
580 index++) {
581 bool cur_char_is_word_char =
582 (index <= last_printable_char_index()) && !IsWordBreak(GetUnicode(index));
583 // Starting a new word:
584 if (cur_char_is_word_char && !prev_char_is_word_char) {
585 // Finding out RTL involves looking at each end of the word,
586 // so we only do it at the start of each word:
587 is_rtl = IsRtlAtIndex(index);
588 }
589 if (cur_char_is_word_char || prev_char_is_word_char) {
590 SelectionBoundary boundary = GetBoundaryAtIndex(index, is_rtl);
591 int dx = boundary.point.x - point.x;
592 int dy = boundary.point.y - point.y;
593 int distance_sq = dx * dx + dy * dy;
594 if (distance_sq < best_distance_sq) {
595 best_boundary = boundary;
596 best_distance_sq = distance_sq;
597 }
598 }
599 prev_char_is_word_char = cur_char_is_word_char;
600 }
601 return best_boundary;
602 }
603
GetWordStartIndex(const int index)604 int Page::GetWordStartIndex(const int index) {
605 int start_index = index;
606 while (start_index > 0 && !IsWordBreak(GetUnicode(start_index - 1))) {
607 --start_index; // Move start_index to the start of the word.
608 }
609 return start_index;
610 }
611
GetWordStopIndex(const int index)612 int Page::GetWordStopIndex(const int index) {
613 int stop_index = index;
614 int num_chars = NumChars();
615 while (stop_index < num_chars && !IsWordBreak(GetUnicode(stop_index))) {
616 ++stop_index; // Move stop_index to the end of the word.
617 }
618 return stop_index;
619 }
620
GetRawCharBounds(const int char_index)621 Rectangle_d Page::GetRawCharBounds(const int char_index) {
622 double x1, x2, y1, y2;
623 FPDFText_GetCharBox(text_page(), char_index, &x1, &x2, &y1, &y2);
624 return DoubleRect(x1, y1, x2, y2);
625 }
626
GetCharBounds(const int char_index)627 Rectangle_i Page::GetCharBounds(const int char_index) {
628 return ApplyPageTransform(GetRawCharBounds(char_index));
629 }
630
GetCharOrigin(const int char_index)631 Point_i Page::GetCharOrigin(const int char_index) {
632 double x = 0.0, y = 0.0;
633 FPDFText_GetCharOrigin(text_page(), char_index, &x, &y);
634 return ApplyPageTransform(DoublePoint(x, y));
635 }
636
GetAnnotatedLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const637 int Page::GetAnnotatedLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
638 vector<std::string>* urls) const {
639 FPDF_LINK link = nullptr;
640 int pos = 0;
641 int num_links_with_rect = 0;
642 while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
643 if (!IsUrlLink(link)) {
644 continue;
645 }
646
647 std::string url = GetUrlUtf8(link);
648 Rectangle_i rect = GetRect(link);
649 if (IsEmpty(rect)) {
650 continue;
651 }
652
653 link_to_rect->push_back(rects->size());
654 rects->push_back(rect);
655 urls->push_back(url);
656 num_links_with_rect++;
657 }
658 return num_links_with_rect;
659 }
660
GetInferredLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const661 int Page::GetInferredLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
662 vector<std::string>* urls) const {
663 // TODO(b/312730882): Infer links by looking for http:// and similar and for
664 // email addresses to use as mailto: links. There are some pdfClient methods for
665 // doing this, but these have some bugs which need patching or working around.
666 return 0;
667 }
668
GetUrlUtf8(FPDF_LINK link) const669 std::string Page::GetUrlUtf8(FPDF_LINK link) const {
670 FPDF_ACTION action = FPDFLink_GetAction(link);
671 // Allocate a string big enough to hold the URL.
672 std::string url(FPDFAction_GetURIPath(document_, action, nullptr, 0), '\0');
673 // Then write the URL to it.
674 FPDFAction_GetURIPath(document_, action, &url[0], url.length());
675 EraseTrailingNulls(&url);
676 return url;
677 }
678
GetRect(FPDF_LINK link) const679 Rectangle_i Page::GetRect(FPDF_LINK link) const {
680 FS_RECTF r;
681 if (!FPDFLink_GetAnnotRect(link, &r)) {
682 return Rectangle_i();
683 }
684
685 Rectangle_d rect_d = DoubleRect(r.left, r.top, r.right, r.bottom);
686 return ApplyPageTransform(rect_d);
687 }
688
IsGotoLink(FPDF_LINK link) const689 bool Page::IsGotoLink(FPDF_LINK link) const {
690 FPDF_ACTION action = FPDFLink_GetAction(link);
691 return action != nullptr && FPDFAction_GetType(action) == PDFACTION_GOTO;
692 }
693
IsUrlLink(FPDF_LINK link) const694 bool Page::IsUrlLink(FPDF_LINK link) const {
695 FPDF_ACTION action = FPDFLink_GetAction(link);
696 return action != nullptr && FPDFAction_GetType(action) == PDFACTION_URI;
697 }
698
699 } // namespace pdfClient