xref: /aosp_15_r20/external/fmtlib/test/scan.h (revision 5c90c05cd622c0a81b57953a4d343e0e489f2e08)
1 // Formatting library for C++ - scanning API proof of concept
2 //
3 // Copyright (c) 2019 - present, Victor Zverovich
4 // All rights reserved.
5 //
6 // For the license information refer to format.h.
7 
8 #include <array>
9 #include <cassert>
10 #include <climits>
11 #include <tuple>
12 
13 #include "fmt/format-inl.h"
14 
15 FMT_BEGIN_NAMESPACE
16 namespace detail {
17 
// Reports whether c is one of the two separator characters recognised here:
// a space or a newline. No other character counts as whitespace.
inline auto is_whitespace(char c) -> bool {
  switch (c) {
  case ' ':
  case '\n':
    return true;
  default:
    return false;
  }
}
19 
// Maps a hexadecimal digit character (upper or lower case) to its numeric
// value in [0, 15]. Any other character yields -1.
inline auto to_hex_digit(char c) -> int {
  const bool is_decimal = '0' <= c && c <= '9';
  if (is_decimal) return c - '0';
  const bool is_lower = 'a' <= c && c <= 'f';
  if (is_lower) return 10 + (c - 'a');
  const bool is_upper = 'A' <= c && c <= 'F';
  if (is_upper) return 10 + (c - 'A');
  return -1;
}
27 
// A [begin, end) pair of character pointers. A null `begin` marks the range as
// unavailable, which is exactly what the explicit bool conversion reports.
struct maybe_contiguous_range {
  const char* begin;
  const char* end;

  explicit operator bool() const { return !(begin == nullptr); }
};
34 
35 class scan_buffer {
36  private:
37   const char* ptr_;
38   const char* end_;
39   bool contiguous_;
40 
41  protected:
scan_buffer(const char * ptr,const char * end,bool contiguous)42   scan_buffer(const char* ptr, const char* end, bool contiguous)
43       : ptr_(ptr), end_(end), contiguous_(contiguous) {}
44   ~scan_buffer() = default;
45 
set(span<const char> buf)46   void set(span<const char> buf) {
47     ptr_ = buf.data;
48     end_ = buf.data + buf.size;
49   }
50 
51   auto ptr() const -> const char* { return ptr_; }
52 
53  public:
54   scan_buffer(const scan_buffer&) = delete;
55   void operator=(const scan_buffer&) = delete;
56 
57   // Fills the buffer with more input if available.
58   virtual void consume() = 0;
59 
60   class sentinel {};
61 
62   class iterator {
63    private:
64     const char** ptr_;
65     scan_buffer* buf_;  // This could be merged with ptr_.
66     char value_;
67 
68     static auto get_sentinel() -> const char** {
69       static const char* ptr = nullptr;
70       return &ptr;
71     }
72 
73     friend class scan_buffer;
74 
75     friend auto operator==(iterator lhs, sentinel) -> bool {
76       return *lhs.ptr_ == nullptr;
77     }
78     friend auto operator!=(iterator lhs, sentinel) -> bool {
79       return *lhs.ptr_ != nullptr;
80     }
81 
iterator(scan_buffer * buf)82     iterator(scan_buffer* buf) : buf_(buf) {
83       if (buf->ptr_ == buf->end_) {
84         ptr_ = get_sentinel();
85         return;
86       }
87       ptr_ = &buf->ptr_;
88       value_ = *buf->ptr_;
89     }
90 
get_buffer(iterator it)91     friend scan_buffer& get_buffer(iterator it) { return *it.buf_; }
92 
93    public:
iterator()94     iterator() : ptr_(get_sentinel()), buf_(nullptr) {}
95 
96     auto operator++() -> iterator& {
97       if (!buf_->try_consume()) ptr_ = get_sentinel();
98       value_ = *buf_->ptr_;
99       return *this;
100     }
101     auto operator++(int) -> iterator {
102       iterator copy = *this;
103       ++*this;
104       return copy;
105     }
106     auto operator*() const -> char { return value_; }
107 
108     auto base() const -> const char* { return buf_->ptr_; }
109 
110     friend auto to_contiguous(iterator it) -> maybe_contiguous_range;
111     friend auto advance(iterator it, size_t n) -> iterator;
112   };
113 
114   friend auto to_contiguous(iterator it) -> maybe_contiguous_range {
115     if (it.buf_->is_contiguous()) return {it.buf_->ptr_, it.buf_->end_};
116     return {nullptr, nullptr};
117   }
118   friend auto advance(iterator it, size_t n) -> iterator {
119     FMT_ASSERT(it.buf_->is_contiguous(), "");
120     const char*& ptr = it.buf_->ptr_;
121     ptr += n;
122     it.value_ = *ptr;
123     if (ptr == it.buf_->end_) it.ptr_ = iterator::get_sentinel();
124     return it;
125   }
126 
127   auto begin() -> iterator { return this; }
128   auto end() -> sentinel { return {}; }
129 
130   auto is_contiguous() const -> bool { return contiguous_; }
131 
132   // Tries consuming a single code unit. Returns true iff there is more input.
133   auto try_consume() -> bool {
134     FMT_ASSERT(ptr_ != end_, "");
135     ++ptr_;
136     if (ptr_ != end_) return true;
137     consume();
138     return ptr_ != end_;
139   }
140 };
141 
// Namespace-level shorthands for the iterator and end-marker types nested in
// scan_buffer.
using scan_iterator = scan_buffer::iterator;
using scan_sentinel = scan_buffer::sentinel;
144 
145 class string_scan_buffer final : public scan_buffer {
146  private:
consume()147   void consume() override {}
148 
149  public:
string_scan_buffer(string_view s)150   explicit string_scan_buffer(string_view s)
151       : scan_buffer(s.begin(), s.end(), true) {}
152 };
153 
154 class file_scan_buffer final : public scan_buffer {
155  private:
156   template <typename F, FMT_ENABLE_IF(sizeof(F::_IO_read_ptr) != 0 &&
157                                       !FMT_USE_FALLBACK_FILE)>
158   static auto get_file(F* f, int) -> glibc_file<F> {
159     return f;
160   }
161   template <typename F,
162             FMT_ENABLE_IF(sizeof(F::_p) != 0 && !FMT_USE_FALLBACK_FILE)>
163   static auto get_file(F* f, int) -> apple_file<F> {
164     return f;
165   }
166   static auto get_file(FILE* f, ...) -> fallback_file<FILE> { return f; }
167 
168   decltype(get_file(static_cast<FILE*>(nullptr), 0)) file_;
169 
170   // Fills the buffer if it is empty.
fill()171   void fill() {
172     span<const char> buf = file_.get_read_buffer();
173     if (buf.size == 0) {
174       int c = file_.get();
175       // Put the character back since we are only filling the buffer.
176       if (c != EOF) file_.unget(static_cast<char>(c));
177       buf = file_.get_read_buffer();
178     }
179     set(buf);
180   }
181 
consume()182   void consume() override {
183     // Consume the current buffer content.
184     size_t n = to_unsigned(ptr() - file_.get_read_buffer().data);
185     for (size_t i = 0; i != n; ++i) file_.get();
186     fill();
187   }
188 
189  public:
file_scan_buffer(FILE * f)190   explicit file_scan_buffer(FILE* f)
191       : scan_buffer(nullptr, nullptr, false), file_(f) {
192     flockfile(f);
193     fill();
194   }
~file_scan_buffer()195   ~file_scan_buffer() {
196     FILE* f = file_;
197     funlockfile(f);
198   }
199 };
200 }  // namespace detail
201 
// Primary scanner template. It is not default-constructible, which is how a
// disabled (unsupported) scanner is signalled.
template <typename T, typename Char = char>
struct scanner {
  scanner() = delete;
};
206 
207 class scan_parse_context {
208  private:
209   string_view format_;
210 
211  public:
212   using iterator = string_view::iterator;
213 
scan_parse_context(string_view format)214   explicit FMT_CONSTEXPR scan_parse_context(string_view format)
215       : format_(format) {}
216 
217   FMT_CONSTEXPR auto begin() const -> iterator { return format_.begin(); }
218   FMT_CONSTEXPR auto end() const -> iterator { return format_.end(); }
219 
advance_to(iterator it)220   void advance_to(iterator it) {
221     format_.remove_prefix(detail::to_unsigned(it - begin()));
222   }
223 };
224 
225 namespace detail {
// Tag identifying which kind of destination a scan argument refers to.
enum class scan_type {
  none_type = 0,     // No argument.
  int_type,          // int
  uint_type,         // unsigned
  long_long_type,    // long long
  ulong_long_type,   // unsigned long long
  string_type,       // std::string
  string_view_type,  // string_view
  custom_type        // Any other type, handled through scanner<T>.
};
236 
237 template <typename Context> struct custom_scan_arg {
238   void* value;
239   void (*scan)(void* arg, scan_parse_context& parse_ctx, Context& ctx);
240 };
241 }  // namespace detail
242 
243 // A scan argument. Context is a template parameter for the compiled API where
244 // output can be unbuffered.
245 template <typename Context> class basic_scan_arg {
246  private:
247   using scan_type = detail::scan_type;
248   scan_type type_;
249   union {
250     int* int_value_;
251     unsigned* uint_value_;
252     long long* long_long_value_;
253     unsigned long long* ulong_long_value_;
254     std::string* string_;
255     string_view* string_view_;
256     detail::custom_scan_arg<Context> custom_;
257     // TODO: more types
258   };
259 
260   template <typename T>
scan_custom_arg(void * arg,scan_parse_context & parse_ctx,Context & ctx)261   static void scan_custom_arg(void* arg, scan_parse_context& parse_ctx,
262                               Context& ctx) {
263     auto s = scanner<T>();
264     parse_ctx.advance_to(s.parse(parse_ctx));
265     ctx.advance_to(s.scan(*static_cast<T*>(arg), ctx));
266   }
267 
268  public:
basic_scan_arg()269   FMT_CONSTEXPR basic_scan_arg()
270       : type_(scan_type::none_type), int_value_(nullptr) {}
basic_scan_arg(int & value)271   FMT_CONSTEXPR basic_scan_arg(int& value)
272       : type_(scan_type::int_type), int_value_(&value) {}
basic_scan_arg(unsigned & value)273   FMT_CONSTEXPR basic_scan_arg(unsigned& value)
274       : type_(scan_type::uint_type), uint_value_(&value) {}
basic_scan_arg(long long & value)275   FMT_CONSTEXPR basic_scan_arg(long long& value)
276       : type_(scan_type::long_long_type), long_long_value_(&value) {}
basic_scan_arg(unsigned long long & value)277   FMT_CONSTEXPR basic_scan_arg(unsigned long long& value)
278       : type_(scan_type::ulong_long_type), ulong_long_value_(&value) {}
basic_scan_arg(std::string & value)279   FMT_CONSTEXPR basic_scan_arg(std::string& value)
280       : type_(scan_type::string_type), string_(&value) {}
basic_scan_arg(string_view & value)281   FMT_CONSTEXPR basic_scan_arg(string_view& value)
282       : type_(scan_type::string_view_type), string_view_(&value) {}
283   template <typename T>
basic_scan_arg(T & value)284   FMT_CONSTEXPR basic_scan_arg(T& value) : type_(scan_type::custom_type) {
285     custom_.value = &value;
286     custom_.scan = scan_custom_arg<T>;
287   }
288 
289   constexpr explicit operator bool() const noexcept {
290     return type_ != scan_type::none_type;
291   }
292 
293   auto type() const -> detail::scan_type { return type_; }
294 
295   template <typename Visitor>
296   auto visit(Visitor&& vis) -> decltype(vis(monostate())) {
297     switch (type_) {
298     case scan_type::none_type:
299       break;
300     case scan_type::int_type:
301       return vis(*int_value_);
302     case scan_type::uint_type:
303       return vis(*uint_value_);
304     case scan_type::long_long_type:
305       return vis(*long_long_value_);
306     case scan_type::ulong_long_type:
307       return vis(*ulong_long_value_);
308     case scan_type::string_type:
309       return vis(*string_);
310     case scan_type::string_view_type:
311       return vis(*string_view_);
312     case scan_type::custom_type:
313       break;
314     }
315     return vis(monostate());
316   }
317 
318   auto scan_custom(const char* parse_begin, scan_parse_context& parse_ctx,
319                    Context& ctx) const -> bool {
320     if (type_ != scan_type::custom_type) return false;
321     parse_ctx.advance_to(parse_begin);
322     custom_.scan(custom_.value, parse_ctx, ctx);
323     return true;
324   }
325 };
326 
// scan_context is only forward-declared at this point; it is needed as the
// template argument of the scan_arg alias.
class scan_context;
using scan_arg = basic_scan_arg<scan_context>;
329 
330 struct scan_args {
331   int size;
332   const scan_arg* data;
333 
334   template <size_t N>
scan_argsscan_args335   FMT_CONSTEXPR scan_args(const std::array<scan_arg, N>& store)
336       : size(N), data(store.data()) {
337     static_assert(N < INT_MAX, "too many arguments");
338   }
339 };
340 
341 class scan_context {
342  private:
343   detail::scan_buffer& buf_;
344   scan_args args_;
345 
346  public:
347   using iterator = detail::scan_iterator;
348   using sentinel = detail::scan_sentinel;
349 
scan_context(detail::scan_buffer & buf,scan_args args)350   explicit FMT_CONSTEXPR scan_context(detail::scan_buffer& buf, scan_args args)
351       : buf_(buf), args_(args) {}
352 
353   FMT_CONSTEXPR auto arg(int id) const -> scan_arg {
354     return id < args_.size ? args_.data[id] : scan_arg();
355   }
356 
357   auto begin() const -> iterator { return buf_.begin(); }
358   auto end() const -> sentinel { return {}; }
359 
advance_to(iterator)360   void advance_to(iterator) { buf_.consume(); }
361 };
362 
363 namespace detail {
364 
parse_scan_specs(const char * begin,const char * end,format_specs & specs,scan_type)365 const char* parse_scan_specs(const char* begin, const char* end,
366                              format_specs& specs, scan_type) {
367   while (begin != end) {
368     switch (to_ascii(*begin)) {
369     // TODO: parse more scan format specifiers
370     case 'x':
371       specs.set_type(presentation_type::hex);
372       ++begin;
373       break;
374     case '}':
375       return begin;
376     }
377   }
378   return begin;
379 }
380 
381 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
382 auto read(scan_iterator it, T& value) -> scan_iterator {
383   if (it == scan_sentinel()) return it;
384   char c = *it;
385   if (c < '0' || c > '9') report_error("invalid input");
386 
387   int num_digits = 0;
388   T n = 0, prev = 0;
389   char prev_digit = c;
390   do {
391     prev = n;
392     n = n * 10 + static_cast<unsigned>(c - '0');
393     prev_digit = c;
394     c = *++it;
395     ++num_digits;
396     if (c < '0' || c > '9') break;
397   } while (it != scan_sentinel());
398 
399   // Check overflow.
400   if (num_digits <= std::numeric_limits<int>::digits10) {
401     value = n;
402     return it;
403   }
404   unsigned max = to_unsigned((std::numeric_limits<int>::max)());
405   if (num_digits == std::numeric_limits<int>::digits10 + 1 &&
406       prev * 10ull + unsigned(prev_digit - '0') <= max) {
407     value = n;
408   } else {
409     report_error("number is too big");
410   }
411   return it;
412 }
413 
414 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
415 auto read_hex(scan_iterator it, T& value) -> scan_iterator {
416   if (it == scan_sentinel()) return it;
417   int digit = to_hex_digit(*it);
418   if (digit < 0) report_error("invalid input");
419 
420   int num_digits = 0;
421   T n = 0;
422   do {
423     n = (n << 4) + static_cast<unsigned>(digit);
424     ++num_digits;
425     digit = to_hex_digit(*++it);
426     if (digit < 0) break;
427   } while (it != scan_sentinel());
428 
429   // Check overflow.
430   if (num_digits <= (std::numeric_limits<T>::digits >> 2))
431     value = n;
432   else
433     report_error("number is too big");
434   return it;
435 }
436 
437 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
438 auto read(scan_iterator it, T& value, const format_specs& specs)
439     -> scan_iterator {
440   if (specs.type() == presentation_type::hex) return read_hex(it, value);
441   return read(it, value);
442 }
443 
444 template <typename T, FMT_ENABLE_IF(std::is_signed<T>::value)>
445 auto read(scan_iterator it, T& value, const format_specs& specs = {})
446     -> scan_iterator {
447   bool negative = it != scan_sentinel() && *it == '-';
448   if (negative) {
449     ++it;
450     if (it == scan_sentinel()) report_error("invalid input");
451   }
452   using unsigned_type = typename std::make_unsigned<T>::type;
453   unsigned_type abs_value = 0;
454   it = read(it, abs_value, specs);
455   auto n = static_cast<T>(abs_value);
456   value = negative ? -n : n;
457   return it;
458 }
459 
460 auto read(scan_iterator it, std::string& value, const format_specs& = {})
461     -> scan_iterator {
462   while (it != scan_sentinel() && *it != ' ') value.push_back(*it++);
463   return it;
464 }
465 
466 auto read(scan_iterator it, string_view& value, const format_specs& = {})
467     -> scan_iterator {
468   auto range = to_contiguous(it);
469   // This could also be checked at compile time in scan.
470   if (!range) report_error("string_view requires contiguous input");
471   auto p = range.begin;
472   while (p != range.end && *p != ' ') ++p;
473   size_t size = to_unsigned(p - range.begin);
474   value = {range.begin, size};
475   return advance(it, size);
476 }
477 
478 auto read(scan_iterator it, monostate, const format_specs& = {})
479     -> scan_iterator {
480   return it;
481 }
482 
483 // An argument scanner that uses the default format, e.g. decimal for integers.
484 struct default_arg_scanner {
485   scan_iterator it;
486 
487   template <typename T> FMT_INLINE auto operator()(T&& value) -> scan_iterator {
488     return read(it, value);
489   }
490 };
491 
492 // An argument scanner with format specifiers.
493 struct arg_scanner {
494   scan_iterator it;
495   const format_specs& specs;
496 
497   template <typename T> auto operator()(T&& value) -> scan_iterator {
498     return read(it, value, specs);
499   }
500 };
501 
502 struct scan_handler {
503  private:
504   scan_parse_context parse_ctx_;
505   scan_context scan_ctx_;
506   int next_arg_id_;
507 
508   using sentinel = scan_buffer::sentinel;
509 
510  public:
scan_handlerscan_handler511   FMT_CONSTEXPR scan_handler(string_view format, scan_buffer& buf,
512                              scan_args args)
513       : parse_ctx_(format), scan_ctx_(buf, args), next_arg_id_(0) {}
514 
515   auto pos() const -> scan_buffer::iterator { return scan_ctx_.begin(); }
516 
on_textscan_handler517   void on_text(const char* begin, const char* end) {
518     if (begin == end) return;
519     auto it = scan_ctx_.begin();
520     for (; begin != end; ++begin, ++it) {
521       if (it == sentinel() || *begin != *it) on_error("invalid input");
522     }
523     scan_ctx_.advance_to(it);
524   }
525 
526   FMT_CONSTEXPR auto on_arg_id() -> int { return on_arg_id(next_arg_id_++); }
527   FMT_CONSTEXPR auto on_arg_id(int id) -> int {
528     if (!scan_ctx_.arg(id)) on_error("argument index out of range");
529     return id;
530   }
531   FMT_CONSTEXPR auto on_arg_id(string_view id) -> int {
532     if (id.data()) on_error("invalid format");
533     return 0;
534   }
535 
on_replacement_fieldscan_handler536   void on_replacement_field(int arg_id, const char* begin) {
537     scan_arg arg = scan_ctx_.arg(arg_id);
538     if (arg.scan_custom(begin, parse_ctx_, scan_ctx_)) return;
539     auto it = scan_ctx_.begin();
540     while (it != sentinel() && is_whitespace(*it)) ++it;
541     scan_ctx_.advance_to(arg.visit(default_arg_scanner{it}));
542   }
543 
544   auto on_format_specs(int arg_id, const char* begin, const char* end) -> const
545       char* {
546     scan_arg arg = scan_ctx_.arg(arg_id);
547     if (arg.scan_custom(begin, parse_ctx_, scan_ctx_))
548       return parse_ctx_.begin();
549     auto specs = format_specs();
550     begin = parse_scan_specs(begin, end, specs, arg.type());
551     if (begin == end || *begin != '}') on_error("missing '}' in format string");
552     scan_ctx_.advance_to(arg.visit(arg_scanner{scan_ctx_.begin(), specs}));
553     return begin;
554   }
555 
on_errorscan_handler556   FMT_NORETURN void on_error(const char* message) { report_error(message); }
557 };
558 
vscan(detail::scan_buffer & buf,string_view fmt,scan_args args)559 void vscan(detail::scan_buffer& buf, string_view fmt, scan_args args) {
560   auto h = detail::scan_handler(fmt, buf, args);
561   detail::parse_format_string(fmt, h);
562 }
563 
564 template <size_t I, typename... T, FMT_ENABLE_IF(I == sizeof...(T))>
make_args(std::array<scan_arg,sizeof...(T)> &,std::tuple<T...> &)565 void make_args(std::array<scan_arg, sizeof...(T)>&, std::tuple<T...>&) {}
566 
567 template <size_t I, typename... T, FMT_ENABLE_IF(I < sizeof...(T))>
make_args(std::array<scan_arg,sizeof...(T)> & args,std::tuple<T...> & values)568 void make_args(std::array<scan_arg, sizeof...(T)>& args,
569                std::tuple<T...>& values) {
570   using element_type = typename std::tuple_element<I, std::tuple<T...>>::type;
571   static_assert(std::is_same<remove_cvref_t<element_type>, element_type>::value,
572                 "");
573   args[I] = std::get<I>(values);
574   make_args<I + 1>(args, values);
575 }
576 }  // namespace detail
577 
578 template <typename Range, typename... T> class scan_data {
579  private:
580   std::tuple<T...> values_;
581   Range range_;
582 
583  public:
584   scan_data() = default;
scan_data(T...values)585   scan_data(T... values) : values_(std::move(values)...) {}
586 
587   auto value() const -> decltype(std::get<0>(values_)) {
588     return std::get<0>(values_);
589   }
590 
591   auto values() const -> const std::tuple<T...>& { return values_; }
592 
593   auto make_args() -> std::array<scan_arg, sizeof...(T)> {
594     auto args = std::array<scan_arg, sizeof...(T)>();
595     detail::make_args<0>(args, values_);
596     return args;
597   }
598 
599   auto range() const -> Range { return range_; }
600 
601   auto begin() const -> decltype(range_.begin()) { return range_.begin(); }
602   auto end() const -> decltype(range_.end()) { return range_.end(); }
603 };
604 
605 template <typename... T>
606 auto make_scan_args(T&... args) -> std::array<scan_arg, sizeof...(T)> {
607   return {{args...}};
608 }
609 
// Empty error type; used as the error parameter of scan_result.
class scan_error {};
611 
// A rudimentary version of std::expected for testing the API shape.
// It always holds a value: has_value_ is initialised to true and nothing in
// this class changes it.
template <typename T, typename E> class expected {
 private:
  T value_;
  bool has_value_ = true;

 public:
  // Intentionally implicit so that a T can be returned where an expected is
  // required.
  expected(T value) : value_(std::move(value)) {}

  explicit operator bool() const { return has_value_; }

  // Access to the members of the stored value.
  auto operator->() const -> const T* { return &value_; }

  // Returns a default-constructed error. This is a const member function so
  // that it can be called on const objects, like the other accessors.
  auto error() const -> E { return E(); }
};
627 
// Return type of scan: an `expected` holding scan_data for the given range and
// value types, with scan_error as the error type.
template <typename Range, typename... T>
using scan_result = expected<scan_data<Range, T...>, scan_error>;
630 
631 auto vscan(string_view input, string_view fmt, scan_args args)
632     -> string_view::iterator {
633   auto&& buf = detail::string_scan_buffer(input);
634   detail::vscan(buf, fmt, args);
635   return input.begin() + (buf.begin().base() - input.data());
636 }
637 
638 // Scans the input and stores the results (in)to args.
639 template <typename... T>
640 auto scan_to(string_view input, string_view fmt, T&... args)
641     -> string_view::iterator {
642   return vscan(input, fmt, make_scan_args(args...));
643 }
644 
645 template <typename... T>
646 auto scan(string_view input, string_view fmt)
647     -> scan_result<string_view, T...> {
648   auto data = scan_data<string_view, T...>();
649   vscan(input, fmt, data.make_args());
650   return data;
651 }
652 
653 template <typename Range, typename... T,
654           FMT_ENABLE_IF(!std::is_convertible<Range, string_view>::value)>
655 auto scan_to(Range&& input, string_view fmt, T&... args)
656     -> decltype(std::begin(input)) {
657   auto it = std::begin(input);
658   detail::vscan(get_buffer(it), fmt, make_scan_args(args...));
659   return it;
660 }
661 
662 template <typename... T>
663 auto scan_to(FILE* f, string_view fmt, T&... args) -> bool {
664   auto&& buf = detail::file_scan_buffer(f);
665   detail::vscan(buf, fmt, make_scan_args(args...));
666   return buf.begin() != buf.end();
667 }
668 
669 FMT_END_NAMESPACE
670