1 // Formatting library for C++ - scanning API proof of concept
2 //
3 // Copyright (c) 2019 - present, Victor Zverovich
4 // All rights reserved.
5 //
6 // For the license information refer to format.h.
7
8 #include <array>
9 #include <cassert>
10 #include <climits>
11 #include <tuple>
12
13 #include "fmt/format-inl.h"
14
15 FMT_BEGIN_NAMESPACE
16 namespace detail {
17
// Returns true iff c is one of the two characters the scanner treats as
// whitespace: a space or a line feed.
inline auto is_whitespace(char c) -> bool {
  switch (c) {
  case ' ':
  case '\n':
    return true;
  default:
    return false;
  }
}
19
// Maps a hexadecimal digit character (upper or lower case) to its numeric
// value in the range [0, 15]. Any other character yields -1.
inline auto to_hex_digit(char c) -> int {
  const bool is_decimal = '0' <= c && c <= '9';
  if (is_decimal) return c - '0';
  const bool is_lower = 'a' <= c && c <= 'f';
  if (is_lower) return 10 + (c - 'a');
  const bool is_upper = 'A' <= c && c <= 'F';
  if (is_upper) return 10 + (c - 'A');
  return -1;
}
27
// A [begin, end) pair of character pointers that may be absent.
// A null `begin` stands for "no range"; the boolean conversion tests for that.
struct maybe_contiguous_range {
  const char* begin;
  const char* end;

  explicit operator bool() const { return begin ? true : false; }
};
34
35 class scan_buffer {
36 private:
37 const char* ptr_;
38 const char* end_;
39 bool contiguous_;
40
41 protected:
scan_buffer(const char * ptr,const char * end,bool contiguous)42 scan_buffer(const char* ptr, const char* end, bool contiguous)
43 : ptr_(ptr), end_(end), contiguous_(contiguous) {}
44 ~scan_buffer() = default;
45
set(span<const char> buf)46 void set(span<const char> buf) {
47 ptr_ = buf.data;
48 end_ = buf.data + buf.size;
49 }
50
51 auto ptr() const -> const char* { return ptr_; }
52
53 public:
54 scan_buffer(const scan_buffer&) = delete;
55 void operator=(const scan_buffer&) = delete;
56
57 // Fills the buffer with more input if available.
58 virtual void consume() = 0;
59
60 class sentinel {};
61
62 class iterator {
63 private:
64 const char** ptr_;
65 scan_buffer* buf_; // This could be merged with ptr_.
66 char value_;
67
68 static auto get_sentinel() -> const char** {
69 static const char* ptr = nullptr;
70 return &ptr;
71 }
72
73 friend class scan_buffer;
74
75 friend auto operator==(iterator lhs, sentinel) -> bool {
76 return *lhs.ptr_ == nullptr;
77 }
78 friend auto operator!=(iterator lhs, sentinel) -> bool {
79 return *lhs.ptr_ != nullptr;
80 }
81
iterator(scan_buffer * buf)82 iterator(scan_buffer* buf) : buf_(buf) {
83 if (buf->ptr_ == buf->end_) {
84 ptr_ = get_sentinel();
85 return;
86 }
87 ptr_ = &buf->ptr_;
88 value_ = *buf->ptr_;
89 }
90
get_buffer(iterator it)91 friend scan_buffer& get_buffer(iterator it) { return *it.buf_; }
92
93 public:
iterator()94 iterator() : ptr_(get_sentinel()), buf_(nullptr) {}
95
96 auto operator++() -> iterator& {
97 if (!buf_->try_consume()) ptr_ = get_sentinel();
98 value_ = *buf_->ptr_;
99 return *this;
100 }
101 auto operator++(int) -> iterator {
102 iterator copy = *this;
103 ++*this;
104 return copy;
105 }
106 auto operator*() const -> char { return value_; }
107
108 auto base() const -> const char* { return buf_->ptr_; }
109
110 friend auto to_contiguous(iterator it) -> maybe_contiguous_range;
111 friend auto advance(iterator it, size_t n) -> iterator;
112 };
113
114 friend auto to_contiguous(iterator it) -> maybe_contiguous_range {
115 if (it.buf_->is_contiguous()) return {it.buf_->ptr_, it.buf_->end_};
116 return {nullptr, nullptr};
117 }
118 friend auto advance(iterator it, size_t n) -> iterator {
119 FMT_ASSERT(it.buf_->is_contiguous(), "");
120 const char*& ptr = it.buf_->ptr_;
121 ptr += n;
122 it.value_ = *ptr;
123 if (ptr == it.buf_->end_) it.ptr_ = iterator::get_sentinel();
124 return it;
125 }
126
127 auto begin() -> iterator { return this; }
128 auto end() -> sentinel { return {}; }
129
130 auto is_contiguous() const -> bool { return contiguous_; }
131
132 // Tries consuming a single code unit. Returns true iff there is more input.
133 auto try_consume() -> bool {
134 FMT_ASSERT(ptr_ != end_, "");
135 ++ptr_;
136 if (ptr_ != end_) return true;
137 consume();
138 return ptr_ != end_;
139 }
140 };
141
// Shorthand names for the buffer's iterator and its end-of-input sentinel.
using scan_iterator = scan_buffer::iterator;
using scan_sentinel = scan_buffer::sentinel;
144
145 class string_scan_buffer final : public scan_buffer {
146 private:
consume()147 void consume() override {}
148
149 public:
string_scan_buffer(string_view s)150 explicit string_scan_buffer(string_view s)
151 : scan_buffer(s.begin(), s.end(), true) {}
152 };
153
154 class file_scan_buffer final : public scan_buffer {
155 private:
156 template <typename F, FMT_ENABLE_IF(sizeof(F::_IO_read_ptr) != 0 &&
157 !FMT_USE_FALLBACK_FILE)>
158 static auto get_file(F* f, int) -> glibc_file<F> {
159 return f;
160 }
161 template <typename F,
162 FMT_ENABLE_IF(sizeof(F::_p) != 0 && !FMT_USE_FALLBACK_FILE)>
163 static auto get_file(F* f, int) -> apple_file<F> {
164 return f;
165 }
166 static auto get_file(FILE* f, ...) -> fallback_file<FILE> { return f; }
167
168 decltype(get_file(static_cast<FILE*>(nullptr), 0)) file_;
169
170 // Fills the buffer if it is empty.
fill()171 void fill() {
172 span<const char> buf = file_.get_read_buffer();
173 if (buf.size == 0) {
174 int c = file_.get();
175 // Put the character back since we are only filling the buffer.
176 if (c != EOF) file_.unget(static_cast<char>(c));
177 buf = file_.get_read_buffer();
178 }
179 set(buf);
180 }
181
consume()182 void consume() override {
183 // Consume the current buffer content.
184 size_t n = to_unsigned(ptr() - file_.get_read_buffer().data);
185 for (size_t i = 0; i != n; ++i) file_.get();
186 fill();
187 }
188
189 public:
file_scan_buffer(FILE * f)190 explicit file_scan_buffer(FILE* f)
191 : scan_buffer(nullptr, nullptr, false), file_(f) {
192 flockfile(f);
193 fill();
194 }
~file_scan_buffer()195 ~file_scan_buffer() {
196 FILE* f = file_;
197 funlockfile(f);
198 }
199 };
200 } // namespace detail
201
// Primary template of the customization point used to scan values of type T.
// This primary template cannot be default-constructed; basic_scan_arg
// default-constructs scanner<T> when scanning a custom argument, so such types
// need a specialization that can be.
template <typename T, typename Char = char> struct scanner {
  // A deleted default constructor indicates a disabled scanner.
  scanner() = delete;
};
206
207 class scan_parse_context {
208 private:
209 string_view format_;
210
211 public:
212 using iterator = string_view::iterator;
213
scan_parse_context(string_view format)214 explicit FMT_CONSTEXPR scan_parse_context(string_view format)
215 : format_(format) {}
216
217 FMT_CONSTEXPR auto begin() const -> iterator { return format_.begin(); }
218 FMT_CONSTEXPR auto end() const -> iterator { return format_.end(); }
219
advance_to(iterator it)220 void advance_to(iterator it) {
221 format_.remove_prefix(detail::to_unsigned(it - begin()));
222 }
223 };
224
225 namespace detail {
// Tag identifying which kind of destination a basic_scan_arg refers to.
// none_type marks an empty argument; custom_type marks a destination that is
// scanned through a type-erased function pointer.
enum class scan_type {
  none_type,
  int_type,
  uint_type,
  long_long_type,
  ulong_long_type,
  string_type,
  string_view_type,
  custom_type
};
236
// Type-erased reference to a user-defined destination: a pointer to the object
// plus the function that knows how to scan into it.
template <typename Context> struct custom_scan_arg {
  void* value;  // Address of the destination object.
  // Scans into the object at arg using the given parse and scan contexts.
  void (*scan)(void* arg, scan_parse_context& parse_ctx, Context& ctx);
};
241 } // namespace detail
242
243 // A scan argument. Context is a template parameter for the compiled API where
244 // output can be unbuffered.
245 template <typename Context> class basic_scan_arg {
246 private:
247 using scan_type = detail::scan_type;
248 scan_type type_;
249 union {
250 int* int_value_;
251 unsigned* uint_value_;
252 long long* long_long_value_;
253 unsigned long long* ulong_long_value_;
254 std::string* string_;
255 string_view* string_view_;
256 detail::custom_scan_arg<Context> custom_;
257 // TODO: more types
258 };
259
260 template <typename T>
scan_custom_arg(void * arg,scan_parse_context & parse_ctx,Context & ctx)261 static void scan_custom_arg(void* arg, scan_parse_context& parse_ctx,
262 Context& ctx) {
263 auto s = scanner<T>();
264 parse_ctx.advance_to(s.parse(parse_ctx));
265 ctx.advance_to(s.scan(*static_cast<T*>(arg), ctx));
266 }
267
268 public:
basic_scan_arg()269 FMT_CONSTEXPR basic_scan_arg()
270 : type_(scan_type::none_type), int_value_(nullptr) {}
basic_scan_arg(int & value)271 FMT_CONSTEXPR basic_scan_arg(int& value)
272 : type_(scan_type::int_type), int_value_(&value) {}
basic_scan_arg(unsigned & value)273 FMT_CONSTEXPR basic_scan_arg(unsigned& value)
274 : type_(scan_type::uint_type), uint_value_(&value) {}
basic_scan_arg(long long & value)275 FMT_CONSTEXPR basic_scan_arg(long long& value)
276 : type_(scan_type::long_long_type), long_long_value_(&value) {}
basic_scan_arg(unsigned long long & value)277 FMT_CONSTEXPR basic_scan_arg(unsigned long long& value)
278 : type_(scan_type::ulong_long_type), ulong_long_value_(&value) {}
basic_scan_arg(std::string & value)279 FMT_CONSTEXPR basic_scan_arg(std::string& value)
280 : type_(scan_type::string_type), string_(&value) {}
basic_scan_arg(string_view & value)281 FMT_CONSTEXPR basic_scan_arg(string_view& value)
282 : type_(scan_type::string_view_type), string_view_(&value) {}
283 template <typename T>
basic_scan_arg(T & value)284 FMT_CONSTEXPR basic_scan_arg(T& value) : type_(scan_type::custom_type) {
285 custom_.value = &value;
286 custom_.scan = scan_custom_arg<T>;
287 }
288
289 constexpr explicit operator bool() const noexcept {
290 return type_ != scan_type::none_type;
291 }
292
293 auto type() const -> detail::scan_type { return type_; }
294
295 template <typename Visitor>
296 auto visit(Visitor&& vis) -> decltype(vis(monostate())) {
297 switch (type_) {
298 case scan_type::none_type:
299 break;
300 case scan_type::int_type:
301 return vis(*int_value_);
302 case scan_type::uint_type:
303 return vis(*uint_value_);
304 case scan_type::long_long_type:
305 return vis(*long_long_value_);
306 case scan_type::ulong_long_type:
307 return vis(*ulong_long_value_);
308 case scan_type::string_type:
309 return vis(*string_);
310 case scan_type::string_view_type:
311 return vis(*string_view_);
312 case scan_type::custom_type:
313 break;
314 }
315 return vis(monostate());
316 }
317
318 auto scan_custom(const char* parse_begin, scan_parse_context& parse_ctx,
319 Context& ctx) const -> bool {
320 if (type_ != scan_type::custom_type) return false;
321 parse_ctx.advance_to(parse_begin);
322 custom_.scan(custom_.value, parse_ctx, ctx);
323 return true;
324 }
325 };
326
// scan_context is defined further below; it is declared here so that scan_arg
// can name it as its context type.
class scan_context;
using scan_arg = basic_scan_arg<scan_context>;
329
330 struct scan_args {
331 int size;
332 const scan_arg* data;
333
334 template <size_t N>
scan_argsscan_args335 FMT_CONSTEXPR scan_args(const std::array<scan_arg, N>& store)
336 : size(N), data(store.data()) {
337 static_assert(N < INT_MAX, "too many arguments");
338 }
339 };
340
341 class scan_context {
342 private:
343 detail::scan_buffer& buf_;
344 scan_args args_;
345
346 public:
347 using iterator = detail::scan_iterator;
348 using sentinel = detail::scan_sentinel;
349
scan_context(detail::scan_buffer & buf,scan_args args)350 explicit FMT_CONSTEXPR scan_context(detail::scan_buffer& buf, scan_args args)
351 : buf_(buf), args_(args) {}
352
353 FMT_CONSTEXPR auto arg(int id) const -> scan_arg {
354 return id < args_.size ? args_.data[id] : scan_arg();
355 }
356
357 auto begin() const -> iterator { return buf_.begin(); }
358 auto end() const -> sentinel { return {}; }
359
advance_to(iterator)360 void advance_to(iterator) { buf_.consume(); }
361 };
362
363 namespace detail {
364
parse_scan_specs(const char * begin,const char * end,format_specs & specs,scan_type)365 const char* parse_scan_specs(const char* begin, const char* end,
366 format_specs& specs, scan_type) {
367 while (begin != end) {
368 switch (to_ascii(*begin)) {
369 // TODO: parse more scan format specifiers
370 case 'x':
371 specs.set_type(presentation_type::hex);
372 ++begin;
373 break;
374 case '}':
375 return begin;
376 }
377 }
378 return begin;
379 }
380
381 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
382 auto read(scan_iterator it, T& value) -> scan_iterator {
383 if (it == scan_sentinel()) return it;
384 char c = *it;
385 if (c < '0' || c > '9') report_error("invalid input");
386
387 int num_digits = 0;
388 T n = 0, prev = 0;
389 char prev_digit = c;
390 do {
391 prev = n;
392 n = n * 10 + static_cast<unsigned>(c - '0');
393 prev_digit = c;
394 c = *++it;
395 ++num_digits;
396 if (c < '0' || c > '9') break;
397 } while (it != scan_sentinel());
398
399 // Check overflow.
400 if (num_digits <= std::numeric_limits<int>::digits10) {
401 value = n;
402 return it;
403 }
404 unsigned max = to_unsigned((std::numeric_limits<int>::max)());
405 if (num_digits == std::numeric_limits<int>::digits10 + 1 &&
406 prev * 10ull + unsigned(prev_digit - '0') <= max) {
407 value = n;
408 } else {
409 report_error("number is too big");
410 }
411 return it;
412 }
413
414 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
415 auto read_hex(scan_iterator it, T& value) -> scan_iterator {
416 if (it == scan_sentinel()) return it;
417 int digit = to_hex_digit(*it);
418 if (digit < 0) report_error("invalid input");
419
420 int num_digits = 0;
421 T n = 0;
422 do {
423 n = (n << 4) + static_cast<unsigned>(digit);
424 ++num_digits;
425 digit = to_hex_digit(*++it);
426 if (digit < 0) break;
427 } while (it != scan_sentinel());
428
429 // Check overflow.
430 if (num_digits <= (std::numeric_limits<T>::digits >> 2))
431 value = n;
432 else
433 report_error("number is too big");
434 return it;
435 }
436
437 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
438 auto read(scan_iterator it, T& value, const format_specs& specs)
439 -> scan_iterator {
440 if (specs.type() == presentation_type::hex) return read_hex(it, value);
441 return read(it, value);
442 }
443
444 template <typename T, FMT_ENABLE_IF(std::is_signed<T>::value)>
445 auto read(scan_iterator it, T& value, const format_specs& specs = {})
446 -> scan_iterator {
447 bool negative = it != scan_sentinel() && *it == '-';
448 if (negative) {
449 ++it;
450 if (it == scan_sentinel()) report_error("invalid input");
451 }
452 using unsigned_type = typename std::make_unsigned<T>::type;
453 unsigned_type abs_value = 0;
454 it = read(it, abs_value, specs);
455 auto n = static_cast<T>(abs_value);
456 value = negative ? -n : n;
457 return it;
458 }
459
460 auto read(scan_iterator it, std::string& value, const format_specs& = {})
461 -> scan_iterator {
462 while (it != scan_sentinel() && *it != ' ') value.push_back(*it++);
463 return it;
464 }
465
466 auto read(scan_iterator it, string_view& value, const format_specs& = {})
467 -> scan_iterator {
468 auto range = to_contiguous(it);
469 // This could also be checked at compile time in scan.
470 if (!range) report_error("string_view requires contiguous input");
471 auto p = range.begin;
472 while (p != range.end && *p != ' ') ++p;
473 size_t size = to_unsigned(p - range.begin);
474 value = {range.begin, size};
475 return advance(it, size);
476 }
477
// Overload for monostate, which basic_scan_arg::visit passes for empty and
// custom arguments: reads nothing and returns the iterator unchanged.
auto read(scan_iterator it, monostate, const format_specs& = {})
    -> scan_iterator {
  return it;
}
482
// An argument scanner that uses the default format, e.g. decimal for integers.
// Intended to be passed to basic_scan_arg::visit: the call operator forwards
// the visited destination to the matching read overload.
struct default_arg_scanner {
  scan_iterator it;  // Position at which reading starts.

  // Reads into value and returns the iterator past the consumed input.
  template <typename T> FMT_INLINE auto operator()(T&& value) -> scan_iterator {
    return read(it, value);
  }
};
491
// An argument scanner with format specifiers.
// Like default_arg_scanner, but passes the parsed specs on to read.
struct arg_scanner {
  scan_iterator it;           // Position at which reading starts.
  const format_specs& specs;  // Parsed specifiers; must outlive the scanner.

  // Reads into value according to specs and returns the iterator past the
  // consumed input.
  template <typename T> auto operator()(T&& value) -> scan_iterator {
    return read(it, value, specs);
  }
};
501
502 struct scan_handler {
503 private:
504 scan_parse_context parse_ctx_;
505 scan_context scan_ctx_;
506 int next_arg_id_;
507
508 using sentinel = scan_buffer::sentinel;
509
510 public:
scan_handlerscan_handler511 FMT_CONSTEXPR scan_handler(string_view format, scan_buffer& buf,
512 scan_args args)
513 : parse_ctx_(format), scan_ctx_(buf, args), next_arg_id_(0) {}
514
515 auto pos() const -> scan_buffer::iterator { return scan_ctx_.begin(); }
516
on_textscan_handler517 void on_text(const char* begin, const char* end) {
518 if (begin == end) return;
519 auto it = scan_ctx_.begin();
520 for (; begin != end; ++begin, ++it) {
521 if (it == sentinel() || *begin != *it) on_error("invalid input");
522 }
523 scan_ctx_.advance_to(it);
524 }
525
526 FMT_CONSTEXPR auto on_arg_id() -> int { return on_arg_id(next_arg_id_++); }
527 FMT_CONSTEXPR auto on_arg_id(int id) -> int {
528 if (!scan_ctx_.arg(id)) on_error("argument index out of range");
529 return id;
530 }
531 FMT_CONSTEXPR auto on_arg_id(string_view id) -> int {
532 if (id.data()) on_error("invalid format");
533 return 0;
534 }
535
on_replacement_fieldscan_handler536 void on_replacement_field(int arg_id, const char* begin) {
537 scan_arg arg = scan_ctx_.arg(arg_id);
538 if (arg.scan_custom(begin, parse_ctx_, scan_ctx_)) return;
539 auto it = scan_ctx_.begin();
540 while (it != sentinel() && is_whitespace(*it)) ++it;
541 scan_ctx_.advance_to(arg.visit(default_arg_scanner{it}));
542 }
543
544 auto on_format_specs(int arg_id, const char* begin, const char* end) -> const
545 char* {
546 scan_arg arg = scan_ctx_.arg(arg_id);
547 if (arg.scan_custom(begin, parse_ctx_, scan_ctx_))
548 return parse_ctx_.begin();
549 auto specs = format_specs();
550 begin = parse_scan_specs(begin, end, specs, arg.type());
551 if (begin == end || *begin != '}') on_error("missing '}' in format string");
552 scan_ctx_.advance_to(arg.visit(arg_scanner{scan_ctx_.begin(), specs}));
553 return begin;
554 }
555
on_errorscan_handler556 FMT_NORETURN void on_error(const char* message) { report_error(message); }
557 };
558
vscan(detail::scan_buffer & buf,string_view fmt,scan_args args)559 void vscan(detail::scan_buffer& buf, string_view fmt, scan_args args) {
560 auto h = detail::scan_handler(fmt, buf, args);
561 detail::parse_format_string(fmt, h);
562 }
563
564 template <size_t I, typename... T, FMT_ENABLE_IF(I == sizeof...(T))>
make_args(std::array<scan_arg,sizeof...(T)> &,std::tuple<T...> &)565 void make_args(std::array<scan_arg, sizeof...(T)>&, std::tuple<T...>&) {}
566
567 template <size_t I, typename... T, FMT_ENABLE_IF(I < sizeof...(T))>
make_args(std::array<scan_arg,sizeof...(T)> & args,std::tuple<T...> & values)568 void make_args(std::array<scan_arg, sizeof...(T)>& args,
569 std::tuple<T...>& values) {
570 using element_type = typename std::tuple_element<I, std::tuple<T...>>::type;
571 static_assert(std::is_same<remove_cvref_t<element_type>, element_type>::value,
572 "");
573 args[I] = std::get<I>(values);
574 make_args<I + 1>(args, values);
575 }
576 } // namespace detail
577
578 template <typename Range, typename... T> class scan_data {
579 private:
580 std::tuple<T...> values_;
581 Range range_;
582
583 public:
584 scan_data() = default;
scan_data(T...values)585 scan_data(T... values) : values_(std::move(values)...) {}
586
587 auto value() const -> decltype(std::get<0>(values_)) {
588 return std::get<0>(values_);
589 }
590
591 auto values() const -> const std::tuple<T...>& { return values_; }
592
593 auto make_args() -> std::array<scan_arg, sizeof...(T)> {
594 auto args = std::array<scan_arg, sizeof...(T)>();
595 detail::make_args<0>(args, values_);
596 return args;
597 }
598
599 auto range() const -> Range { return range_; }
600
601 auto begin() const -> decltype(range_.begin()) { return range_.begin(); }
602 auto end() const -> decltype(range_.end()) { return range_.end(); }
603 };
604
// Builds an array with one scan argument per destination in args. Each
// element refers to the corresponding object, so the objects must stay alive
// while the array is in use.
template <typename... T>
auto make_scan_args(T&... args) -> std::array<scan_arg, sizeof...(T)> {
  return {{args...}};
}
609
// Empty placeholder used as the error type of scan_result.
class scan_error {};
611
// A rudimentary version of std::expected for testing the API shape.
// It can only be constructed from a value, so it always reports that it holds
// one; error() returns a default-constructed E.
template <typename T, typename E> class expected {
 public:
  expected(T value) : value_(std::move(value)), has_value_(true) {}

  // True iff a value is held.
  explicit operator bool() const { return has_value_; }

  // Member access to the held value.
  auto operator->() const -> const T* { return &value_; }

  auto error() -> const E { return E(); }

 private:
  T value_;
  bool has_value_;
};
627
// Result type of scan: an expected holding the scanned data, with scan_error
// as the error type.
template <typename Range, typename... T>
using scan_result = expected<scan_data<Range, T...>, scan_error>;
630
631 auto vscan(string_view input, string_view fmt, scan_args args)
632 -> string_view::iterator {
633 auto&& buf = detail::string_scan_buffer(input);
634 detail::vscan(buf, fmt, args);
635 return input.begin() + (buf.begin().base() - input.data());
636 }
637
638 // Scans the input and stores the results (in)to args.
639 template <typename... T>
640 auto scan_to(string_view input, string_view fmt, T&... args)
641 -> string_view::iterator {
642 return vscan(input, fmt, make_scan_args(args...));
643 }
644
645 template <typename... T>
646 auto scan(string_view input, string_view fmt)
647 -> scan_result<string_view, T...> {
648 auto data = scan_data<string_view, T...>();
649 vscan(input, fmt, data.make_args());
650 return data;
651 }
652
// Scans from a range that is not convertible to string_view and stores the
// results into args. The scan buffer is obtained by calling get_buffer on the
// range's begin iterator.
// NOTE(review): the returned iterator is the one obtained before scanning and
// is not reassigned here; whether it reflects the consumed input depends on
// the iterator type - confirm.
template <typename Range, typename... T,
          FMT_ENABLE_IF(!std::is_convertible<Range, string_view>::value)>
auto scan_to(Range&& input, string_view fmt, T&... args)
    -> decltype(std::begin(input)) {
  auto it = std::begin(input);
  detail::vscan(get_buffer(it), fmt, make_scan_args(args...));
  return it;
}
661
662 template <typename... T>
663 auto scan_to(FILE* f, string_view fmt, T&... args) -> bool {
664 auto&& buf = detail::file_scan_buffer(f);
665 detail::vscan(buf, fmt, make_scan_args(args...));
666 return buf.begin() != buf.end();
667 }
668
669 FMT_END_NAMESPACE
670