xref: /aosp_15_r20/external/cronet/third_party/rust/chromium_crates_io/vendor/qr_code-2.0.0/src/optimize.rs (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 //! Find the optimal data mode sequence to encode a piece of data.
2 use crate::types::{Mode, Version};
3 use std::borrow::Borrow;
4 use std::slice::Iter;
5 
6 //------------------------------------------------------------------------------
7 //{{{ Segment
8 
9 /// A segment of data committed to an encoding mode.
10 #[derive(PartialEq, Eq, Debug, Copy, Clone)]
11 pub struct Segment {
12     /// The encoding mode of the segment of data.
13     pub mode: Mode,
14 
15     /// The start index of the segment.
16     pub begin: usize,
17 
18     /// The end index (exclusive) of the segment.
19     pub end: usize,
20 }
21 
22 impl Segment {
23     /// Compute the number of bits (including the size of the mode indicator and
24     /// length bits) when this segment is encoded.
encoded_len(&self, version: Version) -> usize25     pub fn encoded_len(&self, version: Version) -> usize {
26         let byte_size = self.end - self.begin;
27         let chars_count = if self.mode == Mode::Kanji {
28             byte_size / 2
29         } else {
30             byte_size
31         };
32 
33         let mode_bits_count = version.mode_bits_count();
34         let length_bits_count = self.mode.length_bits_count(version);
35         let data_bits_count = self.mode.data_bits_count(chars_count);
36 
37         mode_bits_count + length_bits_count + data_bits_count
38     }
39 
40     /// Panics if `&self` is not a valid segment.
assert_invariants(&self)41     fn assert_invariants(&self) {
42         // TODO: It would be great if it would be impossible to construct an invalid `Segment` -
43         // either by 1) making the fields private and only allowing construction via a public,
44         // preconditions-checking API, or 2) keeping the fields public but replacing `end: usize`
45         // with `len: NonZeroUsize`.
46         assert!(self.begin < self.end);
47     }
48 }
49 
50 //}}}
51 //------------------------------------------------------------------------------
52 //{{{ Parser
53 
54 /// This iterator is basically equivalent to
55 ///
56 /// ```ignore
57 /// data.map(|c| ExclCharSet::from_u8(*c))
58 ///     .chain(Some(ExclCharSet::End).move_iter())
59 ///     .enumerate()
60 /// ```
61 ///
62 /// But the type is too hard to write, thus the new type.
63 ///
64 struct EcsIter<I> {
65     base: I,
66     index: usize,
67     ended: bool,
68 }
69 
70 impl<'a, I: Iterator<Item = &'a u8>> Iterator for EcsIter<I> {
71     type Item = (usize, ExclCharSet);
72 
next(&mut self) -> Option<(usize, ExclCharSet)>73     fn next(&mut self) -> Option<(usize, ExclCharSet)> {
74         if self.ended {
75             return None;
76         }
77 
78         match self.base.next() {
79             None => {
80                 self.ended = true;
81                 Some((self.index, ExclCharSet::End))
82             }
83             Some(c) => {
84                 let old_index = self.index;
85                 self.index += 1;
86                 Some((old_index, ExclCharSet::from_u8(*c)))
87             }
88         }
89     }
90 }
91 
92 /// QR code data parser to classify the input into distinct segments.
93 pub struct Parser<'a> {
94     ecs_iter: EcsIter<Iter<'a, u8>>,
95     state: State,
96     begin: usize,
97     pending_single_byte: bool,
98 }
99 
100 impl<'a> Parser<'a> {
101     /// Creates a new iterator which parse the data into segments that only
102     /// contains their exclusive subsets. No optimization is done at this point.
103     ///
104     ///     use qr_code::optimize::{Parser, Segment};
105     ///     use qr_code::types::Mode::{Alphanumeric, Numeric, Byte};
106     ///
107     ///     let parse_res = Parser::new(b"ABC123abcd").collect::<Vec<Segment>>();
108     ///     assert_eq!(parse_res, vec![Segment { mode: Alphanumeric, begin: 0, end: 3 },
109     ///                                Segment { mode: Numeric, begin: 3, end: 6 },
110     ///                                Segment { mode: Byte, begin: 6, end: 10 }]);
111     ///
new(data: &[u8]) -> Parser112     pub fn new(data: &[u8]) -> Parser {
113         Parser {
114             ecs_iter: EcsIter {
115                 base: data.iter(),
116                 index: 0,
117                 ended: false,
118             },
119             state: State::Init,
120             begin: 0,
121             pending_single_byte: false,
122         }
123     }
124 }
125 
126 impl<'a> Iterator for Parser<'a> {
127     type Item = Segment;
128 
next(&mut self) -> Option<Segment>129     fn next(&mut self) -> Option<Segment> {
130         if self.pending_single_byte {
131             self.pending_single_byte = false;
132             self.begin += 1;
133             return Some(Segment {
134                 mode: Mode::Byte,
135                 begin: self.begin - 1,
136                 end: self.begin,
137             });
138         }
139 
140         loop {
141             let (i, ecs) = match self.ecs_iter.next() {
142                 None => return None,
143                 Some(a) => a,
144             };
145             let (next_state, action) = STATE_TRANSITION[self.state as usize + ecs as usize];
146             self.state = next_state;
147 
148             let old_begin = self.begin;
149             let push_mode = match action {
150                 Action::Idle => continue,
151                 Action::Numeric => Mode::Numeric,
152                 Action::Alpha => Mode::Alphanumeric,
153                 Action::Byte => Mode::Byte,
154                 Action::Kanji => Mode::Kanji,
155                 Action::KanjiAndSingleByte => {
156                     let next_begin = i - 1;
157                     if self.begin == next_begin {
158                         Mode::Byte
159                     } else {
160                         self.pending_single_byte = true;
161                         self.begin = next_begin;
162                         return Some(Segment {
163                             mode: Mode::Kanji,
164                             begin: old_begin,
165                             end: next_begin,
166                         });
167                     }
168                 }
169             };
170 
171             self.begin = i;
172             return Some(Segment {
173                 mode: push_mode,
174                 begin: old_begin,
175                 end: i,
176             });
177         }
178     }
179 }
180 
181 #[cfg(test)]
182 mod parse_tests {
183     use crate::optimize::{Parser, Segment};
184     use crate::types::Mode;
185 
parse(data: &[u8]) -> Vec<Segment>186     fn parse(data: &[u8]) -> Vec<Segment> {
187         Parser::new(data).collect()
188     }
189 
190     #[test]
test_parse_1()191     fn test_parse_1() {
192         let segs = parse(b"01049123451234591597033130128%10ABC123");
193         assert_eq!(
194             segs,
195             vec![
196                 Segment {
197                     mode: Mode::Numeric,
198                     begin: 0,
199                     end: 29
200                 },
201                 Segment {
202                     mode: Mode::Alphanumeric,
203                     begin: 29,
204                     end: 30
205                 },
206                 Segment {
207                     mode: Mode::Numeric,
208                     begin: 30,
209                     end: 32
210                 },
211                 Segment {
212                     mode: Mode::Alphanumeric,
213                     begin: 32,
214                     end: 35
215                 },
216                 Segment {
217                     mode: Mode::Numeric,
218                     begin: 35,
219                     end: 38
220                 },
221             ]
222         );
223     }
224 
225     #[test]
test_parse_shift_jis_example_1()226     fn test_parse_shift_jis_example_1() {
227         let segs = parse(b"\x82\xa0\x81\x41\x41\xb1\x81\xf0"); // "あ、AアÅ"
228         assert_eq!(
229             segs,
230             vec![
231                 Segment {
232                     mode: Mode::Kanji,
233                     begin: 0,
234                     end: 4
235                 },
236                 Segment {
237                     mode: Mode::Alphanumeric,
238                     begin: 4,
239                     end: 5
240                 },
241                 Segment {
242                     mode: Mode::Byte,
243                     begin: 5,
244                     end: 6
245                 },
246                 Segment {
247                     mode: Mode::Kanji,
248                     begin: 6,
249                     end: 8
250                 },
251             ]
252         );
253     }
254 
255     #[test]
test_parse_utf_8()256     fn test_parse_utf_8() {
257         // Mojibake?
258         let segs = parse(b"\xe3\x81\x82\xe3\x80\x81A\xef\xbd\xb1\xe2\x84\xab");
259         assert_eq!(
260             segs,
261             vec![
262                 Segment {
263                     mode: Mode::Kanji,
264                     begin: 0,
265                     end: 4
266                 },
267                 Segment {
268                     mode: Mode::Byte,
269                     begin: 4,
270                     end: 5
271                 },
272                 Segment {
273                     mode: Mode::Kanji,
274                     begin: 5,
275                     end: 7
276                 },
277                 Segment {
278                     mode: Mode::Byte,
279                     begin: 7,
280                     end: 10
281                 },
282                 Segment {
283                     mode: Mode::Kanji,
284                     begin: 10,
285                     end: 12
286                 },
287                 Segment {
288                     mode: Mode::Byte,
289                     begin: 12,
290                     end: 13
291                 },
292             ]
293         );
294     }
295 
296     #[test]
test_not_kanji_1()297     fn test_not_kanji_1() {
298         let segs = parse(b"\x81\x30");
299         assert_eq!(
300             segs,
301             vec![
302                 Segment {
303                     mode: Mode::Byte,
304                     begin: 0,
305                     end: 1
306                 },
307                 Segment {
308                     mode: Mode::Numeric,
309                     begin: 1,
310                     end: 2
311                 },
312             ]
313         );
314     }
315 
316     #[test]
test_not_kanji_2()317     fn test_not_kanji_2() {
318         // Note that it's implementation detail that the byte seq is split into
319         // two. Perhaps adjust the test to check for this.
320         let segs = parse(b"\xeb\xc0");
321         assert_eq!(
322             segs,
323             vec![
324                 Segment {
325                     mode: Mode::Byte,
326                     begin: 0,
327                     end: 1
328                 },
329                 Segment {
330                     mode: Mode::Byte,
331                     begin: 1,
332                     end: 2
333                 },
334             ]
335         );
336     }
337 
338     #[test]
test_not_kanji_3()339     fn test_not_kanji_3() {
340         let segs = parse(b"\x81\x7f");
341         assert_eq!(
342             segs,
343             vec![
344                 Segment {
345                     mode: Mode::Byte,
346                     begin: 0,
347                     end: 1
348                 },
349                 Segment {
350                     mode: Mode::Byte,
351                     begin: 1,
352                     end: 2
353                 },
354             ]
355         );
356     }
357 
358     #[test]
test_not_kanji_4()359     fn test_not_kanji_4() {
360         let segs = parse(b"\x81\x40\x81");
361         assert_eq!(
362             segs,
363             vec![
364                 Segment {
365                     mode: Mode::Kanji,
366                     begin: 0,
367                     end: 2
368                 },
369                 Segment {
370                     mode: Mode::Byte,
371                     begin: 2,
372                     end: 3
373                 },
374             ]
375         );
376     }
377 }
378 
379 /// Implementation of a shortest path algorithm in a graph where:
380 ///
381 /// * Graph nodes represent an encoding that covers an initial slice of segments and ends with a
382 ///   given `Mode`.  For an input of N segments, the graph will contain at most 4 * N nodes (maybe
383 ///   less, because some reencodings are not possible - e.g. it is not possible to reencode a
384 ///   `Byte` segment as `Numeric`).
385 /// * Graph edges connect encodings that cover N segments with encodings
386 ///   that cover 1 additional segment.  Because of possible `Mode` transitions
387 ///   nodes can have up to 4 incoming edges and up to 4 outgoing edges.
388 ///
389 /// The algorithm follows the relaxation approach of the
390 /// https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm, but considers the nodes and edges in a
391 /// fixed order (rather than using a priority queue).  This is possible because of the constrained,
392 /// structured shape of the graph we are working with.
393 mod shortest_path {
394     use super::Segment;
395     use crate::types::{Mode, Version};
396     use std::cmp::{Ordering, PartialOrd};
397 
398     #[derive(Clone)]
399     struct Predecessor {
400         index_of_segment: usize,
401         mode: Mode,
402     }
403 
404     #[derive(Clone)]
405     struct ShortestPath {
406         length: usize,
407         predecessor: Option<Predecessor>,
408     }
409 
410     pub struct AlgorithmState<'a> {
411         /// Stores shortest paths - see `get_shortest_path` and `get_path_index` for more details.
412         paths: Vec<Option<ShortestPath>>,
413 
414         /// Initial segmentation that we are trying to improve.
415         segments: &'a [Segment],
416 
417         /// QR code version that determines how many bits are needed to encode a given `Segment`.
418         version: Version,
419     }
420 
421     /// Finds the index into `AlgorithmState::paths` for a path that:
422     /// * Includes all the `0..=index_of_segment` segments from `AlgorithmState::segments`
423     ///   (this range is inclusive on both ends).
424     /// * Encodes the last segment (i.e. the one at `index_of_segment`) using `mode`
get_path_index(index_of_segment: usize, mode: Mode) -> usize425     fn get_path_index(index_of_segment: usize, mode: Mode) -> usize {
426         index_of_segment * Mode::ALL_MODES.len() + (mode as usize)
427     }
428 
429     impl<'a> AlgorithmState<'a> {
430         /// Gets the shortest path that has been computed earlier.
431         ///
432         /// * For more details about `ending_mode` and `index_of_segment` see the documentation
433         ///   of `get_path_index`
434         /// * For more details about the return value see the documentation of
435         ///   `compute_shortest_path`.
get_shortest_path( &self, index_of_segment: usize, ending_mode: Mode, ) -> &Option<ShortestPath>436         fn get_shortest_path(
437             &self,
438             index_of_segment: usize,
439             ending_mode: Mode,
440         ) -> &Option<ShortestPath> {
441             &self.paths[get_path_index(index_of_segment, ending_mode)]
442         }
443 
get_end_of_predecessor(&self, predecessor: &Option<Predecessor>) -> usize444         fn get_end_of_predecessor(&self, predecessor: &Option<Predecessor>) -> usize {
445             match predecessor {
446                 None => 0,
447                 Some(predecessor) => self.segments[predecessor.index_of_segment].end,
448             }
449         }
450 
451         /// Computes the shortest path that
452         /// * Includes all the segments in `self.segments[0..=index_of_segment]`
453         /// * Encodes the last segment (i.e. the one at `index_of_segment`) using `desired_mode`
454         ///
455         /// Assumes that shortest paths for all earlier segments (i.e. ones before
456         /// `index_of_segment) have already been computed and memoized in `self.paths`.
457         ///
458         /// Returns `None` if the segment at `index_of_segment` can't be encoded using
459         /// `desired_mode` (e.g. contents of `Mode::Byte` segment can't be reencoded using
460         /// `Mode::Numeric` - see also the documentation of `PartialOrd` impl for `Mode`).
compute_shortest_path( &self, index_of_segment: usize, desired_mode: Mode, ) -> Option<ShortestPath>461         fn compute_shortest_path(
462             &self,
463             index_of_segment: usize,
464             desired_mode: Mode,
465         ) -> Option<ShortestPath> {
466             let curr_segment = self.segments[index_of_segment];
467 
468             // Check if `curr_segment` can be reencoded using `desired_mode`.
469             match curr_segment.mode.partial_cmp(&desired_mode) {
470                 None | Some(Ordering::Greater) => return None,
471                 Some(Ordering::Less) | Some(Ordering::Equal) => (),
472             }
473 
474             let length_of_reencoding_curr_segment_in_desired_mode = {
475                 let reencoded_segment = Segment {
476                     begin: curr_segment.begin,
477                     end: curr_segment.end,
478                     mode: desired_mode,
479                 };
480                 reencoded_segment.encoded_len(self.version)
481             };
482 
483             // Handle the case when there are no predecessors.
484             if index_of_segment == 0 {
485                 return Some(ShortestPath {
486                     length: length_of_reencoding_curr_segment_in_desired_mode,
487                     predecessor: None,
488                 });
489             }
490 
491             // Find the predecessor with the best mode / compute the shortest path.
492             let prev_index = index_of_segment - 1;
493             Mode::ALL_MODES
494                 .iter()
495                 .filter_map(|&prev_mode| {
496                     self.get_shortest_path(prev_index, prev_mode)
497                         .as_ref()
498                         .map(|prev_path| (prev_mode, prev_path))
499                 })
500                 .map(|(prev_mode, prev_path)| {
501                     if prev_mode == desired_mode {
502                         let merged_length = {
503                             let merged_segment = Segment {
504                                 begin: self.get_end_of_predecessor(&prev_path.predecessor),
505                                 end: curr_segment.end,
506                                 mode: desired_mode,
507                             };
508                             merged_segment.encoded_len(self.version)
509                         };
510                         let length_up_to_merged_segment = match prev_path.predecessor.as_ref() {
511                             None => 0,
512                             Some(predecessor) => {
513                                 self.get_shortest_path(
514                                     predecessor.index_of_segment,
515                                     predecessor.mode,
516                                 )
517                                 .as_ref()
518                                 .expect("Predecessors should point at a valid path")
519                                 .length
520                             }
521                         };
522                         ShortestPath {
523                             length: length_up_to_merged_segment + merged_length,
524                             predecessor: prev_path.predecessor.clone(),
525                         }
526                     } else {
527                         ShortestPath {
528                             length: prev_path.length
529                                 + length_of_reencoding_curr_segment_in_desired_mode,
530                             predecessor: Some(Predecessor {
531                                 index_of_segment: prev_index,
532                                 mode: prev_mode,
533                             }),
534                         }
535                     }
536                 })
537                 .min_by_key(|path| path.length)
538         }
539 
540         /// Runs the shortest-path algorithm, fully populating `Self::paths`.
find_shortest_paths(segments: &'a [Segment], version: Version) -> Self541         pub fn find_shortest_paths(segments: &'a [Segment], version: Version) -> Self {
542             let mut this = AlgorithmState::<'a> {
543                 segments,
544                 paths: vec![None; segments.len() * Mode::ALL_MODES.len()],
545                 version,
546             };
547             for index_of_segment in 0..segments.len() {
548                 for &mode in Mode::ALL_MODES {
549                     this.paths[get_path_index(index_of_segment, mode)] =
550                         this.compute_shortest_path(index_of_segment, mode);
551                 }
552             }
553             this
554         }
555 
556         /// Constructs the best segmentation from prepopulated `self.paths`.
construct_best_segmentation(&self) -> Vec<Segment>557         pub fn construct_best_segmentation(&self) -> Vec<Segment> {
558             let mut result = Vec::new();
559 
560             // Start at the last segment (since we want the best encoding
561             // that covers all of the input segments) and find the mode that
562             // results in the shortest path for the last segment.
563             let mut curr_index_of_segment = self.segments.len() - 1;
564             let (mut best_mode, mut shortest_path) = Mode::ALL_MODES
565                 .iter()
566                 .filter_map(|&mode| {
567                     self.get_shortest_path(curr_index_of_segment, mode)
568                         .as_ref()
569                         .map(|shortest_path| (mode, shortest_path))
570                 })
571                 .min_by_key(|(_mode, path)| path.length)
572                 .expect("At least one path should always exist");
573 
574             // Work backwards to construct the shortest path based on the predecessor information.
575             loop {
576                 result.push(Segment {
577                     begin: self.get_end_of_predecessor(&shortest_path.predecessor),
578                     end: self.segments[curr_index_of_segment].end,
579                     mode: best_mode,
580                 });
581                 match shortest_path.predecessor.as_ref() {
582                     None => {
583                         result.reverse();
584                         return result;
585                     }
586                     Some(predecessor) => {
587                         curr_index_of_segment = predecessor.index_of_segment;
588                         best_mode = predecessor.mode;
589                         shortest_path = self
590                             .get_shortest_path(curr_index_of_segment, best_mode)
591                             .as_ref()
592                             .expect("Predecessors should point at valid paths");
593                     }
594                 };
595             }
596         }
597     }
598 }
599 
600 /// Panics if `segments` are not consecutive (i.e. if there are gaps, or overlaps, or if the
601 /// segments are not ordered by their `begin` field).
assert_valid_segmentation(segments: &[Segment])602 fn assert_valid_segmentation(segments: &[Segment]) {
603     if segments.is_empty() {
604         return;
605     }
606 
607     for segment in segments.iter() {
608         segment.assert_invariants();
609     }
610 
611     let consecutive_pairs = segments[..(segments.len() - 1)]
612         .iter()
613         .zip(segments[1..].iter());
614     for (prev, next) in consecutive_pairs {
615         assert_eq!(prev.end, next.begin, "Non-adjacent segments");
616     }
617 }
618 
619 /// Optimize the segments by combining segments when possible.
620 ///
621 /// Optimization considers all possible segmentations where each of the input `segments`
622 /// may have been reencoded into a different mode (and where adjacent, same-mode segments are
623 /// merged).  The shortest of the considered segmentations is returned.  The implementation uses a
624 /// shortest-path algorithm that runs in O(N) time and uses additional O(N) memory.
625 ///
626 /// This function may panic if `segments` do not represent a valid segmentation
627 /// (e.g. if there are gaps between segments, if the segments overlap, etc.).
optimize_segmentation(segments: &[Segment], version: Version) -> Vec<Segment>628 pub fn optimize_segmentation(segments: &[Segment], version: Version) -> Vec<Segment> {
629     assert_valid_segmentation(segments);
630     if segments.is_empty() {
631         return Vec::new();
632     }
633 
634     let optimized_segments = shortest_path::AlgorithmState::find_shortest_paths(segments, version)
635         .construct_best_segmentation();
636 
637     #[cfg(fuzzing)]
638     optimize_test_helpers::assert_valid_optimization(&segments, &*optimized_segments, version);
639 
640     optimized_segments
641 }
642 
643 /// Computes the total encoded length of all segments.
total_encoded_len<I>(segments: I, version: Version) -> usize where I: IntoIterator, I::Item: Borrow<Segment>,644 pub fn total_encoded_len<I>(segments: I, version: Version) -> usize
645 where
646     I: IntoIterator,
647     I::Item: Borrow<Segment>,
648 {
649     segments
650         .into_iter()
651         .map(|seg| seg.borrow().encoded_len(version))
652         .sum()
653 }
654 
655 #[cfg(any(fuzzing, test))]
656 mod optimize_test_helpers {
657     use super::{assert_valid_segmentation, total_encoded_len, Segment};
658     use crate::types::{Mode, Version};
659     use std::cmp::Ordering;
660 
661     /// Panics if there exists an input that can be represented by `given` segments,
662     /// but that cannot be represented by `opt` segments.
assert_segmentation_equivalence(given: &[Segment], opt: &[Segment])663     pub fn assert_segmentation_equivalence(given: &[Segment], opt: &[Segment]) {
664         if given.is_empty() {
665             assert!(opt.is_empty());
666             return;
667         }
668         let begin = given.first().unwrap().begin;
669         let end = given.last().unwrap().end;
670 
671         // Verify that `opt` covers the same range as `given`.
672         // (This assumes that contiguous coverage has already been verified by
673         // `assert_valid_segmentation`.)
674         assert!(!opt.is_empty());
675         assert_eq!(begin, opt.first().unwrap().begin);
676         assert_eq!(end, opt.last().unwrap().end);
677 
678         // Verify that for each character, `opt` can represent all the characters that may be
679         // present in `given`.
680         for i in begin..end {
681             fn find_mode(segments: &[Segment], i: usize) {
682                 segments
683                     .iter()
684                     .filter(|s| (s.begin <= i) && (i < s.end))
685                     .next()
686                     .expect("Expecting exactly one segment")
687                     .mode;
688             }
689             let given_mode = find_mode(&*given, i);
690             let opt_mode = find_mode(&*given, i);
691             match given_mode.partial_cmp(&opt_mode) {
692                 Some(Ordering::Less) | Some(Ordering::Equal) => (),
693                 _ => panic!(
694                     "Character #{} is {:?}, which {:?} may not represent",
695                     i, given_mode, opt_mode,
696                 ),
697             }
698         }
699     }
700 
701     /// Panics if `optimized` is not an improved representation of `input`.
assert_valid_optimization(input: &[Segment], optimized: &[Segment], version: Version)702     pub fn assert_valid_optimization(input: &[Segment], optimized: &[Segment], version: Version) {
703         assert_valid_segmentation(input);
704         assert_valid_segmentation(optimized);
705         assert_segmentation_equivalence(input, optimized);
706 
707         let input_len = total_encoded_len(input, version);
708         let optimized_len = total_encoded_len(optimized, version);
709         assert!(optimized_len <= input_len);
710 
711         let single_bytes_segment_len = if input.is_empty() {
712             0
713         } else {
714             Segment {
715                 begin: input.first().unwrap().begin,
716                 end: input.last().unwrap().end,
717                 mode: Mode::Byte,
718             }
719             .encoded_len(version)
720         };
721         assert!(optimized_len <= single_bytes_segment_len);
722     }
723 }
724 
725 #[cfg(test)]
726 mod optimize_tests {
727     use super::optimize_test_helpers::*;
728     use super::{assert_valid_segmentation, optimize_segmentation, total_encoded_len, Segment};
729     use crate::types::{Mode, Version};
730     use std::cmp::Ordering;
731 
test_optimization_result(given: Vec<Segment>, expected: Vec<Segment>, version: Version)732     fn test_optimization_result(given: Vec<Segment>, expected: Vec<Segment>, version: Version) {
733         // Verify that the test input is valid.
734         assert_valid_segmentation(&given);
735 
736         // Verify that the test expectations are compatible with the test input.
737         assert_valid_segmentation(&expected);
738         assert_segmentation_equivalence(&given, &expected);
739 
740         let opt_segs = optimize_segmentation(given.as_slice(), version);
741         assert_valid_optimization(&given, &opt_segs, version);
742 
743         // Verify that optimization produces result that is as short as `expected`.
744         if opt_segs != expected {
745             let actual_len = total_encoded_len(&opt_segs, version);
746             let expected_len = total_encoded_len(&expected, version);
747             let msg = match actual_len.cmp(&expected_len) {
748                 Ordering::Less => "Optimization gave something better than expected",
749                 Ordering::Equal => "Optimization gave something different, but just as short",
750                 Ordering::Greater => "Optimization gave something worse than expected",
751             };
752             panic!(
753                 "{}: expected_len={}; actual_len={}; opt_segs=({:?})",
754                 msg, expected_len, actual_len, opt_segs
755             );
756         }
757     }
758 
759     #[test]
test_example_1()760     fn test_example_1() {
761         test_optimization_result(
762             vec![
763                 Segment {
764                     mode: Mode::Alphanumeric,
765                     begin: 0,
766                     end: 3,
767                 },
768                 Segment {
769                     mode: Mode::Numeric,
770                     begin: 3,
771                     end: 6,
772                 },
773                 Segment {
774                     mode: Mode::Byte,
775                     begin: 6,
776                     end: 10,
777                 },
778             ],
779             vec![
780                 Segment {
781                     mode: Mode::Alphanumeric,
782                     begin: 0,
783                     end: 6,
784                 },
785                 Segment {
786                     mode: Mode::Byte,
787                     begin: 6,
788                     end: 10,
789                 },
790             ],
791             Version::Normal(1),
792         );
793     }
794 
795     #[test]
test_example_2()796     fn test_example_2() {
797         test_optimization_result(
798             vec![
799                 Segment {
800                     mode: Mode::Numeric,
801                     begin: 0,
802                     end: 29,
803                 },
804                 Segment {
805                     mode: Mode::Alphanumeric,
806                     begin: 29,
807                     end: 30,
808                 },
809                 Segment {
810                     mode: Mode::Numeric,
811                     begin: 30,
812                     end: 32,
813                 },
814                 Segment {
815                     mode: Mode::Alphanumeric,
816                     begin: 32,
817                     end: 35,
818                 },
819                 Segment {
820                     mode: Mode::Numeric,
821                     begin: 35,
822                     end: 38,
823                 },
824             ],
825             vec![
826                 Segment {
827                     mode: Mode::Numeric,
828                     begin: 0,
829                     end: 29,
830                 },
831                 Segment {
832                     mode: Mode::Alphanumeric,
833                     begin: 29,
834                     end: 38,
835                 },
836             ],
837             Version::Normal(9),
838         );
839     }
840 
841     #[test]
test_example_3()842     fn test_example_3() {
843         test_optimization_result(
844             vec![
845                 Segment {
846                     mode: Mode::Kanji,
847                     begin: 0,
848                     end: 4,
849                 },
850                 Segment {
851                     mode: Mode::Alphanumeric,
852                     begin: 4,
853                     end: 5,
854                 },
855                 Segment {
856                     mode: Mode::Byte,
857                     begin: 5,
858                     end: 6,
859                 },
860                 Segment {
861                     mode: Mode::Kanji,
862                     begin: 6,
863                     end: 8,
864                 },
865             ],
866             vec![Segment {
867                 mode: Mode::Byte,
868                 begin: 0,
869                 end: 8,
870             }],
871             Version::Normal(1),
872         );
873     }
874 
875     #[test]
test_example_4()876     fn test_example_4() {
877         test_optimization_result(
878             vec![
879                 Segment {
880                     mode: Mode::Kanji,
881                     begin: 0,
882                     end: 10,
883                 },
884                 Segment {
885                     mode: Mode::Byte,
886                     begin: 10,
887                     end: 11,
888                 },
889             ],
890             vec![
891                 Segment {
892                     mode: Mode::Kanji,
893                     begin: 0,
894                     end: 10,
895                 },
896                 Segment {
897                     mode: Mode::Byte,
898                     begin: 10,
899                     end: 11,
900                 },
901             ],
902             Version::Normal(1),
903         );
904     }
905 
906     #[test]
test_empty()907     fn test_empty() {
908         test_optimization_result(vec![], vec![], Version::Normal(1));
909     }
910 
911     /// This example shows that greedy merging strategy (used by `qr_code` v1.1.0) can give
912     /// suboptimal results for the following input: `"bytesbytesA1B2C3D4E5"`.  The greedy strategy
913     /// will always consider it (locally) beneficial to merge the initial `Mode::Byte` segment with
914     /// the subsequent single-char segment - this will result in representing the whole input
915     /// in a single `Mode::Byte` segment.  Better segmentation can be done by first merging all the
916     /// `Mode::Alphanumeric` and `Mode::Numeric` segments.
917     #[test]
test_example_where_greedy_merging_is_suboptimal()918     fn test_example_where_greedy_merging_is_suboptimal() {
919         let mut input = vec![Segment {
920             mode: Mode::Byte,
921             begin: 0,
922             end: 10,
923         }];
924         for _ in 0..5 {
925             for &mode in [Mode::Alphanumeric, Mode::Numeric].iter() {
926                 let begin = input.last().unwrap().end;
927                 let end = begin + 1;
928                 input.push(Segment { mode, begin, end });
929             }
930         }
931         test_optimization_result(
932             input,
933             vec![
934                 Segment {
935                     mode: Mode::Byte,
936                     begin: 0,
937                     end: 10,
938                 },
939                 Segment {
940                     mode: Mode::Alphanumeric,
941                     begin: 10,
942                     end: 20,
943                 },
944             ],
945             Version::Normal(40),
946         );
947     }
948 
949     /// In this example merging 2 consecutive segments is always harmful, but merging many segments
950     /// may be beneficial.  This is because merging more than 2 segments saves additional header
951     /// bytes.
952     #[test]
test_example_where_merging_two_consecutive_segments_is_always_harmful()953     fn test_example_where_merging_two_consecutive_segments_is_always_harmful() {
954         let mut input = vec![];
955         for _ in 0..5 {
956             for &mode in [Mode::Byte, Mode::Alphanumeric].iter() {
957                 let begin = input.last().map(|s: &Segment| s.end).unwrap_or_default();
958                 let end = begin + 10;
959                 input.push(Segment { mode, begin, end });
960             }
961         }
962         test_optimization_result(
963             input,
964             vec![
965                 Segment {
966                     mode: Mode::Byte,
967                     begin: 0,
968                     end: 90,
969                 },
970                 Segment {
971                     mode: Mode::Alphanumeric,
972                     begin: 90,
973                     end: 100,
974                 },
975             ],
976             Version::Normal(40),
977         );
978     }
979 
980     #[test]
test_optimize_base64()981     fn test_optimize_base64() {
982         let input: &[u8] = include_bytes!("../test_data/large_base64.in");
983         let input: Vec<Segment> = super::Parser::new(input).collect();
984         test_optimization_result(
985             input,
986             vec![
987                 Segment {
988                     mode: Mode::Byte,
989                     begin: 0,
990                     end: 334,
991                 },
992                 Segment {
993                     mode: Mode::Alphanumeric,
994                     begin: 334,
995                     end: 349,
996                 },
997                 Segment {
998                     mode: Mode::Byte,
999                     begin: 349,
1000                     end: 387,
1001                 },
1002                 Segment {
1003                     mode: Mode::Alphanumeric,
1004                     begin: 387,
1005                     end: 402,
1006                 },
1007                 Segment {
1008                     mode: Mode::Byte,
1009                     begin: 402,
1010                     end: 850,
1011                 },
1012                 Segment {
1013                     mode: Mode::Alphanumeric,
1014                     begin: 850,
1015                     end: 866,
1016                 },
1017                 Segment {
1018                     mode: Mode::Byte,
1019                     begin: 866,
1020                     end: 1146,
1021                 },
1022                 Segment {
1023                     mode: Mode::Alphanumeric,
1024                     begin: 1146,
1025                     end: 1162,
1026                 },
1027                 Segment {
1028                     mode: Mode::Byte,
1029                     begin: 1162,
1030                     end: 2474,
1031                 },
1032                 Segment {
1033                     mode: Mode::Alphanumeric,
1034                     begin: 2474,
1035                     end: 2489,
1036                 },
1037                 Segment {
1038                     mode: Mode::Byte,
1039                     begin: 2489,
1040                     end: 2618,
1041                 },
1042                 Segment {
1043                     mode: Mode::Alphanumeric,
1044                     begin: 2618,
1045                     end: 2641,
1046                 },
1047                 Segment {
1048                     mode: Mode::Byte,
1049                     begin: 2641,
1050                     end: 2707,
1051                 },
1052                 Segment {
1053                     mode: Mode::Alphanumeric,
1054                     begin: 2707,
1055                     end: 2722,
1056                 },
1057                 Segment {
1058                     mode: Mode::Byte,
1059                     begin: 2722,
1060                     end: 2880,
1061                 },
1062             ],
1063             Version::Normal(40),
1064         );
1065     }
1066 
1067     #[test]
test_annex_j_guideline_1a()1068     fn test_annex_j_guideline_1a() {
1069         test_optimization_result(
1070             vec![
1071                 Segment {
1072                     mode: Mode::Numeric,
1073                     begin: 0,
1074                     end: 3,
1075                 },
1076                 Segment {
1077                     mode: Mode::Alphanumeric,
1078                     begin: 3,
1079                     end: 4,
1080                 },
1081             ],
1082             vec![
1083                 Segment {
1084                     mode: Mode::Numeric,
1085                     begin: 0,
1086                     end: 3,
1087                 },
1088                 Segment {
1089                     mode: Mode::Alphanumeric,
1090                     begin: 3,
1091                     end: 4,
1092                 },
1093             ],
1094             Version::Micro(2),
1095         );
1096     }
1097 
1098     #[test]
test_annex_j_guideline_1b()1099     fn test_annex_j_guideline_1b() {
1100         test_optimization_result(
1101             vec![
1102                 Segment {
1103                     mode: Mode::Numeric,
1104                     begin: 0,
1105                     end: 2,
1106                 },
1107                 Segment {
1108                     mode: Mode::Alphanumeric,
1109                     begin: 2,
1110                     end: 4,
1111                 },
1112             ],
1113             vec![Segment {
1114                 mode: Mode::Alphanumeric,
1115                 begin: 0,
1116                 end: 4,
1117             }],
1118             Version::Micro(2),
1119         );
1120     }
1121 
1122     #[test]
test_annex_j_guideline_1c()1123     fn test_annex_j_guideline_1c() {
1124         test_optimization_result(
1125             vec![
1126                 Segment {
1127                     mode: Mode::Numeric,
1128                     begin: 0,
1129                     end: 3,
1130                 },
1131                 Segment {
1132                     mode: Mode::Alphanumeric,
1133                     begin: 3,
1134                     end: 4,
1135                 },
1136             ],
1137             vec![Segment {
1138                 mode: Mode::Alphanumeric,
1139                 begin: 0,
1140                 end: 4,
1141             }],
1142             Version::Micro(3),
1143         );
1144     }
1145 }
1146 
1147 #[cfg(bench)]
1148 mod bench {
1149     use super::{optimize_segmentation, total_encoded_len, Parser, Segment};
1150     use crate::Version;
1151 
bench_optimize(data: &[u8], version: Version, bencher: &mut test::Bencher)1152     fn bench_optimize(data: &[u8], version: Version, bencher: &mut test::Bencher) {
1153         bencher.iter(|| {
1154             let segments: Vec<Segment> = Parser::new(data).collect();
1155             let segments = optimize_segmentation(segments.as_slice(), version);
1156             test::black_box(total_encoded_len(segments, version))
1157         });
1158     }
1159 
1160     #[bench]
bench_optimize_example1(bencher: &mut test::Bencher)1161     fn bench_optimize_example1(bencher: &mut test::Bencher) {
1162         let data = b"QR\x83R\x81[\x83h\x81i\x83L\x83\x85\x81[\x83A\x81[\x83\x8b\x83R\x81[\x83h\x81j\
1163                      \x82\xc6\x82\xcd\x81A1994\x94N\x82\xc9\x83f\x83\x93\x83\\\x81[\x82\xcc\x8aJ\
1164                      \x94\xad\x95\x94\x96\xe5\x81i\x8c\xbb\x8d\xdd\x82\xcd\x95\xaa\x97\xa3\x82\xb5\x83f\
1165                      \x83\x93\x83\\\x81[\x83E\x83F\x81[\x83u\x81j\x82\xaa\x8aJ\x94\xad\x82\xb5\x82\xbd\
1166                      \x83}\x83g\x83\x8a\x83b\x83N\x83X\x8c^\x93\xf1\x8e\x9f\x8c\xb3\x83R\x81[\x83h\
1167                      \x82\xc5\x82\xa0\x82\xe9\x81B\x82\xc8\x82\xa8\x81AQR\x83R\x81[\x83h\x82\xc6\
1168                      \x82\xa2\x82\xa4\x96\xbc\x8f\xcc\x81i\x82\xa8\x82\xe6\x82\xd1\x92P\x8c\xea\x81j\
1169                      \x82\xcd\x83f\x83\x93\x83\\\x81[\x83E\x83F\x81[\x83u\x82\xcc\x93o\x98^\x8f\xa4\
1170                      \x95W\x81i\x91\xe64075066\x8d\x86\x81j\x82\xc5\x82\xa0\x82\xe9\x81BQR\x82\xcd\
1171                      Quick Response\x82\xc9\x97R\x97\x88\x82\xb5\x81A\x8d\x82\x91\xac\x93\xc7\x82\xdd\
1172                      \x8e\xe6\x82\xe8\x82\xaa\x82\xc5\x82\xab\x82\xe9\x82\xe6\x82\xa4\x82\xc9\x8aJ\
1173                      \x94\xad\x82\xb3\x82\xea\x82\xbd\x81B\x93\x96\x8f\x89\x82\xcd\x8e\xa9\x93\xae\
1174                      \x8e\xd4\x95\x94\x95i\x8dH\x8f\xea\x82\xe2\x94z\x91\x97\x83Z\x83\x93\x83^\x81[\
1175                      \x82\xc8\x82\xc7\x82\xc5\x82\xcc\x8eg\x97p\x82\xf0\x94O\x93\xaa\x82\xc9\x8aJ\
1176                      \x94\xad\x82\xb3\x82\xea\x82\xbd\x82\xaa\x81A\x8c\xbb\x8d\xdd\x82\xc5\x82\xcd\x83X\
1177                      \x83}\x81[\x83g\x83t\x83H\x83\x93\x82\xcc\x95\x81\x8by\x82\xc8\x82\xc7\x82\xc9\
1178                      \x82\xe6\x82\xe8\x93\xfa\x96{\x82\xc9\x8c\xc0\x82\xe7\x82\xb8\x90\xa2\x8aE\x93I\
1179                      \x82\xc9\x95\x81\x8by\x82\xb5\x82\xc4\x82\xa2\x82\xe9\x81B";
1180         bench_optimize(data, Version::Normal(15), bencher);
1181     }
1182 
1183     #[bench]
bench_optimize_base64(bencher: &mut test::Bencher)1184     fn bench_optimize_base64(bencher: &mut test::Bencher) {
1185         let data = include_bytes!("../test_data/large_base64.in");
1186         bench_optimize(data, Version::Normal(40), bencher);
1187     }
1188 }
1189 
1190 //}}}
1191 //------------------------------------------------------------------------------
1192 //{{{ Internal types and data for parsing
1193 
1194 /// All values of `u8` can be split into 9 different character sets when
1195 /// determining which encoding to use. This enum represents these groupings for
1196 /// parsing purpose.
1197 #[derive(Copy, Clone)]
1198 enum ExclCharSet {
1199     /// The end of string.
1200     End = 0,
1201 
1202     /// All symbols supported by the Alphanumeric encoding, i.e. space, `$`, `%`,
1203     /// `*`, `+`, `-`, `.`, `/` and `:`.
1204     Symbol = 1,
1205 
1206     /// All numbers (0–9).
1207     Numeric = 2,
1208 
1209     /// All uppercase letters (A–Z). These characters may also appear in the
1210     /// second byte of a Shift JIS 2-byte encoding.
1211     Alpha = 3,
1212 
1213     /// The first byte of a Shift JIS 2-byte encoding, in the range 0x81–0x9f.
1214     KanjiHi1 = 4,
1215 
1216     /// The first byte of a Shift JIS 2-byte encoding, in the range 0xe0–0xea.
1217     KanjiHi2 = 5,
1218 
1219     /// The first byte of a Shift JIS 2-byte encoding, of value 0xeb. This is
1220     /// different from the other two range that the second byte has a smaller
1221     /// range.
1222     KanjiHi3 = 6,
1223 
1224     /// The second byte of a Shift JIS 2-byte encoding, in the range 0x40–0xbf,
1225     /// excluding letters (covered by `Alpha`), 0x81–0x9f (covered by `KanjiHi1`),
1226     /// and the invalid byte 0x7f.
1227     KanjiLo1 = 7,
1228 
1229     /// The second byte of a Shift JIS 2-byte encoding, in the range 0xc0–0xfc,
1230     /// excluding the range 0xe0–0xeb (covered by `KanjiHi2` and `KanjiHi3`).
1231     /// This half of byte-pair cannot appear as the second byte leaded by
1232     /// `KanjiHi3`.
1233     KanjiLo2 = 8,
1234 
1235     /// Any other values not covered by the above character sets.
1236     Byte = 9,
1237 }
1238 
1239 impl ExclCharSet {
1240     /// Determines which character set a byte is in.
from_u8(c: u8) -> Self1241     fn from_u8(c: u8) -> Self {
1242         match c {
1243             0x20 | 0x24 | 0x25 | 0x2a | 0x2b | 0x2d..=0x2f | 0x3a => ExclCharSet::Symbol,
1244             0x30..=0x39 => ExclCharSet::Numeric,
1245             0x41..=0x5a => ExclCharSet::Alpha,
1246             0x81..=0x9f => ExclCharSet::KanjiHi1,
1247             0xe0..=0xea => ExclCharSet::KanjiHi2,
1248             0xeb => ExclCharSet::KanjiHi3,
1249             0x40 | 0x5b..=0x7e | 0x80 | 0xa0..=0xbf => ExclCharSet::KanjiLo1,
1250             0xc0..=0xdf | 0xec..=0xfc => ExclCharSet::KanjiLo2,
1251             _ => ExclCharSet::Byte,
1252         }
1253     }
1254 }
1255 
1256 /// The current parsing state.
1257 #[derive(Copy, Clone)]
1258 enum State {
1259     /// Just initialized.
1260     Init = 0,
1261 
1262     /// Inside a string that can be exclusively encoded as Numeric.
1263     Numeric = 10,
1264 
1265     /// Inside a string that can be exclusively encoded as Alphanumeric.
1266     Alpha = 20,
1267 
1268     /// Inside a string that can be exclusively encoded as 8-Bit Byte.
1269     Byte = 30,
1270 
1271     /// Just encountered the first byte of a Shift JIS 2-byte sequence of the
1272     /// set `KanjiHi1` or `KanjiHi2`.
1273     KanjiHi12 = 40,
1274 
1275     /// Just encountered the first byte of a Shift JIS 2-byte sequence of the
1276     /// set `KanjiHi3`.
1277     KanjiHi3 = 50,
1278 
1279     /// Inside a string that can be exclusively encoded as Kanji.
1280     Kanji = 60,
1281 }
1282 
1283 /// What should the parser do after a state transition.
1284 #[derive(Copy, Clone)]
1285 enum Action {
1286     /// The parser should do nothing.
1287     Idle,
1288 
1289     /// Push the current segment as a Numeric string, and reset the marks.
1290     Numeric,
1291 
1292     /// Push the current segment as an Alphanumeric string, and reset the marks.
1293     Alpha,
1294 
1295     /// Push the current segment as a 8-Bit Byte string, and reset the marks.
1296     Byte,
1297 
1298     /// Push the current segment as a Kanji string, and reset the marks.
1299     Kanji,
1300 
1301     /// Push the current segment excluding the last byte as a Kanji string, then
1302     /// push the remaining single byte as a Byte string, and reset the marks.
1303     KanjiAndSingleByte,
1304 }
1305 
1306 static STATE_TRANSITION: [(State, Action); 70] = [
1307     // STATE_TRANSITION[current_state + next_character] == (next_state, what_to_do)
1308 
1309     // Init state:
1310     (State::Init, Action::Idle),      // End
1311     (State::Alpha, Action::Idle),     // Symbol
1312     (State::Numeric, Action::Idle),   // Numeric
1313     (State::Alpha, Action::Idle),     // Alpha
1314     (State::KanjiHi12, Action::Idle), // KanjiHi1
1315     (State::KanjiHi12, Action::Idle), // KanjiHi2
1316     (State::KanjiHi3, Action::Idle),  // KanjiHi3
1317     (State::Byte, Action::Idle),      // KanjiLo1
1318     (State::Byte, Action::Idle),      // KanjiLo2
1319     (State::Byte, Action::Idle),      // Byte
1320     // Numeric state:
1321     (State::Init, Action::Numeric),      // End
1322     (State::Alpha, Action::Numeric),     // Symbol
1323     (State::Numeric, Action::Idle),      // Numeric
1324     (State::Alpha, Action::Numeric),     // Alpha
1325     (State::KanjiHi12, Action::Numeric), // KanjiHi1
1326     (State::KanjiHi12, Action::Numeric), // KanjiHi2
1327     (State::KanjiHi3, Action::Numeric),  // KanjiHi3
1328     (State::Byte, Action::Numeric),      // KanjiLo1
1329     (State::Byte, Action::Numeric),      // KanjiLo2
1330     (State::Byte, Action::Numeric),      // Byte
1331     // Alpha state:
1332     (State::Init, Action::Alpha),      // End
1333     (State::Alpha, Action::Idle),      // Symbol
1334     (State::Numeric, Action::Alpha),   // Numeric
1335     (State::Alpha, Action::Idle),      // Alpha
1336     (State::KanjiHi12, Action::Alpha), // KanjiHi1
1337     (State::KanjiHi12, Action::Alpha), // KanjiHi2
1338     (State::KanjiHi3, Action::Alpha),  // KanjiHi3
1339     (State::Byte, Action::Alpha),      // KanjiLo1
1340     (State::Byte, Action::Alpha),      // KanjiLo2
1341     (State::Byte, Action::Alpha),      // Byte
1342     // Byte state:
1343     (State::Init, Action::Byte),      // End
1344     (State::Alpha, Action::Byte),     // Symbol
1345     (State::Numeric, Action::Byte),   // Numeric
1346     (State::Alpha, Action::Byte),     // Alpha
1347     (State::KanjiHi12, Action::Byte), // KanjiHi1
1348     (State::KanjiHi12, Action::Byte), // KanjiHi2
1349     (State::KanjiHi3, Action::Byte),  // KanjiHi3
1350     (State::Byte, Action::Idle),      // KanjiLo1
1351     (State::Byte, Action::Idle),      // KanjiLo2
1352     (State::Byte, Action::Idle),      // Byte
1353     // KanjiHi12 state:
1354     (State::Init, Action::KanjiAndSingleByte),    // End
1355     (State::Alpha, Action::KanjiAndSingleByte),   // Symbol
1356     (State::Numeric, Action::KanjiAndSingleByte), // Numeric
1357     (State::Kanji, Action::Idle),                 // Alpha
1358     (State::Kanji, Action::Idle),                 // KanjiHi1
1359     (State::Kanji, Action::Idle),                 // KanjiHi2
1360     (State::Kanji, Action::Idle),                 // KanjiHi3
1361     (State::Kanji, Action::Idle),                 // KanjiLo1
1362     (State::Kanji, Action::Idle),                 // KanjiLo2
1363     (State::Byte, Action::KanjiAndSingleByte),    // Byte
1364     // KanjiHi3 state:
1365     (State::Init, Action::KanjiAndSingleByte),      // End
1366     (State::Alpha, Action::KanjiAndSingleByte),     // Symbol
1367     (State::Numeric, Action::KanjiAndSingleByte),   // Numeric
1368     (State::Kanji, Action::Idle),                   // Alpha
1369     (State::Kanji, Action::Idle),                   // KanjiHi1
1370     (State::KanjiHi12, Action::KanjiAndSingleByte), // KanjiHi2
1371     (State::KanjiHi3, Action::KanjiAndSingleByte),  // KanjiHi3
1372     (State::Kanji, Action::Idle),                   // KanjiLo1
1373     (State::Byte, Action::KanjiAndSingleByte),      // KanjiLo2
1374     (State::Byte, Action::KanjiAndSingleByte),      // Byte
1375     // Kanji state:
1376     (State::Init, Action::Kanji),     // End
1377     (State::Alpha, Action::Kanji),    // Symbol
1378     (State::Numeric, Action::Kanji),  // Numeric
1379     (State::Alpha, Action::Kanji),    // Alpha
1380     (State::KanjiHi12, Action::Idle), // KanjiHi1
1381     (State::KanjiHi12, Action::Idle), // KanjiHi2
1382     (State::KanjiHi3, Action::Idle),  // KanjiHi3
1383     (State::Byte, Action::Kanji),     // KanjiLo1
1384     (State::Byte, Action::Kanji),     // KanjiLo2
1385     (State::Byte, Action::Kanji),     // Byte
1386 ];
1387 
1388 //}}}
1389