1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 #define UNISTR_FROM_CHAR_EXPLICIT
12
13 #include "uassert.h"
14 #include "number_patternstring.h"
15 #include "unicode/utf16.h"
16 #include "number_utils.h"
17 #include "number_roundingutils.h"
18 #include "number_mapper.h"
19
20 using namespace icu;
21 using namespace icu::number;
22 using namespace icu::number::impl;
23
24
parseToPatternInfo(const UnicodeString & patternString,ParsedPatternInfo & patternInfo,UErrorCode & status)25 void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
26 UErrorCode& status) {
27 patternInfo.consumePattern(patternString, status);
28 }
29
30 DecimalFormatProperties
parseToProperties(const UnicodeString & pattern,IgnoreRounding ignoreRounding,UErrorCode & status)31 PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding,
32 UErrorCode& status) {
33 DecimalFormatProperties properties;
34 parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
35 return properties;
36 }
37
parseToProperties(const UnicodeString & pattern,UErrorCode & status)38 DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern,
39 UErrorCode& status) {
40 return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status);
41 }
42
43 void
parseToExistingProperties(const UnicodeString & pattern,DecimalFormatProperties & properties,IgnoreRounding ignoreRounding,UErrorCode & status)44 PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
45 IgnoreRounding ignoreRounding, UErrorCode& status) {
46 parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
47 }
48
49
charAt(int32_t flags,int32_t index) const50 char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const {
51 const Endpoints& endpoints = getEndpoints(flags);
52 if (index < 0 || index >= endpoints.end - endpoints.start) {
53 UPRV_UNREACHABLE_EXIT;
54 }
55 return pattern.charAt(endpoints.start + index);
56 }
57
length(int32_t flags) const58 int32_t ParsedPatternInfo::length(int32_t flags) const {
59 return getLengthFromEndpoints(getEndpoints(flags));
60 }
61
getLengthFromEndpoints(const Endpoints & endpoints)62 int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) {
63 return endpoints.end - endpoints.start;
64 }
65
getString(int32_t flags) const66 UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
67 const Endpoints& endpoints = getEndpoints(flags);
68 if (endpoints.start == endpoints.end) {
69 return {};
70 }
71 // Create a new UnicodeString
72 return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start);
73 }
74
getEndpoints(int32_t flags) const75 const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const {
76 bool prefix = (flags & AFFIX_PREFIX) != 0;
77 bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0;
78 bool padding = (flags & AFFIX_PADDING) != 0;
79 if (isNegative && padding) {
80 return negative.paddingEndpoints;
81 } else if (padding) {
82 return positive.paddingEndpoints;
83 } else if (prefix && isNegative) {
84 return negative.prefixEndpoints;
85 } else if (prefix) {
86 return positive.prefixEndpoints;
87 } else if (isNegative) {
88 return negative.suffixEndpoints;
89 } else {
90 return positive.suffixEndpoints;
91 }
92 }
93
positiveHasPlusSign() const94 bool ParsedPatternInfo::positiveHasPlusSign() const {
95 return positive.hasPlusSign;
96 }
97
hasNegativeSubpattern() const98 bool ParsedPatternInfo::hasNegativeSubpattern() const {
99 return fHasNegativeSubpattern;
100 }
101
negativeHasMinusSign() const102 bool ParsedPatternInfo::negativeHasMinusSign() const {
103 return negative.hasMinusSign;
104 }
105
hasCurrencySign() const106 bool ParsedPatternInfo::hasCurrencySign() const {
107 return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign);
108 }
109
containsSymbolType(AffixPatternType type,UErrorCode & status) const110 bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const {
111 return AffixUtils::containsType(pattern, type, status);
112 }
113
hasBody() const114 bool ParsedPatternInfo::hasBody() const {
115 return positive.integerTotal > 0;
116 }
117
currencyAsDecimal() const118 bool ParsedPatternInfo::currencyAsDecimal() const {
119 return positive.hasCurrencyDecimal;
120 }
121
122 /////////////////////////////////////////////////////
123 /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION ///
124 /////////////////////////////////////////////////////
125
peek()126 UChar32 ParsedPatternInfo::ParserState::peek() {
127 if (offset == pattern.length()) {
128 return -1;
129 } else {
130 return pattern.char32At(offset);
131 }
132 }
133
peek2()134 UChar32 ParsedPatternInfo::ParserState::peek2() {
135 if (offset == pattern.length()) {
136 return -1;
137 }
138 int32_t cp1 = pattern.char32At(offset);
139 int32_t offset2 = offset + U16_LENGTH(cp1);
140 if (offset2 == pattern.length()) {
141 return -1;
142 }
143 return pattern.char32At(offset2);
144 }
145
next()146 UChar32 ParsedPatternInfo::ParserState::next() {
147 int32_t codePoint = peek();
148 offset += U16_LENGTH(codePoint);
149 return codePoint;
150 }
151
consumePattern(const UnicodeString & patternString,UErrorCode & status)152 void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) {
153 if (U_FAILURE(status)) { return; }
154 this->pattern = patternString;
155
156 // This class is not intended for writing twice!
157 // Use move assignment to overwrite instead.
158 U_ASSERT(state.offset == 0);
159
160 // pattern := subpattern (';' subpattern)?
161 currentSubpattern = &positive;
162 consumeSubpattern(status);
163 if (U_FAILURE(status)) { return; }
164 if (state.peek() == u';') {
165 state.next(); // consume the ';'
166 // Don't consume the negative subpattern if it is empty (trailing ';')
167 if (state.peek() != -1) {
168 fHasNegativeSubpattern = true;
169 currentSubpattern = &negative;
170 consumeSubpattern(status);
171 if (U_FAILURE(status)) { return; }
172 }
173 }
174 if (state.peek() != -1) {
175 state.toParseException(u"Found unquoted special character");
176 status = U_UNQUOTED_SPECIAL;
177 }
178 }
179
consumeSubpattern(UErrorCode & status)180 void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) {
181 // subpattern := literals? number exponent? literals?
182 consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
183 if (U_FAILURE(status)) { return; }
184 consumeAffix(currentSubpattern->prefixEndpoints, status);
185 if (U_FAILURE(status)) { return; }
186 consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status);
187 if (U_FAILURE(status)) { return; }
188 consumeFormat(status);
189 if (U_FAILURE(status)) { return; }
190 consumeExponent(status);
191 if (U_FAILURE(status)) { return; }
192 consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status);
193 if (U_FAILURE(status)) { return; }
194 consumeAffix(currentSubpattern->suffixEndpoints, status);
195 if (U_FAILURE(status)) { return; }
196 consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status);
197 if (U_FAILURE(status)) { return; }
198 }
199
consumePadding(PadPosition paddingLocation,UErrorCode & status)200 void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) {
201 if (state.peek() != u'*') {
202 return;
203 }
204 if (currentSubpattern->hasPadding) {
205 state.toParseException(u"Cannot have multiple pad specifiers");
206 status = U_MULTIPLE_PAD_SPECIFIERS;
207 return;
208 }
209 currentSubpattern->paddingLocation = paddingLocation;
210 currentSubpattern->hasPadding = true;
211 state.next(); // consume the '*'
212 currentSubpattern->paddingEndpoints.start = state.offset;
213 consumeLiteral(status);
214 currentSubpattern->paddingEndpoints.end = state.offset;
215 }
216
consumeAffix(Endpoints & endpoints,UErrorCode & status)217 void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) {
218 // literals := { literal }
219 endpoints.start = state.offset;
220 while (true) {
221 switch (state.peek()) {
222 case u'#':
223 case u'@':
224 case u';':
225 case u'*':
226 case u'.':
227 case u',':
228 case u'0':
229 case u'1':
230 case u'2':
231 case u'3':
232 case u'4':
233 case u'5':
234 case u'6':
235 case u'7':
236 case u'8':
237 case u'9':
238 case -1:
239 // Characters that cannot appear unquoted in a literal
240 // break outer;
241 goto after_outer;
242
243 case u'%':
244 currentSubpattern->hasPercentSign = true;
245 break;
246
247 case u'‰':
248 currentSubpattern->hasPerMilleSign = true;
249 break;
250
251 case u'¤':
252 currentSubpattern->hasCurrencySign = true;
253 break;
254
255 case u'-':
256 currentSubpattern->hasMinusSign = true;
257 break;
258
259 case u'+':
260 currentSubpattern->hasPlusSign = true;
261 break;
262
263 default:
264 break;
265 }
266 consumeLiteral(status);
267 if (U_FAILURE(status)) { return; }
268 }
269 after_outer:
270 endpoints.end = state.offset;
271 }
272
consumeLiteral(UErrorCode & status)273 void ParsedPatternInfo::consumeLiteral(UErrorCode& status) {
274 if (state.peek() == -1) {
275 state.toParseException(u"Expected unquoted literal but found EOL");
276 status = U_PATTERN_SYNTAX_ERROR;
277 return;
278 } else if (state.peek() == u'\'') {
279 state.next(); // consume the starting quote
280 while (state.peek() != u'\'') {
281 if (state.peek() == -1) {
282 state.toParseException(u"Expected quoted literal but found EOL");
283 status = U_PATTERN_SYNTAX_ERROR;
284 return;
285 } else {
286 state.next(); // consume a quoted character
287 }
288 }
289 state.next(); // consume the ending quote
290 } else {
291 // consume a non-quoted literal character
292 state.next();
293 }
294 }
295
consumeFormat(UErrorCode & status)296 void ParsedPatternInfo::consumeFormat(UErrorCode& status) {
297 consumeIntegerFormat(status);
298 if (U_FAILURE(status)) { return; }
299 if (state.peek() == u'.') {
300 state.next(); // consume the decimal point
301 currentSubpattern->hasDecimal = true;
302 currentSubpattern->widthExceptAffixes += 1;
303 consumeFractionFormat(status);
304 if (U_FAILURE(status)) { return; }
305 } else if (state.peek() == u'¤') {
306 // Check if currency is a decimal separator
307 switch (state.peek2()) {
308 case u'#':
309 case u'0':
310 case u'1':
311 case u'2':
312 case u'3':
313 case u'4':
314 case u'5':
315 case u'6':
316 case u'7':
317 case u'8':
318 case u'9':
319 break;
320 default:
321 // Currency symbol followed by a non-numeric character;
322 // treat as a normal affix.
323 return;
324 }
325 // Currency symbol is followed by a numeric character;
326 // treat as a decimal separator.
327 currentSubpattern->hasCurrencySign = true;
328 currentSubpattern->hasCurrencyDecimal = true;
329 currentSubpattern->hasDecimal = true;
330 currentSubpattern->widthExceptAffixes += 1;
331 state.next(); // consume the symbol
332 consumeFractionFormat(status);
333 if (U_FAILURE(status)) { return; }
334 }
335 }
336
consumeIntegerFormat(UErrorCode & status)337 void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) {
338 // Convenience reference:
339 ParsedSubpatternInfo& result = *currentSubpattern;
340
341 while (true) {
342 switch (state.peek()) {
343 case u',':
344 result.widthExceptAffixes += 1;
345 result.groupingSizes <<= 16;
346 break;
347
348 case u'#':
349 if (result.integerNumerals > 0) {
350 state.toParseException(u"# cannot follow 0 before decimal point");
351 status = U_UNEXPECTED_TOKEN;
352 return;
353 }
354 result.widthExceptAffixes += 1;
355 result.groupingSizes += 1;
356 if (result.integerAtSigns > 0) {
357 result.integerTrailingHashSigns += 1;
358 } else {
359 result.integerLeadingHashSigns += 1;
360 }
361 result.integerTotal += 1;
362 break;
363
364 case u'@':
365 if (result.integerNumerals > 0) {
366 state.toParseException(u"Cannot mix 0 and @");
367 status = U_UNEXPECTED_TOKEN;
368 return;
369 }
370 if (result.integerTrailingHashSigns > 0) {
371 state.toParseException(u"Cannot nest # inside of a run of @");
372 status = U_UNEXPECTED_TOKEN;
373 return;
374 }
375 result.widthExceptAffixes += 1;
376 result.groupingSizes += 1;
377 result.integerAtSigns += 1;
378 result.integerTotal += 1;
379 break;
380
381 case u'0':
382 case u'1':
383 case u'2':
384 case u'3':
385 case u'4':
386 case u'5':
387 case u'6':
388 case u'7':
389 case u'8':
390 case u'9':
391 if (result.integerAtSigns > 0) {
392 state.toParseException(u"Cannot mix @ and 0");
393 status = U_UNEXPECTED_TOKEN;
394 return;
395 }
396 result.widthExceptAffixes += 1;
397 result.groupingSizes += 1;
398 result.integerNumerals += 1;
399 result.integerTotal += 1;
400 if (!result.rounding.isZeroish() || state.peek() != u'0') {
401 result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u'0'), 0, true);
402 }
403 break;
404
405 default:
406 goto after_outer;
407 }
408 state.next(); // consume the symbol
409 }
410
411 after_outer:
412 // Disallow patterns with a trailing ',' or with two ',' next to each other
413 auto grouping1 = static_cast<int16_t> (result.groupingSizes & 0xffff);
414 auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> 16) & 0xffff);
415 auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> 32) & 0xffff);
416 if (grouping1 == 0 && grouping2 != -1) {
417 state.toParseException(u"Trailing grouping separator is invalid");
418 status = U_UNEXPECTED_TOKEN;
419 return;
420 }
421 if (grouping2 == 0 && grouping3 != -1) {
422 state.toParseException(u"Grouping width of zero is invalid");
423 status = U_PATTERN_SYNTAX_ERROR;
424 return;
425 }
426 }
427
consumeFractionFormat(UErrorCode & status)428 void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) {
429 // Convenience reference:
430 ParsedSubpatternInfo& result = *currentSubpattern;
431
432 int32_t zeroCounter = 0;
433 while (true) {
434 switch (state.peek()) {
435 case u'#':
436 result.widthExceptAffixes += 1;
437 result.fractionHashSigns += 1;
438 result.fractionTotal += 1;
439 zeroCounter++;
440 break;
441
442 case u'0':
443 case u'1':
444 case u'2':
445 case u'3':
446 case u'4':
447 case u'5':
448 case u'6':
449 case u'7':
450 case u'8':
451 case u'9':
452 if (result.fractionHashSigns > 0) {
453 state.toParseException(u"0 cannot follow # after decimal point");
454 status = U_UNEXPECTED_TOKEN;
455 return;
456 }
457 result.widthExceptAffixes += 1;
458 result.fractionNumerals += 1;
459 result.fractionTotal += 1;
460 if (state.peek() == u'0') {
461 zeroCounter++;
462 } else {
463 result.rounding
464 .appendDigit(static_cast<int8_t>(state.peek() - u'0'), zeroCounter, false);
465 zeroCounter = 0;
466 }
467 break;
468
469 default:
470 return;
471 }
472 state.next(); // consume the symbol
473 }
474 }
475
consumeExponent(UErrorCode & status)476 void ParsedPatternInfo::consumeExponent(UErrorCode& status) {
477 // Convenience reference:
478 ParsedSubpatternInfo& result = *currentSubpattern;
479
480 if (state.peek() != u'E') {
481 return;
482 }
483 if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) {
484 state.toParseException(u"Cannot have grouping separator in scientific notation");
485 status = U_MALFORMED_EXPONENTIAL_PATTERN;
486 return;
487 }
488 state.next(); // consume the E
489 result.widthExceptAffixes++;
490 if (state.peek() == u'+') {
491 state.next(); // consume the +
492 result.exponentHasPlusSign = true;
493 result.widthExceptAffixes++;
494 }
495 while (state.peek() == u'0') {
496 state.next(); // consume the 0
497 result.exponentZeros += 1;
498 result.widthExceptAffixes++;
499 }
500 }
501
502 ///////////////////////////////////////////////////
503 /// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
504 ///////////////////////////////////////////////////
505
parseToExistingPropertiesImpl(const UnicodeString & pattern,DecimalFormatProperties & properties,IgnoreRounding ignoreRounding,UErrorCode & status)506 void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern,
507 DecimalFormatProperties& properties,
508 IgnoreRounding ignoreRounding, UErrorCode& status) {
509 if (pattern.length() == 0) {
510 // Backwards compatibility requires that we reset to the default values.
511 // TODO: Only overwrite the properties that "saveToProperties" normally touches?
512 properties.clear();
513 return;
514 }
515
516 ParsedPatternInfo patternInfo;
517 parseToPatternInfo(pattern, patternInfo, status);
518 if (U_FAILURE(status)) { return; }
519 patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
520 }
521
522 void
patternInfoToProperties(DecimalFormatProperties & properties,ParsedPatternInfo & patternInfo,IgnoreRounding _ignoreRounding,UErrorCode & status)523 PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo,
524 IgnoreRounding _ignoreRounding, UErrorCode& status) {
525 // Translate from PatternParseResult to Properties.
526 // Note that most data from "negative" is ignored per the specification of DecimalFormat.
527
528 const ParsedSubpatternInfo& positive = patternInfo.positive;
529
530 bool ignoreRounding;
531 if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
532 ignoreRounding = false;
533 } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) {
534 ignoreRounding = positive.hasCurrencySign;
535 } else {
536 U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS);
537 ignoreRounding = true;
538 }
539
540 // Grouping settings
541 auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff);
542 auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff);
543 auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff);
544 if (grouping2 != -1) {
545 properties.groupingSize = grouping1;
546 properties.groupingUsed = true;
547 } else {
548 properties.groupingSize = -1;
549 properties.groupingUsed = false;
550 }
551 if (grouping3 != -1) {
552 properties.secondaryGroupingSize = grouping2;
553 } else {
554 properties.secondaryGroupingSize = -1;
555 }
556
557 // For backwards compatibility, require that the pattern emit at least one min digit.
558 int minInt, minFrac;
559 if (positive.integerTotal == 0 && positive.fractionTotal > 0) {
560 // patterns like ".##"
561 minInt = 0;
562 minFrac = uprv_max(1, positive.fractionNumerals);
563 } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) {
564 // patterns like "#.##"
565 minInt = 1;
566 minFrac = 0;
567 } else {
568 minInt = positive.integerNumerals;
569 minFrac = positive.fractionNumerals;
570 }
571
572 // Rounding settings
573 // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage
574 if (positive.integerAtSigns > 0) {
575 properties.minimumFractionDigits = -1;
576 properties.maximumFractionDigits = -1;
577 properties.roundingIncrement = 0.0;
578 properties.minimumSignificantDigits = positive.integerAtSigns;
579 properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns;
580 } else if (!positive.rounding.isZeroish()) {
581 if (!ignoreRounding) {
582 properties.minimumFractionDigits = minFrac;
583 properties.maximumFractionDigits = positive.fractionTotal;
584 properties.roundingIncrement = positive.rounding.toDouble();
585 } else {
586 properties.minimumFractionDigits = -1;
587 properties.maximumFractionDigits = -1;
588 properties.roundingIncrement = 0.0;
589 }
590 properties.minimumSignificantDigits = -1;
591 properties.maximumSignificantDigits = -1;
592 } else {
593 if (!ignoreRounding) {
594 properties.minimumFractionDigits = minFrac;
595 properties.maximumFractionDigits = positive.fractionTotal;
596 properties.roundingIncrement = 0.0;
597 } else {
598 properties.minimumFractionDigits = -1;
599 properties.maximumFractionDigits = -1;
600 properties.roundingIncrement = 0.0;
601 }
602 properties.minimumSignificantDigits = -1;
603 properties.maximumSignificantDigits = -1;
604 }
605
606 // If the pattern ends with a '.' then force the decimal point.
607 if (positive.hasDecimal && positive.fractionTotal == 0) {
608 properties.decimalSeparatorAlwaysShown = true;
609 } else {
610 properties.decimalSeparatorAlwaysShown = false;
611 }
612
613 // Persist the currency as decimal separator
614 properties.currencyAsDecimal = positive.hasCurrencyDecimal;
615
616 // Scientific notation settings
617 if (positive.exponentZeros > 0) {
618 properties.exponentSignAlwaysShown = positive.exponentHasPlusSign;
619 properties.minimumExponentDigits = positive.exponentZeros;
620 if (positive.integerAtSigns == 0) {
621 // patterns without '@' can define max integer digits, used for engineering notation
622 properties.minimumIntegerDigits = positive.integerNumerals;
623 properties.maximumIntegerDigits = positive.integerTotal;
624 } else {
625 // patterns with '@' cannot define max integer digits
626 properties.minimumIntegerDigits = 1;
627 properties.maximumIntegerDigits = -1;
628 }
629 } else {
630 properties.exponentSignAlwaysShown = false;
631 properties.minimumExponentDigits = -1;
632 properties.minimumIntegerDigits = minInt;
633 properties.maximumIntegerDigits = -1;
634 }
635
636 // Compute the affix patterns (required for both padding and affixes)
637 UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX);
638 UnicodeString posSuffix = patternInfo.getString(0);
639
640 // Padding settings
641 if (positive.hasPadding) {
642 // The width of the positive prefix and suffix templates are included in the padding
643 int paddingWidth = positive.widthExceptAffixes +
644 AffixUtils::estimateLength(posPrefix, status) +
645 AffixUtils::estimateLength(posSuffix, status);
646 properties.formatWidth = paddingWidth;
647 UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
648 if (rawPaddingString.length() == 1) {
649 properties.padString = rawPaddingString;
650 } else if (rawPaddingString.length() == 2) {
651 if (rawPaddingString.charAt(0) == u'\'') {
652 properties.padString.setTo(u"'", -1);
653 } else {
654 properties.padString = rawPaddingString;
655 }
656 } else {
657 properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2);
658 }
659 properties.padPosition = positive.paddingLocation;
660 } else {
661 properties.formatWidth = -1;
662 properties.padString.setToBogus();
663 properties.padPosition.nullify();
664 }
665
666 // Set the affixes
667 // Always call the setter, even if the prefixes are empty, especially in the case of the
668 // negative prefix pattern, to prevent default values from overriding the pattern.
669 properties.positivePrefixPattern = posPrefix;
670 properties.positiveSuffixPattern = posSuffix;
671 if (patternInfo.fHasNegativeSubpattern) {
672 properties.negativePrefixPattern = patternInfo.getString(
673 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX);
674 properties.negativeSuffixPattern = patternInfo.getString(
675 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN);
676 } else {
677 properties.negativePrefixPattern.setToBogus();
678 properties.negativeSuffixPattern.setToBogus();
679 }
680
681 // Set the magnitude multiplier
682 if (positive.hasPercentSign) {
683 properties.magnitudeMultiplier = 2;
684 } else if (positive.hasPerMilleSign) {
685 properties.magnitudeMultiplier = 3;
686 } else {
687 properties.magnitudeMultiplier = 0;
688 }
689 }
690
691 ///////////////////////////////////////////////////////////////////
692 /// End PatternStringParser.java; begin PatternStringUtils.java ///
693 ///////////////////////////////////////////////////////////////////
694
695 // Determine whether a given roundingIncrement should be ignored for formatting
696 // based on the current maxFrac value (maximum fraction digits). For example a
697 // roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac
698 // is 2 or more. Note that roundingIncrements are rounded in significance, so
699 // a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e.
700 // it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of
701 // 0.005 is treated like 0.001 for significance). This is the reason for the
702 // initial doubling below.
703 // roundIncr must be non-zero.
ignoreRoundingIncrement(double roundIncr,int32_t maxFrac)704 bool PatternStringUtils::ignoreRoundingIncrement(double roundIncr, int32_t maxFrac) {
705 if (maxFrac < 0) {
706 return false;
707 }
708 int32_t frac = 0;
709 roundIncr *= 2.0;
710 for (frac = 0; frac <= maxFrac && roundIncr <= 1.0; frac++, roundIncr *= 10.0);
711 return (frac > maxFrac);
712 }
713
propertiesToPatternString(const DecimalFormatProperties & properties,UErrorCode & status)714 UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties,
715 UErrorCode& status) {
716 UnicodeString sb;
717
718 // Convenience references
719 // The uprv_min() calls prevent DoS
720 int32_t dosMax = 100;
721 int32_t grouping1 = uprv_max(0, uprv_min(properties.groupingSize, dosMax));
722 int32_t grouping2 = uprv_max(0, uprv_min(properties.secondaryGroupingSize, dosMax));
723 bool useGrouping = properties.groupingUsed;
724 int32_t paddingWidth = uprv_min(properties.formatWidth, dosMax);
725 NullableValue<PadPosition> paddingLocation = properties.padPosition;
726 UnicodeString paddingString = properties.padString;
727 int32_t minInt = uprv_max(0, uprv_min(properties.minimumIntegerDigits, dosMax));
728 int32_t maxInt = uprv_min(properties.maximumIntegerDigits, dosMax);
729 int32_t minFrac = uprv_max(0, uprv_min(properties.minimumFractionDigits, dosMax));
730 int32_t maxFrac = uprv_min(properties.maximumFractionDigits, dosMax);
731 int32_t minSig = uprv_min(properties.minimumSignificantDigits, dosMax);
732 int32_t maxSig = uprv_min(properties.maximumSignificantDigits, dosMax);
733 bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown;
734 int32_t exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax);
735 bool exponentShowPlusSign = properties.exponentSignAlwaysShown;
736
737 AutoAffixPatternProvider affixProvider(properties, status);
738
739 // Prefixes
740 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_PREFIX));
741 int32_t afterPrefixPos = sb.length();
742
743 // Figure out the grouping sizes.
744 if (!useGrouping) {
745 grouping1 = 0;
746 grouping2 = 0;
747 } else if (grouping1 == grouping2) {
748 grouping1 = 0;
749 }
750 int32_t groupingLength = grouping1 + grouping2 + 1;
751
752 // Figure out the digits we need to put in the pattern.
753 double increment = properties.roundingIncrement;
754 UnicodeString digitsString;
755 int32_t digitsStringScale = 0;
756 if (maxSig != uprv_min(dosMax, -1)) {
757 // Significant Digits.
758 while (digitsString.length() < minSig) {
759 digitsString.append(u'@');
760 }
761 while (digitsString.length() < maxSig) {
762 digitsString.append(u'#');
763 }
764 } else if (increment != 0.0 && !ignoreRoundingIncrement(increment,maxFrac)) {
765 // Rounding Increment.
766 DecimalQuantity incrementQuantity;
767 incrementQuantity.setToDouble(increment);
768 incrementQuantity.roundToInfinity();
769 digitsStringScale = incrementQuantity.getLowerDisplayMagnitude();
770 incrementQuantity.adjustMagnitude(-digitsStringScale);
771 incrementQuantity.increaseMinIntegerTo(minInt - digitsStringScale);
772 UnicodeString str = incrementQuantity.toPlainString();
773 if (str.charAt(0) == u'-') {
774 // TODO: Unsupported operation exception or fail silently?
775 digitsString.append(str, 1, str.length() - 1);
776 } else {
777 digitsString.append(str);
778 }
779 }
780 while (digitsString.length() + digitsStringScale < minInt) {
781 digitsString.insert(0, u'0');
782 }
783 while (-digitsStringScale < minFrac) {
784 digitsString.append(u'0');
785 digitsStringScale--;
786 }
787
788 // Write the digits to the string builder
789 int32_t m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale);
790 m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1;
791 int32_t mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale;
792 for (int32_t magnitude = m0; magnitude >= mN; magnitude--) {
793 int32_t di = digitsString.length() + digitsStringScale - magnitude - 1;
794 if (di < 0 || di >= digitsString.length()) {
795 sb.append(u'#');
796 } else {
797 sb.append(digitsString.charAt(di));
798 }
799 // Decimal separator
800 if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) {
801 if (properties.currencyAsDecimal) {
802 sb.append(u'¤');
803 } else {
804 sb.append(u'.');
805 }
806 }
807 if (!useGrouping) {
808 continue;
809 }
810 // Least-significant grouping separator
811 if (magnitude > 0 && magnitude == grouping1) {
812 sb.append(u',');
813 }
814 // All other grouping separators
815 if (magnitude > grouping1 && grouping2 > 0 && (magnitude - grouping1) % grouping2 == 0) {
816 sb.append(u',');
817 }
818 }
819
820 // Exponential notation
821 if (exponentDigits != uprv_min(dosMax, -1)) {
822 sb.append(u'E');
823 if (exponentShowPlusSign) {
824 sb.append(u'+');
825 }
826 for (int32_t i = 0; i < exponentDigits; i++) {
827 sb.append(u'0');
828 }
829 }
830
831 // Suffixes
832 int32_t beforeSuffixPos = sb.length();
833 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_SUFFIX));
834
835 // Resolve Padding
836 if (paddingWidth > 0 && !paddingLocation.isNull()) {
837 while (paddingWidth - sb.length() > 0) {
838 sb.insert(afterPrefixPos, u'#');
839 beforeSuffixPos++;
840 }
841 int32_t addedLength;
842 switch (paddingLocation.get(status)) {
843 case PadPosition::UNUM_PAD_BEFORE_PREFIX:
844 addedLength = escapePaddingString(paddingString, sb, 0, status);
845 sb.insert(0, u'*');
846 afterPrefixPos += addedLength + 1;
847 beforeSuffixPos += addedLength + 1;
848 break;
849 case PadPosition::UNUM_PAD_AFTER_PREFIX:
850 addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status);
851 sb.insert(afterPrefixPos, u'*');
852 afterPrefixPos += addedLength + 1;
853 beforeSuffixPos += addedLength + 1;
854 break;
855 case PadPosition::UNUM_PAD_BEFORE_SUFFIX:
856 escapePaddingString(paddingString, sb, beforeSuffixPos, status);
857 sb.insert(beforeSuffixPos, u'*');
858 break;
859 case PadPosition::UNUM_PAD_AFTER_SUFFIX:
860 sb.append(u'*');
861 escapePaddingString(paddingString, sb, sb.length(), status);
862 break;
863 }
864 if (U_FAILURE(status)) { return sb; }
865 }
866
867 // Negative affixes
868 // Ignore if the negative prefix pattern is "-" and the negative suffix is empty
869 if (affixProvider.get().hasNegativeSubpattern()) {
870 sb.append(u';');
871 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_PREFIX));
872 // Copy the positive digit format into the negative.
873 // This is optional; the pattern is the same as if '#' were appended here instead.
874 // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy.
875 // See https://unicode-org.atlassian.net/browse/ICU-13707
876 UnicodeString copy(sb);
877 sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos);
878 sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_SUFFIX));
879 }
880
881 return sb;
882 }
883
escapePaddingString(UnicodeString input,UnicodeString & output,int startIndex,UErrorCode & status)884 int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
885 UErrorCode& status) {
886 (void) status;
887 if (input.length() == 0) {
888 input.setTo(kFallbackPaddingString, -1);
889 }
890 int startLength = output.length();
891 if (input.length() == 1) {
892 if (input.compare(u"'", -1) == 0) {
893 output.insert(startIndex, u"''", -1);
894 } else {
895 output.insert(startIndex, input);
896 }
897 } else {
898 output.insert(startIndex, u'\'');
899 int offset = 1;
900 for (int i = 0; i < input.length(); i++) {
901 // it's okay to deal in chars here because the quote mark is the only interesting thing.
902 char16_t ch = input.charAt(i);
903 if (ch == u'\'') {
904 output.insert(startIndex + offset, u"''", -1);
905 offset += 2;
906 } else {
907 output.insert(startIndex + offset, ch);
908 offset += 1;
909 }
910 }
911 output.insert(startIndex + offset, u'\'');
912 }
913 return output.length() - startLength;
914 }
915
916 UnicodeString
convertLocalized(const UnicodeString & input,const DecimalFormatSymbols & symbols,bool toLocalized,UErrorCode & status)917 PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols,
918 bool toLocalized, UErrorCode& status) {
919 // Construct a table of strings to be converted between localized and standard.
920 static constexpr int32_t LEN = 21;
921 UnicodeString table[LEN][2];
922 int standIdx = toLocalized ? 0 : 1;
923 int localIdx = toLocalized ? 1 : 0;
924 // TODO: Add approximately sign here?
925 table[0][standIdx] = u"%";
926 table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);
927 table[1][standIdx] = u"‰";
928 table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);
929 table[2][standIdx] = u".";
930 table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
931 table[3][standIdx] = u",";
932 table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
933 table[4][standIdx] = u"-";
934 table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
935 table[5][standIdx] = u"+";
936 table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
937 table[6][standIdx] = u";";
938 table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol);
939 table[7][standIdx] = u"@";
940 table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol);
941 table[8][standIdx] = u"E";
942 table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol);
943 table[9][standIdx] = u"*";
944 table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol);
945 table[10][standIdx] = u"#";
946 table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol);
947 for (int i = 0; i < 10; i++) {
948 table[11 + i][standIdx] = u'0' + i;
949 table[11 + i][localIdx] = symbols.getConstDigitSymbol(i);
950 }
951
952 // Special case: quotes are NOT allowed to be in any localIdx strings.
953 // Substitute them with '’' instead.
954 for (int32_t i = 0; i < LEN; i++) {
955 table[i][localIdx].findAndReplace(u'\'', u'’');
956 }
957
958 // Iterate through the string and convert.
959 // State table:
960 // 0 => base state
961 // 1 => first char inside a quoted sequence in input and output string
962 // 2 => inside a quoted sequence in input and output string
963 // 3 => first char after a close quote in input string;
964 // close quote still needs to be written to output string
965 // 4 => base state in input string; inside quoted sequence in output string
966 // 5 => first char inside a quoted sequence in input string;
967 // inside quoted sequence in output string
968 UnicodeString result;
969 int state = 0;
970 for (int offset = 0; offset < input.length(); offset++) {
971 char16_t ch = input.charAt(offset);
972
973 // Handle a quote character (state shift)
974 if (ch == u'\'') {
975 if (state == 0) {
976 result.append(u'\'');
977 state = 1;
978 continue;
979 } else if (state == 1) {
980 result.append(u'\'');
981 state = 0;
982 continue;
983 } else if (state == 2) {
984 state = 3;
985 continue;
986 } else if (state == 3) {
987 result.append(u'\'');
988 result.append(u'\'');
989 state = 1;
990 continue;
991 } else if (state == 4) {
992 state = 5;
993 continue;
994 } else {
995 U_ASSERT(state == 5);
996 result.append(u'\'');
997 result.append(u'\'');
998 state = 4;
999 continue;
1000 }
1001 }
1002
1003 if (state == 0 || state == 3 || state == 4) {
1004 for (auto& pair : table) {
1005 // Perform a greedy match on this symbol string
1006 UnicodeString temp = input.tempSubString(offset, pair[0].length());
1007 if (temp == pair[0]) {
1008 // Skip ahead past this region for the next iteration
1009 offset += pair[0].length() - 1;
1010 if (state == 3 || state == 4) {
1011 result.append(u'\'');
1012 state = 0;
1013 }
1014 result.append(pair[1]);
1015 goto continue_outer;
1016 }
1017 }
1018 // No replacement found. Check if a special quote is necessary
1019 for (auto& pair : table) {
1020 UnicodeString temp = input.tempSubString(offset, pair[1].length());
1021 if (temp == pair[1]) {
1022 if (state == 0) {
1023 result.append(u'\'');
1024 state = 4;
1025 }
1026 result.append(ch);
1027 goto continue_outer;
1028 }
1029 }
1030 // Still nothing. Copy the char verbatim. (Add a close quote if necessary)
1031 if (state == 3 || state == 4) {
1032 result.append(u'\'');
1033 state = 0;
1034 }
1035 result.append(ch);
1036 } else {
1037 U_ASSERT(state == 1 || state == 2 || state == 5);
1038 result.append(ch);
1039 state = 2;
1040 }
1041 continue_outer:;
1042 }
1043 // Resolve final quotes
1044 if (state == 3 || state == 4) {
1045 result.append(u'\'');
1046 state = 0;
1047 }
1048 if (state != 0) {
1049 // Malformed localized pattern: unterminated quote
1050 status = U_PATTERN_SYNTAX_ERROR;
1051 }
1052 return result;
1053 }
1054
patternInfoToStringBuilder(const AffixPatternProvider & patternInfo,bool isPrefix,PatternSignType patternSignType,bool approximately,StandardPlural::Form plural,bool perMilleReplacesPercent,bool dropCurrencySymbols,UnicodeString & output)1055 void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
1056 PatternSignType patternSignType,
1057 bool approximately,
1058 StandardPlural::Form plural,
1059 bool perMilleReplacesPercent,
1060 bool dropCurrencySymbols,
1061 UnicodeString& output) {
1062
1063 // Should the output render '+' where '-' would normally appear in the pattern?
1064 bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN)
1065 && !patternInfo.positiveHasPlusSign();
1066
1067 // Should we use the affix from the negative subpattern?
1068 // (If not, we will use the positive subpattern.)
1069 bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern()
1070 && (patternSignType == PATTERN_SIGN_TYPE_NEG
1071 || (patternInfo.negativeHasMinusSign() && (plusReplacesMinusSign || approximately)));
1072
1073 // Resolve the flags for the affix pattern.
1074 int flags = 0;
1075 if (useNegativeAffixPattern) {
1076 flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
1077 }
1078 if (isPrefix) {
1079 flags |= AffixPatternProvider::AFFIX_PREFIX;
1080 }
1081 if (plural != StandardPlural::Form::COUNT) {
1082 U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
1083 flags |= plural;
1084 }
1085
1086 // Should we prepend a sign to the pattern?
1087 bool prependSign;
1088 if (!isPrefix || useNegativeAffixPattern) {
1089 prependSign = false;
1090 } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) {
1091 prependSign = true;
1092 } else {
1093 prependSign = plusReplacesMinusSign || approximately;
1094 }
1095
1096 // What symbols should take the place of the sign placeholder?
1097 const char16_t* signSymbols = u"-";
1098 if (approximately) {
1099 if (plusReplacesMinusSign) {
1100 signSymbols = u"~+";
1101 } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) {
1102 signSymbols = u"~-";
1103 } else {
1104 signSymbols = u"~";
1105 }
1106 } else if (plusReplacesMinusSign) {
1107 signSymbols = u"+";
1108 }
1109
1110 // Compute the number of tokens in the affix pattern (signSymbols is considered one token).
1111 int length = patternInfo.length(flags) + (prependSign ? 1 : 0);
1112
1113 // Finally, set the result into the StringBuilder.
1114 output.remove();
1115 for (int index = 0; index < length; index++) {
1116 char16_t candidate;
1117 if (prependSign && index == 0) {
1118 candidate = u'-';
1119 } else if (prependSign) {
1120 candidate = patternInfo.charAt(flags, index - 1);
1121 } else {
1122 candidate = patternInfo.charAt(flags, index);
1123 }
1124 if (candidate == u'-') {
1125 if (u_strlen(signSymbols) == 1) {
1126 candidate = signSymbols[0];
1127 } else {
1128 output.append(signSymbols[0]);
1129 candidate = signSymbols[1];
1130 }
1131 }
1132 if (perMilleReplacesPercent && candidate == u'%') {
1133 candidate = u'‰';
1134 }
1135 if (dropCurrencySymbols && candidate == u'\u00A4') {
1136 continue;
1137 }
1138 output.append(candidate);
1139 }
1140 }
1141
resolveSignDisplay(UNumberSignDisplay signDisplay,Signum signum)1142 PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum) {
1143 switch (signDisplay) {
1144 case UNUM_SIGN_AUTO:
1145 case UNUM_SIGN_ACCOUNTING:
1146 switch (signum) {
1147 case SIGNUM_NEG:
1148 case SIGNUM_NEG_ZERO:
1149 return PATTERN_SIGN_TYPE_NEG;
1150 case SIGNUM_POS_ZERO:
1151 case SIGNUM_POS:
1152 return PATTERN_SIGN_TYPE_POS;
1153 default:
1154 break;
1155 }
1156 break;
1157
1158 case UNUM_SIGN_ALWAYS:
1159 case UNUM_SIGN_ACCOUNTING_ALWAYS:
1160 switch (signum) {
1161 case SIGNUM_NEG:
1162 case SIGNUM_NEG_ZERO:
1163 return PATTERN_SIGN_TYPE_NEG;
1164 case SIGNUM_POS_ZERO:
1165 case SIGNUM_POS:
1166 return PATTERN_SIGN_TYPE_POS_SIGN;
1167 default:
1168 break;
1169 }
1170 break;
1171
1172 case UNUM_SIGN_EXCEPT_ZERO:
1173 case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO:
1174 switch (signum) {
1175 case SIGNUM_NEG:
1176 return PATTERN_SIGN_TYPE_NEG;
1177 case SIGNUM_NEG_ZERO:
1178 case SIGNUM_POS_ZERO:
1179 return PATTERN_SIGN_TYPE_POS;
1180 case SIGNUM_POS:
1181 return PATTERN_SIGN_TYPE_POS_SIGN;
1182 default:
1183 break;
1184 }
1185 break;
1186
1187 case UNUM_SIGN_NEGATIVE:
1188 case UNUM_SIGN_ACCOUNTING_NEGATIVE:
1189 switch (signum) {
1190 case SIGNUM_NEG:
1191 return PATTERN_SIGN_TYPE_NEG;
1192 case SIGNUM_NEG_ZERO:
1193 case SIGNUM_POS_ZERO:
1194 case SIGNUM_POS:
1195 return PATTERN_SIGN_TYPE_POS;
1196 default:
1197 break;
1198 }
1199 break;
1200
1201 case UNUM_SIGN_NEVER:
1202 return PATTERN_SIGN_TYPE_POS;
1203
1204 default:
1205 break;
1206 }
1207
1208 UPRV_UNREACHABLE_EXIT;
1209 return PATTERN_SIGN_TYPE_POS;
1210 }
1211
1212 #endif /* #if !UCONFIG_NO_FORMATTING */
1213