From 858e7513fc269c21e26b14dc963054c78ddcce1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Goetz?= Date: Sat, 19 Oct 2024 16:43:36 +0200 Subject: [PATCH] Optimize parseText (#327) --- packages/benchmark-messageformat/README.md | 94 +++++++++++----------- packages/messageformat/src/parser.ts | 92 ++++++++++++--------- 2 files changed, 101 insertions(+), 85 deletions(-) diff --git a/packages/benchmark-messageformat/README.md b/packages/benchmark-messageformat/README.md index f76c67c..20f6661 100644 --- a/packages/benchmark-messageformat/README.md +++ b/packages/benchmark-messageformat/README.md @@ -16,11 +16,11 @@ Sources can be found in `src`, measure taken on 07/12/2023 with latest available | Npm Package | Version | Size | Comment | | ----------------------------------------------------- | ---------- | ---- | ----------- | | @ffz/icu-msgparser (+ custom renderer) | 2.0.0 | 9.4K | | -| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 1.0.0-rc.2 | 8.1K | | +| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 1.0.0-rc.2 | 8K | | | @onigoetz/messageformat (+ @onigoetz/make-plural) | 1.0.0-rc.2 | 11K | | | format-message-parse | 6.2.4 | 22K | Uses peg.js | | @onigoetz/messageformat (+ make-plural) | 1.0.0-rc.2 | 23K | | -| @onigoetz/messageformat (+ @phensley/plurals) | 1.0.0-rc.2 | 41K | | +| @onigoetz/messageformat (+ @phensley/plurals) | 1.0.0-rc.2 | 40K | | | intl-messageformat | 10.5.14 | 55K | Uses peg.js | | @phensley/messageformat | 1.9.0 | 54K | | | @messageformat/core | 3.4.0 | 74K | Uses peg.js | @@ -50,7 +50,7 @@ The benchmark is applied to 4 different strings, which for the simple cases shou > > - Node.js v20.9.0 > - Apple M2 CPU -> - October 18, 2024 +> - October 19, 2024 ## Simple String @@ -60,18 +60,18 @@ const input = [`Hello, world!`, {}]; // Renders: `Hello, world!` ``` -| Name | ops/sec | MoE | Runs sampled | -| ----------------------------------------------------- | --------: | ------- | ------------ | -| 
**format-message-parse** | 8,895,545 | ± 0.23% | 94 | -| @onigoetz/messageformat (+ @phensley/plurals) | 8,596,907 | ± 0.16% | 99 | -| @onigoetz/messageformat (+ @onigoetz/make-plural) | 8,583,838 | ± 0.14% | 101 | -| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 8,396,261 | ± 2.03% | 94 | -| @onigoetz/messageformat (+ make-plural) | 8,222,727 | ± 1.48% | 97 | -| @phensley/messageformat | 8,079,695 | ± 0.22% | 99 | -| @ffz/icu-msgparser (+ custom renderer) | 5,662,192 | ± 0.16% | 97 | -| @messageformat/core | 1,715,496 | ± 0.13% | 97 | -| intl-messageformat | 240,408 | ± 0.63% | 93 | -| globalize | 37,391 | ± 0.31% | 96 | +| Name | ops/sec | MoE | Runs sampled | +| ----------------------------------------------------- | ---------: | ------- | ------------ | +| **@onigoetz/messageformat (+ @onigoetz/make-plural)** | 18,624,709 | ± 0.64% | 93 | +| @onigoetz/messageformat (+ @phensley/plurals) | 18,440,109 | ± 1.91% | 97 | +| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 18,181,841 | ± 1.13% | 92 | +| @onigoetz/messageformat (+ make-plural) | 17,425,302 | ± 3.11% | 91 | +| format-message-parse | 8,697,924 | ± 0.26% | 100 | +| @phensley/messageformat | 7,948,798 | ± 0.35% | 100 | +| @ffz/icu-msgparser (+ custom renderer) | 5,693,123 | ± 0.28% | 97 | +| @messageformat/core | 1,635,413 | ± 2.11% | 96 | +| intl-messageformat | 189,554 | ± 9.22% | 79 | +| globalize | 34,190 | ± 5.54% | 93 | ## With one variable @@ -88,16 +88,16 @@ const input = [ | Name | ops/sec | MoE | Runs sampled | | --------------------------------------------------------- | --------: | ------- | ------------ | -| **@onigoetz/messageformat (+ @onigoetz/intl-formatters)** | 6,414,862 | ± 0.11% | 98 | -| @onigoetz/messageformat (+ make-plural) | 6,380,084 | ± 0.31% | 98 | -| @onigoetz/messageformat (+ @phensley/plurals) | 6,233,602 | ± 0.22% | 98 | -| @onigoetz/messageformat (+ @onigoetz/make-plural) | 6,228,841 | ± 0.20% | 100 | -| format-message-parse | 3,978,459 | ± 0.19% | 99 | 
-| @phensley/messageformat | 3,399,981 | ± 0.13% | 99 | -| @ffz/icu-msgparser (+ custom renderer) | 3,361,547 | ± 0.27% | 100 | -| @messageformat/core | 880,358 | ± 0.26% | 100 | -| intl-messageformat | 216,110 | ± 0.54% | 95 | -| globalize | 36,506 | ± 0.32% | 97 | +| **@onigoetz/messageformat (+ @onigoetz/intl-formatters)** | 7,561,041 | ± 0.19% | 93 | +| @onigoetz/messageformat (+ make-plural) | 7,431,574 | ± 0.14% | 100 | +| @onigoetz/messageformat (+ @onigoetz/make-plural) | 7,349,978 | ± 0.37% | 98 | +| @onigoetz/messageformat (+ @phensley/plurals) | 6,904,747 | ± 0.51% | 97 | +| format-message-parse | 4,019,700 | ± 0.21% | 100 | +| @ffz/icu-msgparser (+ custom renderer) | 3,378,807 | ± 0.27% | 97 | +| @phensley/messageformat | 3,359,162 | ± 0.16% | 99 | +| @messageformat/core | 853,873 | ± 1.96% | 89 | +| intl-messageformat | 217,803 | ± 0.61% | 96 | +| globalize | 36,601 | ± 0.17% | 98 | ## With plurals @@ -116,16 +116,16 @@ const input = [ | Name | ops/sec | MoE | Runs sampled | | ----------------------------------------------------- | ------: | ------- | ------------ | -| **@onigoetz/messageformat (+ @phensley/plurals)** | 940,533 | ± 0.19% | 98 | -| @onigoetz/messageformat (+ make-plural) | 919,852 | ± 0.14% | 98 | -| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 772,695 | ± 0.25% | 98 | -| @phensley/messageformat | 551,128 | ± 0.13% | 96 | -| @messageformat/core | 185,781 | ± 0.11% | 98 | -| @onigoetz/messageformat (+ @onigoetz/make-plural) | 152,541 | ± 0.10% | 100 | -| @ffz/icu-msgparser (+ custom renderer) | 130,129 | ± 0.10% | 97 | -| format-message-parse | 81,703 | ± 0.28% | 97 | -| intl-messageformat | 48,687 | ± 2.22% | 92 | -| globalize | 27,151 | ± 0.17% | 97 | +| **@onigoetz/messageformat (+ @phensley/plurals)** | 982,506 | ± 0.19% | 99 | +| @onigoetz/messageformat (+ make-plural) | 960,514 | ± 0.14% | 95 | +| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 806,120 | ± 0.08% | 101 | +| @phensley/messageformat | 547,569 | ± 
0.35% | 100 | +| @messageformat/core | 183,629 | ± 0.18% | 94 | +| @onigoetz/messageformat (+ @onigoetz/make-plural) | 154,218 | ± 0.07% | 100 | +| @ffz/icu-msgparser (+ custom renderer) | 129,434 | ± 0.08% | 97 | +| format-message-parse | 82,619 | ± 0.16% | 99 | +| intl-messageformat | 48,941 | ± 2.50% | 91 | +| globalize | 27,024 | ± 0.30% | 95 | ## With select and plurals @@ -171,15 +171,15 @@ const input = [` ` ``` -| Name | ops/sec | MoE | Runs sampled | -| ----------------------------------------------------- | ------: | -------- | ------------ | -| **@onigoetz/messageformat (+ @phensley/plurals)** | 140,415 | ± 0.07% | 99 | -| @onigoetz/messageformat (+ make-plural) | 136,452 | ± 0.11% | 98 | -| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 135,955 | ± 0.11% | 99 | -| @onigoetz/messageformat (+ @onigoetz/make-plural) | 79,580 | ± 0.07% | 98 | -| @phensley/messageformat | 53,396 | ± 0.16% | 101 | -| @ffz/icu-msgparser (+ custom renderer) | 29,484 | ± 0.72% | 95 | -| @messageformat/core | 29,510 | ± 0.93% | 95 | -| intl-messageformat | 15,706 | ± 1.99% | 89 | -| format-message-parse | 15,356 | ± 12.31% | 85 | -| globalize | 8,250 | ± 1.98% | 91 | +| Name | ops/sec | MoE | Runs sampled | +| ----------------------------------------------------- | ------: | ------- | ------------ | +| **@onigoetz/messageformat (+ @phensley/plurals)** | 180,818 | ± 0.33% | 100 | +| @onigoetz/messageformat (+ make-plural) | 178,976 | ± 0.22% | 99 | +| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 172,978 | ± 0.25% | 97 | +| @onigoetz/messageformat (+ @onigoetz/make-plural) | 89,534 | ± 0.15% | 99 | +| @phensley/messageformat | 53,231 | ± 0.07% | 101 | +| @messageformat/core | 30,664 | ± 0.20% | 96 | +| @ffz/icu-msgparser (+ custom renderer) | 29,430 | ± 1.11% | 95 | +| intl-messageformat | 17,116 | ± 1.02% | 95 | +| format-message-parse | 17,090 | ± 2.95% | 95 | +| globalize | 8,766 | ± 0.08% | 100 | diff --git a/packages/messageformat/src/parser.ts 
b/packages/messageformat/src/parser.ts index 0bac3c4..8eb7da1 100644 --- a/packages/messageformat/src/parser.ts +++ b/packages/messageformat/src/parser.ts @@ -64,10 +64,6 @@ function expected(char: string, context: Context): SyntaxError { ); } -function peek(context: Context): number { - return context.msg.charCodeAt(context.i + 1); -} - function get(context: Context): number { return context.msg.charCodeAt(context.i); } @@ -160,17 +156,23 @@ function add(context: Context, token: Token): number { } /** - * Parse text, stop or not at separators, stop or not at spaces, stop or not at # - * Could use some cleanup :/ + * Parse text + * + * Stops when it finds an open `{`, a close `}` or (when `specialHash` is set) a sub-variable `#` character, unless that character is escaped + * + * Returns the string without the escape characters * * @param context * @param specialHash */ function parseText(context: Context, specialHash = false): string { - let out = ""; + const start = context.i; + + // Stores the indexes (relative to `start`, highest first) of the escape characters to remove once we reach the end of the text + const toRemove: number[] = []; while (context.i < context.l) { - const char = get(context); + let char = get(context); if ( char === CHAR_OPEN || char === CHAR_CLOSE || @@ -179,46 +181,58 @@ function parseText(context: Context, specialHash = false): string { break; } + if (char !== CHAR_ESCAPE) { + context.i++; + continue; + } + + // Since it's an escape, jump to the next character + ++context.i; + char = get(context); + if (char === CHAR_ESCAPE) { + // Escaped Escape Character + // Remove one of the two escape characters + toRemove.unshift(context.i - start); ++context.i; - let next = get(context); - if (next === CHAR_ESCAPE) { - // Escaped Escape Character - out += String.fromCharCode(next); - ++context.i; - } else if ( - next === CHAR_OPEN || - next === CHAR_CLOSE || - (specialHash && next === CHAR_SUB_VAR) - ) { - // Special Character - out += String.fromCharCode(next); - while (++context.i < context.l) { - next = get(context); - if 
(next === CHAR_ESCAPE) { - // Check for an escaped escape character, and don't - // stop if we encounter one. - next = peek(context); - if (next === CHAR_ESCAPE) { - out += String.fromCharCode(next); - ++context.i; - } else { - ++context.i; - break; - } - } else { - out += String.fromCharCode(next); + } else if ( + char === CHAR_OPEN || + char === CHAR_CLOSE || + (specialHash && char === CHAR_SUB_VAR) + ) { + toRemove.unshift(context.i - start - 1); + + // Special Character + // Escaping a special character will move forward the string until it finds the next + // escape character (unless it's an escaped escape character) + while (++context.i < context.l) { + char = get(context); + + if (char === CHAR_ESCAPE) { + // Always ignore the escape character itself + toRemove.unshift(context.i - start); + + // If we find a second escape character, we continue, otherwise we stop + ++context.i; + char = get(context); + if (char !== CHAR_ESCAPE) { + break; } } - } else { - out += String.fromCharCode(char); } } else { + // This is not escaping a special character, we keep it ++context.i; - out += String.fromCharCode(char); } } + let out = context.msg.substring(start, context.i); + + // Remove all escapes from the final string + for (const idx of toRemove) { + out = out.substring(0, idx) + out.substring(idx + 1); + } + return out; }