|
1 |
| -/* eslint-disable @typescript-eslint/no-unnecessary-condition */ |
2 |
| -import { UINT32_MAX } from "./int"; |
3 |
| - |
4 |
| -const TEXT_ENCODING_AVAILABLE = |
5 |
| -(typeof process === "undefined" || process?.env?.["TEXT_ENCODING"] !== "never") && |
6 |
| -typeof TextEncoder !== "undefined" && |
7 |
| -typeof TextDecoder !== "undefined"; |
8 | 1 |
|
9 | 2 | export function utf8Count(str: string): number {
|
10 | 3 | const strLength = str.length;
|
@@ -89,22 +82,30 @@ export function utf8EncodeJs(str: string, output: Uint8Array, outputOffset: numb
|
89 | 82 | }
|
90 | 83 | }
|
91 | 84 |
|
92 |
| -const sharedTextEncoder = TEXT_ENCODING_AVAILABLE ? new TextEncoder() : undefined; |
93 |
| -export const TEXT_ENCODER_THRESHOLD = !TEXT_ENCODING_AVAILABLE |
94 |
| -? UINT32_MAX |
95 |
| -: typeof process !== "undefined" && process?.env?.["TEXT_ENCODING"] !== "force" |
96 |
| -? 200 |
97 |
| -: 0; |
| 85 | +// TextEncoder and TextDecoder are standardized by the WHATWG Encoding Standard: |
| 86 | +// https://encoding.spec.whatwg.org/ |
| 87 | +// and are available in all modern browsers: |
| 88 | +// https://caniuse.com/textencoder |
| 89 | +// They are also available in Node.js since v12 LTS: |
| 90 | +// https://nodejs.org/api/globals.html#textencoder |
98 | 91 |
|
99 |
| -function utf8EncodeTEencode(str: string, output: Uint8Array, outputOffset: number): void { |
100 |
| -output.set(sharedTextEncoder!.encode(str), outputOffset); |
101 |
| -} |
| 92 | +const sharedTextEncoder = new TextEncoder(); |
| 93 | + |
| 94 | +// This threshold should be determined by benchmarking; the best value may vary across engines and input data. |
| 95 | +// Run `npx ts-node benchmark/encode-string.ts` for details. |
| 96 | +const TEXT_ENCODER_THRESHOLD = 50; |
102 | 97 |
|
103 |
| -function utf8EncodeTEencodeInto(str: string, output: Uint8Array, outputOffset: number): void { |
104 |
| -sharedTextEncoder!.encodeInto(str, output.subarray(outputOffset)); |
| 98 | +export function utf8EncodeTE(str: string, output: Uint8Array, outputOffset: number): void { |
| 99 | +sharedTextEncoder.encodeInto(str, output.subarray(outputOffset)); |
105 | 100 | }
|
106 | 101 |
|
107 |
| -export const utf8EncodeTE = sharedTextEncoder?.encodeInto ? utf8EncodeTEencodeInto : utf8EncodeTEencode; |
| 102 | +export function utf8Encode(str: string, output: Uint8Array, outputOffset: number): void { |
| 103 | +if (str.length > TEXT_ENCODER_THRESHOLD) { |
| 104 | +utf8EncodeTE(str, output, outputOffset); |
| 105 | +} else { |
| 106 | +utf8EncodeJs(str, output, outputOffset); |
| 107 | +} |
| 108 | +} |
108 | 109 |
|
109 | 110 | const CHUNK_SIZE = 0x1_000;
|
110 | 111 |
|
@@ -157,14 +158,21 @@ export function utf8DecodeJs(bytes: Uint8Array, inputOffset: number, byteLength:
|
157 | 158 | return result;
|
158 | 159 | }
|
159 | 160 |
|
160 |
| -const sharedTextDecoder = TEXT_ENCODING_AVAILABLE ? new TextDecoder() : null; |
161 |
| -export const TEXT_DECODER_THRESHOLD = !TEXT_ENCODING_AVAILABLE |
162 |
| -? UINT32_MAX |
163 |
| -: typeof process !== "undefined" && process?.env?.["TEXT_DECODER"] !== "force" |
164 |
| -? 200 |
165 |
| -: 0; |
| 161 | +const sharedTextDecoder = new TextDecoder(); |
| 162 | + |
| 163 | +// This threshold should be determined by benchmarking; the best value may vary across engines and input data. |
| 164 | +// Run `npx ts-node benchmark/decode-string.ts` for details. |
| 165 | +const TEXT_DECODER_THRESHOLD = 200; |
166 | 166 |
|
167 | 167 | export function utf8DecodeTD(bytes: Uint8Array, inputOffset: number, byteLength: number): string {
|
168 | 168 | const stringBytes = bytes.subarray(inputOffset, inputOffset + byteLength);
|
169 |
| -return sharedTextDecoder!.decode(stringBytes); |
| 169 | +return sharedTextDecoder.decode(stringBytes); |
| 170 | +} |
| 171 | + |
| 172 | +export function utf8Decode(bytes: Uint8Array, inputOffset: number, byteLength: number): string { |
| 173 | +if (byteLength > TEXT_DECODER_THRESHOLD) { |
| 174 | +return utf8DecodeTD(bytes, inputOffset, byteLength); |
| 175 | +} else { |
| 176 | +return utf8DecodeJs(bytes, inputOffset, byteLength); |
| 177 | +} |
170 | 178 | }
|
0 commit comments