xref: /aosp_15_r20/external/pigweed/pw_tokenizer/ts/detokenizer.ts (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1*61c4878aSAndroid Build Coastguard Worker// Copyright 2022 The Pigweed Authors
2*61c4878aSAndroid Build Coastguard Worker//
3*61c4878aSAndroid Build Coastguard Worker// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4*61c4878aSAndroid Build Coastguard Worker// use this file except in compliance with the License. You may obtain a copy of
5*61c4878aSAndroid Build Coastguard Worker// the License at
6*61c4878aSAndroid Build Coastguard Worker//
7*61c4878aSAndroid Build Coastguard Worker//     https://www.apache.org/licenses/LICENSE-2.0
8*61c4878aSAndroid Build Coastguard Worker//
9*61c4878aSAndroid Build Coastguard Worker// Unless required by applicable law or agreed to in writing, software
10*61c4878aSAndroid Build Coastguard Worker// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11*61c4878aSAndroid Build Coastguard Worker// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12*61c4878aSAndroid Build Coastguard Worker// License for the specific language governing permissions and limitations under
13*61c4878aSAndroid Build Coastguard Worker// the License.
14*61c4878aSAndroid Build Coastguard Worker
15*61c4878aSAndroid Build Coastguard Worker/**  Decodes and detokenizes strings from binary or Base64 input. */
16*61c4878aSAndroid Build Coastguard Workerimport { Buffer } from 'buffer';
17*61c4878aSAndroid Build Coastguard Workerimport { Frame } from 'pigweedjs/pw_hdlc';
18*61c4878aSAndroid Build Coastguard Workerimport { TokenDatabase } from './token_database';
19*61c4878aSAndroid Build Coastguard Workerimport { PrintfDecoder } from './printf_decoder';
20*61c4878aSAndroid Build Coastguard Worker
/** Default limit on how many levels of nested Base64 tokens are expanded. */
const MAX_RECURSIONS = 9;

/**
 * Character class matching a single Base64 symbol from either the standard
 * alphabet (`+`, `/`) or the URL-safe alphabet (`-`, `_`).
 *
 * The hyphen is deliberately placed last so that it is a literal character.
 * Written as `/-_` it forms a range from `/` (0x2F) to `_` (0x5F), which also
 * admits `=`, `:`, `;`, `<`, `>`, `?`, `@`, `[`, `\`, `]` and `^`.
 */
const BASE64CHARS = '[A-Za-z0-9+/_-]';

/** Global pattern that finds every `$`-prefixed Base64 token candidate. */
const PATTERN = new RegExp(
  // Base64 tokenized strings start with the prefix character ($)
  '\\$' +
    // Tokenized strings contain 0 or more blocks of four Base64 chars.
    `(?:${BASE64CHARS}{4})*` +
    // The last block of 4 chars may have one or two padding chars (=).
    `(?:${BASE64CHARS}{3}=|${BASE64CHARS}{2}==)?`,
  'g',
);
32*61c4878aSAndroid Build Coastguard Worker
/** A decoded tokenized message: the 32-bit token plus its raw argument bytes. */
type TokenAndArgs = {
  /** Token value used to look up the format string in the database. */
  token: number;
  /** Encoded argument bytes that follow the token in the message. */
  args: Uint8Array;
};
37*61c4878aSAndroid Build Coastguard Worker
/**
 * Turns tokenized messages (binary or `$`-prefixed Base64) back into text by
 * looking each token up in a token database and decoding its arguments.
 */
export class Detokenizer {
  /** Number of bytes occupied by the token at the start of a message. */
  private static readonly TOKEN_SIZE_BYTES = 4;

  private database: TokenDatabase;

  /**
   * @param csvDatabase Token database contents, passed to `TokenDatabase`.
   */
  constructor(csvDatabase: string) {
    this.database = new TokenDatabase(csvDatabase);
  }

  /**
   * Detokenize frame data into actual string messages using the provided
   * token database.
   *
   * If the frame doesn't match any token from database, the frame will be
   * returned as string as-is.
   */
  detokenize(tokenizedFrame: Frame): string {
    return this.detokenizeUint8Array(tokenizedFrame.data);
  }

  /**
   * Detokenize uint8 into actual string messages using the provided
   * token database.
   *
   * If the data doesn't match any token from database (including data that is
   * too short to contain a token), the data will be returned as string as-is.
   */
  detokenizeUint8Array(data: Uint8Array): string {
    const decoded = this.decodeUint8Array(data);
    if (decoded !== undefined) {
      // Parse arguments if this is printf-style text.
      const format = this.database.get(decoded.token);
      if (format) {
        return new PrintfDecoder().decode(String(format), decoded.args);
      }
    }

    return new TextDecoder().decode(data);
  }

  /**
   * Detokenize Base64-encoded frame data into actual string messages using the
   * provided token database.
   *
   * If the frame doesn't match any token from database, the frame will be
   * returned as string as-is.
   *
   * @param tokenizedFrame Frame whose data is text containing `$`-prefixed
   *     Base64 tokens.
   * @param maxRecursion How many additional levels of nested tokens to expand
   *     inside already-detokenized text.
   */
  detokenizeBase64(
    tokenizedFrame: Frame,
    maxRecursion: number = MAX_RECURSIONS,
  ): string {
    const base64String = new TextDecoder().decode(tokenizedFrame.data);
    return this.detokenizeBase64String(base64String, maxRecursion);
  }

  /**
   * Replaces every recognized Base64 token in `base64String` with its decoded
   * text; unrecognized candidates are left exactly as they appeared.
   *
   * @param recursions Remaining levels of nested detokenization to perform on
   *     the decoded text.
   */
  private detokenizeBase64String(
    base64String: string,
    recursions: number,
  ): string {
    return base64String.replace(PATTERN, (base64Substring) => {
      const decoded = this.decodeBase64TokenFrame(base64Substring);
      if (decoded === undefined) {
        // A bare '$' or a payload shorter than a token is ordinary text.
        return base64Substring;
      }

      const format = this.database.get(decoded.token);
      if (!format) {
        return base64Substring;
      }

      // Parse arguments if this is printf-style text.
      const decodedOriginal = new PrintfDecoder().decode(
        String(format),
        decoded.args,
      );
      // Detokenize nested Base64 tokens and their arguments.
      return recursions > 0
        ? this.detokenizeBase64String(decodedOriginal, recursions - 1)
        : decodedOriginal;
    });
  }

  /**
   * Splits a binary message into its little-endian 32-bit token and the
   * argument bytes that follow it.
   *
   * @returns The token and arguments, or `undefined` when `data` holds fewer
   *     bytes than a token.
   */
  private decodeUint8Array(data: Uint8Array): TokenAndArgs | undefined {
    const tokenSize = Detokenizer.TOKEN_SIZE_BYTES;
    if (data.byteLength < tokenSize) {
      return undefined;
    }

    const token = new DataView(
      data.buffer,
      data.byteOffset,
      tokenSize,
    ).getUint32(0, true);
    // `data` may be a view into a larger buffer, so stay within the view's own
    // bounds. The bytes are copied into a fresh array so `args` starts at
    // offset 0 of its own buffer, as before.
    const args = new Uint8Array(data.subarray(tokenSize));

    return { token, args };
  }

  /**
   * Decodes one `$`-prefixed Base64 token candidate.
   *
   * @returns The token and arguments, or `undefined` when the decoded payload
   *     holds fewer bytes than a token.
   */
  private decodeBase64TokenFrame(base64Data: string): TokenAndArgs | undefined {
    // Remove the prefix '$' and convert from Base64. A Buffer is already a
    // Uint8Array, so it can be decoded like any other binary message.
    const bytes = Buffer.from(base64Data.slice(1), 'base64');
    return this.decodeUint8Array(bytes);
  }
}
139