xref: /aosp_15_r20/external/icu/libicu/cts_headers/uspoof_conf.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker ******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker *
6*0e209d39SAndroid Build Coastguard Worker *   Copyright (C) 2008-2016, International Business Machines
7*0e209d39SAndroid Build Coastguard Worker *   Corporation and others.  All Rights Reserved.
8*0e209d39SAndroid Build Coastguard Worker *
9*0e209d39SAndroid Build Coastguard Worker ******************************************************************************
10*0e209d39SAndroid Build Coastguard Worker *   file name:  uspoof_conf.h
11*0e209d39SAndroid Build Coastguard Worker *   encoding:   UTF-8
12*0e209d39SAndroid Build Coastguard Worker *   tab size:   8 (not used)
13*0e209d39SAndroid Build Coastguard Worker *   indentation:4
14*0e209d39SAndroid Build Coastguard Worker *
15*0e209d39SAndroid Build Coastguard Worker *   created on: 2009Jan05
16*0e209d39SAndroid Build Coastguard Worker *   created by: Andy Heninger
17*0e209d39SAndroid Build Coastguard Worker *
18*0e209d39SAndroid Build Coastguard Worker *   Internal classes for compiling confusable data into its binary (runtime) form.
19*0e209d39SAndroid Build Coastguard Worker */
20*0e209d39SAndroid Build Coastguard Worker 
21*0e209d39SAndroid Build Coastguard Worker #ifndef __USPOOF_BUILDCONF_H__
22*0e209d39SAndroid Build Coastguard Worker #define __USPOOF_BUILDCONF_H__
23*0e209d39SAndroid Build Coastguard Worker 
24*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
25*0e209d39SAndroid Build Coastguard Worker 
26*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_NORMALIZATION
27*0e209d39SAndroid Build Coastguard Worker 
28*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_REGULAR_EXPRESSIONS
29*0e209d39SAndroid Build Coastguard Worker 
30*0e209d39SAndroid Build Coastguard Worker #include "unicode/uregex.h"
31*0e209d39SAndroid Build Coastguard Worker #include "uhash.h"
32*0e209d39SAndroid Build Coastguard Worker #include "uspoof_impl.h"
33*0e209d39SAndroid Build Coastguard Worker 
34*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
35*0e209d39SAndroid Build Coastguard Worker 
36*0e209d39SAndroid Build Coastguard Worker // SPUString
37*0e209d39SAndroid Build Coastguard Worker //              Holds a string that is the result of one of the mappings defined
38*0e209d39SAndroid Build Coastguard Worker //              by the confusable mapping data (confusables.txt from Unicode.org)
39*0e209d39SAndroid Build Coastguard Worker //              Instances of SPUString exist during the compilation process only.
40*0e209d39SAndroid Build Coastguard Worker 
41*0e209d39SAndroid Build Coastguard Worker struct SPUString : public UMemory {
42*0e209d39SAndroid Build Coastguard Worker     LocalPointer<UnicodeString> fStr;     // The actual string.
43*0e209d39SAndroid Build Coastguard Worker     int32_t      fCharOrStrTableIndex;    // Index into the final runtime data for this
44*0e209d39SAndroid Build Coastguard Worker                                           // string (or, for length 1, the single string char
45*0e209d39SAndroid Build Coastguard Worker                                           // itself, there being no string table entry for it.)
46*0e209d39SAndroid Build Coastguard Worker 
47*0e209d39SAndroid Build Coastguard Worker     SPUString(LocalPointer<UnicodeString> s);
48*0e209d39SAndroid Build Coastguard Worker     ~SPUString();
49*0e209d39SAndroid Build Coastguard Worker };
50*0e209d39SAndroid Build Coastguard Worker 
51*0e209d39SAndroid Build Coastguard Worker 
//  String Pool   A utility class for holding the strings that are the result of
//                the spoof mappings.  These strings will ultimately end up in the
//                run-time String Table.
//                This is sort of like a sorted set of strings, except that ICU's anemic
//                built-in collections don't support those, so it is implemented with a
//                combination of a uhash and a UVector.
58*0e209d39SAndroid Build Coastguard Worker 
59*0e209d39SAndroid Build Coastguard Worker 
60*0e209d39SAndroid Build Coastguard Worker class SPUStringPool : public UMemory {
61*0e209d39SAndroid Build Coastguard Worker   public:
62*0e209d39SAndroid Build Coastguard Worker     SPUStringPool(UErrorCode &status);
63*0e209d39SAndroid Build Coastguard Worker     ~SPUStringPool();
64*0e209d39SAndroid Build Coastguard Worker 
65*0e209d39SAndroid Build Coastguard Worker     // Add a string. Return the string from the table.
66*0e209d39SAndroid Build Coastguard Worker     // If the input parameter string is already in the table, delete the
67*0e209d39SAndroid Build Coastguard Worker     //  input parameter and return the existing string.
68*0e209d39SAndroid Build Coastguard Worker     SPUString *addString(UnicodeString *src, UErrorCode &status);
69*0e209d39SAndroid Build Coastguard Worker 
70*0e209d39SAndroid Build Coastguard Worker 
71*0e209d39SAndroid Build Coastguard Worker     // Get the n-th string in the collection.
72*0e209d39SAndroid Build Coastguard Worker     SPUString *getByIndex(int32_t i);
73*0e209d39SAndroid Build Coastguard Worker 
74*0e209d39SAndroid Build Coastguard Worker     // Sort the contents; affects the ordering of getByIndex().
75*0e209d39SAndroid Build Coastguard Worker     void sort(UErrorCode &status);
76*0e209d39SAndroid Build Coastguard Worker 
77*0e209d39SAndroid Build Coastguard Worker     int32_t size();
78*0e209d39SAndroid Build Coastguard Worker 
79*0e209d39SAndroid Build Coastguard Worker   private:
80*0e209d39SAndroid Build Coastguard Worker     UVector     *fVec;    // Elements are SPUString *
81*0e209d39SAndroid Build Coastguard Worker     UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString
82*0e209d39SAndroid Build Coastguard Worker };
83*0e209d39SAndroid Build Coastguard Worker 
84*0e209d39SAndroid Build Coastguard Worker 
85*0e209d39SAndroid Build Coastguard Worker // class ConfusabledataBuilder
86*0e209d39SAndroid Build Coastguard Worker //     An instance of this class exists while the confusable data is being built from source.
87*0e209d39SAndroid Build Coastguard Worker //     It encapsulates the intermediate data structures that are used for building.
88*0e209d39SAndroid Build Coastguard Worker //     It exports one static function, to do a confusable data build.
89*0e209d39SAndroid Build Coastguard Worker 
90*0e209d39SAndroid Build Coastguard Worker class ConfusabledataBuilder : public UMemory {
91*0e209d39SAndroid Build Coastguard Worker   private:
92*0e209d39SAndroid Build Coastguard Worker     SpoofImpl  *fSpoofImpl;
93*0e209d39SAndroid Build Coastguard Worker     char16_t   *fInput;
94*0e209d39SAndroid Build Coastguard Worker     UHashtable *fTable;
95*0e209d39SAndroid Build Coastguard Worker     UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables.
96*0e209d39SAndroid Build Coastguard Worker 
97*0e209d39SAndroid Build Coastguard Worker     // The binary data is first assembled into the following four collections, then
98*0e209d39SAndroid Build Coastguard Worker     //   copied to its final raw-memory destination.
99*0e209d39SAndroid Build Coastguard Worker     UVector            *fKeyVec;
100*0e209d39SAndroid Build Coastguard Worker     UVector            *fValueVec;
101*0e209d39SAndroid Build Coastguard Worker     UnicodeString      *fStringTable;
102*0e209d39SAndroid Build Coastguard Worker 
103*0e209d39SAndroid Build Coastguard Worker     SPUStringPool      *stringPool;
104*0e209d39SAndroid Build Coastguard Worker     URegularExpression *fParseLine;
105*0e209d39SAndroid Build Coastguard Worker     URegularExpression *fParseHexNum;
106*0e209d39SAndroid Build Coastguard Worker     int32_t             fLineNum;
107*0e209d39SAndroid Build Coastguard Worker 
108*0e209d39SAndroid Build Coastguard Worker     ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
109*0e209d39SAndroid Build Coastguard Worker     ~ConfusabledataBuilder();
110*0e209d39SAndroid Build Coastguard Worker     void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
111*0e209d39SAndroid Build Coastguard Worker 
112*0e209d39SAndroid Build Coastguard Worker     // Add an entry to the key and value tables being built
113*0e209d39SAndroid Build Coastguard Worker     //   input:  data from SLTable, MATable, etc.
114*0e209d39SAndroid Build Coastguard Worker     //   output:  entry added to fKeyVec and fValueVec
115*0e209d39SAndroid Build Coastguard Worker     void addKeyEntry(UChar32     keyChar,     // The key character
116*0e209d39SAndroid Build Coastguard Worker                      UHashtable *table,       // The table, one of SATable, MATable, etc.
117*0e209d39SAndroid Build Coastguard Worker                      int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
118*0e209d39SAndroid Build Coastguard Worker                      UErrorCode &status);
119*0e209d39SAndroid Build Coastguard Worker 
120*0e209d39SAndroid Build Coastguard Worker     // From an index into fKeyVec & fValueVec
121*0e209d39SAndroid Build Coastguard Worker     //   get a UnicodeString with the corresponding mapping.
122*0e209d39SAndroid Build Coastguard Worker     UnicodeString getMapping(int32_t index);
123*0e209d39SAndroid Build Coastguard Worker 
124*0e209d39SAndroid Build Coastguard Worker     // Populate the final binary output data array with the compiled data.
125*0e209d39SAndroid Build Coastguard Worker     void outputData(UErrorCode &status);
126*0e209d39SAndroid Build Coastguard Worker 
127*0e209d39SAndroid Build Coastguard Worker   public:
128*0e209d39SAndroid Build Coastguard Worker     static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
129*0e209d39SAndroid Build Coastguard Worker         int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
130*0e209d39SAndroid Build Coastguard Worker };
131*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
132*0e209d39SAndroid Build Coastguard Worker 
#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
#endif  // !UCONFIG_NO_NORMALIZATION
135*0e209d39SAndroid Build Coastguard Worker #endif  // __USPOOF_BUILDCONF_H__
136