1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ****************************************************************************** 5*0e209d39SAndroid Build Coastguard Worker * 6*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2008-2016, International Business Machines 7*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 8*0e209d39SAndroid Build Coastguard Worker * 9*0e209d39SAndroid Build Coastguard Worker ****************************************************************************** 10*0e209d39SAndroid Build Coastguard Worker * file name: uspoof_conf.h 11*0e209d39SAndroid Build Coastguard Worker * encoding: UTF-8 12*0e209d39SAndroid Build Coastguard Worker * tab size: 8 (not used) 13*0e209d39SAndroid Build Coastguard Worker * indentation:4 14*0e209d39SAndroid Build Coastguard Worker * 15*0e209d39SAndroid Build Coastguard Worker * created on: 2009Jan05 16*0e209d39SAndroid Build Coastguard Worker * created by: Andy Heninger 17*0e209d39SAndroid Build Coastguard Worker * 18*0e209d39SAndroid Build Coastguard Worker * Internal classes for compiling confusable data into its binary (runtime) form. 19*0e209d39SAndroid Build Coastguard Worker */ 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker #ifndef __USPOOF_BUILDCONF_H__ 22*0e209d39SAndroid Build Coastguard Worker #define __USPOOF_BUILDCONF_H__ 23*0e209d39SAndroid Build Coastguard Worker 24*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 25*0e209d39SAndroid Build Coastguard Worker 26*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_NORMALIZATION 27*0e209d39SAndroid Build Coastguard Worker 28*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_REGULAR_EXPRESSIONS 29*0e209d39SAndroid Build Coastguard Worker 30*0e209d39SAndroid Build Coastguard Worker #include "unicode/uregex.h" 31*0e209d39SAndroid Build Coastguard Worker #include "uhash.h" 32*0e209d39SAndroid Build Coastguard Worker #include "uspoof_impl.h" 33*0e209d39SAndroid Build Coastguard Worker 34*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 35*0e209d39SAndroid Build Coastguard Worker 36*0e209d39SAndroid Build Coastguard Worker // SPUString 37*0e209d39SAndroid Build Coastguard Worker // Holds a string that is the result of one of the mappings defined 38*0e209d39SAndroid Build Coastguard Worker // by the confusable mapping data (confusables.txt from Unicode.org) 39*0e209d39SAndroid Build Coastguard Worker // Instances of SPUString exist during the compilation process only. 40*0e209d39SAndroid Build Coastguard Worker 41*0e209d39SAndroid Build Coastguard Worker struct SPUString : public UMemory { 42*0e209d39SAndroid Build Coastguard Worker LocalPointer<UnicodeString> fStr; // The actual string. 43*0e209d39SAndroid Build Coastguard Worker int32_t fCharOrStrTableIndex; // Index into the final runtime data for this 44*0e209d39SAndroid Build Coastguard Worker // string (or, for length 1, the single string char 45*0e209d39SAndroid Build Coastguard Worker // itself, there being no string table entry for it.) 46*0e209d39SAndroid Build Coastguard Worker 47*0e209d39SAndroid Build Coastguard Worker SPUString(LocalPointer<UnicodeString> s); 48*0e209d39SAndroid Build Coastguard Worker ~SPUString(); 49*0e209d39SAndroid Build Coastguard Worker }; 50*0e209d39SAndroid Build Coastguard Worker 51*0e209d39SAndroid Build Coastguard Worker 52*0e209d39SAndroid Build Coastguard Worker // String Pool A utility class for holding the strings that are the result of 53*0e209d39SAndroid Build Coastguard Worker // the spoof mappings. These strings will utimately end up in the 54*0e209d39SAndroid Build Coastguard Worker // run-time String Table. 55*0e209d39SAndroid Build Coastguard Worker // This is sort of like a sorted set of strings, except that ICU's anemic 56*0e209d39SAndroid Build Coastguard Worker // built-in collections don't support those, so it is implemented with a 57*0e209d39SAndroid Build Coastguard Worker // combination of a uhash and a UVector. 58*0e209d39SAndroid Build Coastguard Worker 59*0e209d39SAndroid Build Coastguard Worker 60*0e209d39SAndroid Build Coastguard Worker class SPUStringPool : public UMemory { 61*0e209d39SAndroid Build Coastguard Worker public: 62*0e209d39SAndroid Build Coastguard Worker SPUStringPool(UErrorCode &status); 63*0e209d39SAndroid Build Coastguard Worker ~SPUStringPool(); 64*0e209d39SAndroid Build Coastguard Worker 65*0e209d39SAndroid Build Coastguard Worker // Add a string. Return the string from the table. 66*0e209d39SAndroid Build Coastguard Worker // If the input parameter string is already in the table, delete the 67*0e209d39SAndroid Build Coastguard Worker // input parameter and return the existing string. 68*0e209d39SAndroid Build Coastguard Worker SPUString *addString(UnicodeString *src, UErrorCode &status); 69*0e209d39SAndroid Build Coastguard Worker 70*0e209d39SAndroid Build Coastguard Worker 71*0e209d39SAndroid Build Coastguard Worker // Get the n-th string in the collection. 72*0e209d39SAndroid Build Coastguard Worker SPUString *getByIndex(int32_t i); 73*0e209d39SAndroid Build Coastguard Worker 74*0e209d39SAndroid Build Coastguard Worker // Sort the contents; affects the ordering of getByIndex(). 75*0e209d39SAndroid Build Coastguard Worker void sort(UErrorCode &status); 76*0e209d39SAndroid Build Coastguard Worker 77*0e209d39SAndroid Build Coastguard Worker int32_t size(); 78*0e209d39SAndroid Build Coastguard Worker 79*0e209d39SAndroid Build Coastguard Worker private: 80*0e209d39SAndroid Build Coastguard Worker UVector *fVec; // Elements are SPUString * 81*0e209d39SAndroid Build Coastguard Worker UHashtable *fHash; // Key: UnicodeString Value: SPUString 82*0e209d39SAndroid Build Coastguard Worker }; 83*0e209d39SAndroid Build Coastguard Worker 84*0e209d39SAndroid Build Coastguard Worker 85*0e209d39SAndroid Build Coastguard Worker // class ConfusabledataBuilder 86*0e209d39SAndroid Build Coastguard Worker // An instance of this class exists while the confusable data is being built from source. 87*0e209d39SAndroid Build Coastguard Worker // It encapsulates the intermediate data structures that are used for building. 88*0e209d39SAndroid Build Coastguard Worker // It exports one static function, to do a confusable data build. 89*0e209d39SAndroid Build Coastguard Worker 90*0e209d39SAndroid Build Coastguard Worker class ConfusabledataBuilder : public UMemory { 91*0e209d39SAndroid Build Coastguard Worker private: 92*0e209d39SAndroid Build Coastguard Worker SpoofImpl *fSpoofImpl; 93*0e209d39SAndroid Build Coastguard Worker char16_t *fInput; 94*0e209d39SAndroid Build Coastguard Worker UHashtable *fTable; 95*0e209d39SAndroid Build Coastguard Worker UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables. 96*0e209d39SAndroid Build Coastguard Worker 97*0e209d39SAndroid Build Coastguard Worker // The binary data is first assembled into the following four collections, then 98*0e209d39SAndroid Build Coastguard Worker // copied to its final raw-memory destination. 99*0e209d39SAndroid Build Coastguard Worker UVector *fKeyVec; 100*0e209d39SAndroid Build Coastguard Worker UVector *fValueVec; 101*0e209d39SAndroid Build Coastguard Worker UnicodeString *fStringTable; 102*0e209d39SAndroid Build Coastguard Worker 103*0e209d39SAndroid Build Coastguard Worker SPUStringPool *stringPool; 104*0e209d39SAndroid Build Coastguard Worker URegularExpression *fParseLine; 105*0e209d39SAndroid Build Coastguard Worker URegularExpression *fParseHexNum; 106*0e209d39SAndroid Build Coastguard Worker int32_t fLineNum; 107*0e209d39SAndroid Build Coastguard Worker 108*0e209d39SAndroid Build Coastguard Worker ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); 109*0e209d39SAndroid Build Coastguard Worker ~ConfusabledataBuilder(); 110*0e209d39SAndroid Build Coastguard Worker void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); 111*0e209d39SAndroid Build Coastguard Worker 112*0e209d39SAndroid Build Coastguard Worker // Add an entry to the key and value tables being built 113*0e209d39SAndroid Build Coastguard Worker // input: data from SLTable, MATable, etc. 114*0e209d39SAndroid Build Coastguard Worker // output: entry added to fKeyVec and fValueVec 115*0e209d39SAndroid Build Coastguard Worker void addKeyEntry(UChar32 keyChar, // The key character 116*0e209d39SAndroid Build Coastguard Worker UHashtable *table, // The table, one of SATable, MATable, etc. 117*0e209d39SAndroid Build Coastguard Worker int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc. 118*0e209d39SAndroid Build Coastguard Worker UErrorCode &status); 119*0e209d39SAndroid Build Coastguard Worker 120*0e209d39SAndroid Build Coastguard Worker // From an index into fKeyVec & fValueVec 121*0e209d39SAndroid Build Coastguard Worker // get a UnicodeString with the corresponding mapping. 122*0e209d39SAndroid Build Coastguard Worker UnicodeString getMapping(int32_t index); 123*0e209d39SAndroid Build Coastguard Worker 124*0e209d39SAndroid Build Coastguard Worker // Populate the final binary output data array with the compiled data. 125*0e209d39SAndroid Build Coastguard Worker void outputData(UErrorCode &status); 126*0e209d39SAndroid Build Coastguard Worker 127*0e209d39SAndroid Build Coastguard Worker public: 128*0e209d39SAndroid Build Coastguard Worker static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, 129*0e209d39SAndroid Build Coastguard Worker int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); 130*0e209d39SAndroid Build Coastguard Worker }; 131*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 132*0e209d39SAndroid Build Coastguard Worker 133*0e209d39SAndroid Build Coastguard Worker #endif 134*0e209d39SAndroid Build Coastguard Worker #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 135*0e209d39SAndroid Build Coastguard Worker #endif // __USPOOF_BUILDCONF_H__ 136