1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2005-2015, International Business Machines 6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 8*0e209d39SAndroid Build Coastguard Worker */ 9*0e209d39SAndroid Build Coastguard Worker 10*0e209d39SAndroid Build Coastguard Worker #ifndef __CSRSBCS_H 11*0e209d39SAndroid Build Coastguard Worker #define __CSRSBCS_H 12*0e209d39SAndroid Build Coastguard Worker 13*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h" 14*0e209d39SAndroid Build Coastguard Worker 15*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_CONVERSION 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #include "csrecog.h" 18*0e209d39SAndroid Build Coastguard Worker 19*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker class NGramParser : public UMemory 22*0e209d39SAndroid Build Coastguard Worker { 23*0e209d39SAndroid Build Coastguard Worker private: 24*0e209d39SAndroid Build Coastguard Worker int32_t ngram; 25*0e209d39SAndroid Build Coastguard Worker const int32_t *ngramList; 26*0e209d39SAndroid Build Coastguard Worker 27*0e209d39SAndroid Build Coastguard Worker int32_t ngramCount; 28*0e209d39SAndroid Build Coastguard Worker int32_t hitCount; 29*0e209d39SAndroid Build Coastguard Worker 30*0e209d39SAndroid Build Coastguard Worker protected: 31*0e209d39SAndroid Build Coastguard Worker int32_t byteIndex; 32*0e209d39SAndroid Build Coastguard Worker const uint8_t *charMap; 33*0e209d39SAndroid Build Coastguard Worker 34*0e209d39SAndroid Build Coastguard Worker void addByte(int32_t b); 35*0e209d39SAndroid Build Coastguard Worker 36*0e209d39SAndroid Build Coastguard Worker public: 37*0e209d39SAndroid Build Coastguard Worker NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap); 38*0e209d39SAndroid Build Coastguard Worker virtual ~NGramParser(); 39*0e209d39SAndroid Build Coastguard Worker 40*0e209d39SAndroid Build Coastguard Worker private: 41*0e209d39SAndroid Build Coastguard Worker /* 42*0e209d39SAndroid Build Coastguard Worker * Binary search for value in table, which must have exactly 64 entries. 43*0e209d39SAndroid Build Coastguard Worker */ 44*0e209d39SAndroid Build Coastguard Worker int32_t search(const int32_t *table, int32_t value); 45*0e209d39SAndroid Build Coastguard Worker 46*0e209d39SAndroid Build Coastguard Worker void lookup(int32_t thisNgram); 47*0e209d39SAndroid Build Coastguard Worker 48*0e209d39SAndroid Build Coastguard Worker virtual int32_t nextByte(InputText *det); 49*0e209d39SAndroid Build Coastguard Worker virtual void parseCharacters(InputText *det); 50*0e209d39SAndroid Build Coastguard Worker 51*0e209d39SAndroid Build Coastguard Worker public: 52*0e209d39SAndroid Build Coastguard Worker int32_t parse(InputText *det); 53*0e209d39SAndroid Build Coastguard Worker 54*0e209d39SAndroid Build Coastguard Worker }; 55*0e209d39SAndroid Build Coastguard Worker 56*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_ONLY_HTML_CONVERSION 57*0e209d39SAndroid Build Coastguard Worker class NGramParser_IBM420 : public NGramParser 58*0e209d39SAndroid Build Coastguard Worker { 59*0e209d39SAndroid Build Coastguard Worker public: 60*0e209d39SAndroid Build Coastguard Worker NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); 61*0e209d39SAndroid Build Coastguard Worker ~NGramParser_IBM420(); 62*0e209d39SAndroid Build Coastguard Worker 63*0e209d39SAndroid Build Coastguard Worker private: 64*0e209d39SAndroid Build Coastguard Worker int32_t alef; 65*0e209d39SAndroid Build Coastguard Worker int32_t isLamAlef(int32_t b); 66*0e209d39SAndroid Build Coastguard Worker int32_t nextByte(InputText *det) override; 67*0e209d39SAndroid Build Coastguard Worker void parseCharacters(InputText *det) override; 68*0e209d39SAndroid Build Coastguard Worker }; 69*0e209d39SAndroid Build Coastguard Worker #endif 70*0e209d39SAndroid Build Coastguard Worker 71*0e209d39SAndroid Build Coastguard Worker 72*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_sbcs : public CharsetRecognizer 73*0e209d39SAndroid Build Coastguard Worker { 74*0e209d39SAndroid Build Coastguard Worker public: 75*0e209d39SAndroid Build Coastguard Worker CharsetRecog_sbcs(); 76*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_sbcs(); 77*0e209d39SAndroid Build Coastguard Worker virtual const char *getName() const override = 0; 78*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override = 0; 79*0e209d39SAndroid Build Coastguard Worker virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const; 80*0e209d39SAndroid Build Coastguard Worker }; 81*0e209d39SAndroid Build Coastguard Worker 82*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_1 : public CharsetRecog_sbcs 83*0e209d39SAndroid Build Coastguard Worker { 84*0e209d39SAndroid Build Coastguard Worker public: 85*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_1(); 86*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 87*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 88*0e209d39SAndroid Build Coastguard Worker }; 89*0e209d39SAndroid Build Coastguard Worker 90*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_2 : public CharsetRecog_sbcs 91*0e209d39SAndroid Build Coastguard Worker { 92*0e209d39SAndroid Build Coastguard Worker public: 93*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_2(); 94*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 95*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 96*0e209d39SAndroid Build Coastguard Worker }; 97*0e209d39SAndroid Build Coastguard Worker 98*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_5 : public CharsetRecog_sbcs 99*0e209d39SAndroid Build Coastguard Worker { 100*0e209d39SAndroid Build Coastguard Worker public: 101*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_5(); 102*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 103*0e209d39SAndroid Build Coastguard Worker }; 104*0e209d39SAndroid Build Coastguard Worker 105*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_6 : public CharsetRecog_sbcs 106*0e209d39SAndroid Build Coastguard Worker { 107*0e209d39SAndroid Build Coastguard Worker public: 108*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_6(); 109*0e209d39SAndroid Build Coastguard Worker 110*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 111*0e209d39SAndroid Build Coastguard Worker }; 112*0e209d39SAndroid Build Coastguard Worker 113*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_7 : public CharsetRecog_sbcs 114*0e209d39SAndroid Build Coastguard Worker { 115*0e209d39SAndroid Build Coastguard Worker public: 116*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_7(); 117*0e209d39SAndroid Build Coastguard Worker 118*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 119*0e209d39SAndroid Build Coastguard Worker }; 120*0e209d39SAndroid Build Coastguard Worker 121*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_8 : public CharsetRecog_sbcs 122*0e209d39SAndroid Build Coastguard Worker { 123*0e209d39SAndroid Build Coastguard Worker public: 124*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_8(); 125*0e209d39SAndroid Build Coastguard Worker 126*0e209d39SAndroid Build Coastguard Worker virtual const char *getName() const override; 127*0e209d39SAndroid Build Coastguard Worker }; 128*0e209d39SAndroid Build Coastguard Worker 129*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_9 : public CharsetRecog_sbcs 130*0e209d39SAndroid Build Coastguard Worker { 131*0e209d39SAndroid Build Coastguard Worker public: 132*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_9(); 133*0e209d39SAndroid Build Coastguard Worker 134*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 135*0e209d39SAndroid Build Coastguard Worker }; 136*0e209d39SAndroid Build Coastguard Worker 137*0e209d39SAndroid Build Coastguard Worker 138*0e209d39SAndroid Build Coastguard Worker 139*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5 140*0e209d39SAndroid Build Coastguard Worker { 141*0e209d39SAndroid Build Coastguard Worker public: 142*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_5_ru(); 143*0e209d39SAndroid Build Coastguard Worker 144*0e209d39SAndroid Build Coastguard Worker const char *getLanguage() const override; 145*0e209d39SAndroid Build Coastguard Worker 146*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 147*0e209d39SAndroid Build Coastguard Worker }; 148*0e209d39SAndroid Build Coastguard Worker 149*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6 150*0e209d39SAndroid Build Coastguard Worker { 151*0e209d39SAndroid Build Coastguard Worker public: 152*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_6_ar(); 153*0e209d39SAndroid Build Coastguard Worker 154*0e209d39SAndroid Build Coastguard Worker const char *getLanguage() const override; 155*0e209d39SAndroid Build Coastguard Worker 156*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 157*0e209d39SAndroid Build Coastguard Worker }; 158*0e209d39SAndroid Build Coastguard Worker 159*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_7_el : public CharsetRecog_8859_7 160*0e209d39SAndroid Build Coastguard Worker { 161*0e209d39SAndroid Build Coastguard Worker public: 162*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_7_el(); 163*0e209d39SAndroid Build Coastguard Worker 164*0e209d39SAndroid Build Coastguard Worker const char *getLanguage() const override; 165*0e209d39SAndroid Build Coastguard Worker 166*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 167*0e209d39SAndroid Build Coastguard Worker }; 168*0e209d39SAndroid Build Coastguard Worker 169*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8 170*0e209d39SAndroid Build Coastguard Worker { 171*0e209d39SAndroid Build Coastguard Worker public: 172*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_8_I_he(); 173*0e209d39SAndroid Build Coastguard Worker 174*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 175*0e209d39SAndroid Build Coastguard Worker 176*0e209d39SAndroid Build Coastguard Worker const char *getLanguage() const override; 177*0e209d39SAndroid Build Coastguard Worker 178*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 179*0e209d39SAndroid Build Coastguard Worker }; 180*0e209d39SAndroid Build Coastguard Worker 181*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_8_he : public CharsetRecog_8859_8 182*0e209d39SAndroid Build Coastguard Worker { 183*0e209d39SAndroid Build Coastguard Worker public: 184*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_8_he (); 185*0e209d39SAndroid Build Coastguard Worker 186*0e209d39SAndroid Build Coastguard Worker const char *getLanguage() const override; 187*0e209d39SAndroid Build Coastguard Worker 188*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 189*0e209d39SAndroid Build Coastguard Worker }; 190*0e209d39SAndroid Build Coastguard Worker 191*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9 192*0e209d39SAndroid Build Coastguard Worker { 193*0e209d39SAndroid Build Coastguard Worker public: 194*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_8859_9_tr (); 195*0e209d39SAndroid Build Coastguard Worker 196*0e209d39SAndroid Build Coastguard Worker const char *getLanguage() const override; 197*0e209d39SAndroid Build Coastguard Worker 198*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 199*0e209d39SAndroid Build Coastguard Worker }; 200*0e209d39SAndroid Build Coastguard Worker 201*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_windows_1256 : public CharsetRecog_sbcs 202*0e209d39SAndroid Build Coastguard Worker { 203*0e209d39SAndroid Build Coastguard Worker public: 204*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_windows_1256(); 205*0e209d39SAndroid Build Coastguard Worker 206*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 207*0e209d39SAndroid Build Coastguard Worker 208*0e209d39SAndroid Build Coastguard Worker const char *getLanguage() const override; 209*0e209d39SAndroid Build Coastguard Worker 210*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 211*0e209d39SAndroid Build Coastguard Worker }; 212*0e209d39SAndroid Build Coastguard Worker 213*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_windows_1251 : public CharsetRecog_sbcs 214*0e209d39SAndroid Build Coastguard Worker { 215*0e209d39SAndroid Build Coastguard Worker public: 216*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_windows_1251(); 217*0e209d39SAndroid Build Coastguard Worker 218*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 219*0e209d39SAndroid Build Coastguard Worker 220*0e209d39SAndroid Build Coastguard Worker const char *getLanguage() const override; 221*0e209d39SAndroid Build Coastguard Worker 222*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 223*0e209d39SAndroid Build Coastguard Worker }; 224*0e209d39SAndroid Build Coastguard Worker 225*0e209d39SAndroid Build Coastguard Worker 226*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_KOI8_R : public CharsetRecog_sbcs 227*0e209d39SAndroid Build Coastguard Worker { 228*0e209d39SAndroid Build Coastguard Worker public: 229*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_KOI8_R(); 230*0e209d39SAndroid Build Coastguard Worker 231*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 232*0e209d39SAndroid Build Coastguard Worker 233*0e209d39SAndroid Build Coastguard Worker const char *getLanguage() const override; 234*0e209d39SAndroid Build Coastguard Worker 235*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 236*0e209d39SAndroid Build Coastguard Worker }; 237*0e209d39SAndroid Build Coastguard Worker 238*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_ONLY_HTML_CONVERSION 239*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM424_he : public CharsetRecog_sbcs 240*0e209d39SAndroid Build Coastguard Worker { 241*0e209d39SAndroid Build Coastguard Worker public: 242*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_IBM424_he(); 243*0e209d39SAndroid Build Coastguard Worker 244*0e209d39SAndroid Build Coastguard Worker const char *getLanguage() const override; 245*0e209d39SAndroid Build Coastguard Worker }; 246*0e209d39SAndroid Build Coastguard Worker 247*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he { 248*0e209d39SAndroid Build Coastguard Worker public: 249*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_IBM424_he_rtl(); 250*0e209d39SAndroid Build Coastguard Worker 251*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 252*0e209d39SAndroid Build Coastguard Worker 253*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 254*0e209d39SAndroid Build Coastguard Worker }; 255*0e209d39SAndroid Build Coastguard Worker 256*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he { 257*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_IBM424_he_ltr(); 258*0e209d39SAndroid Build Coastguard Worker 259*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 260*0e209d39SAndroid Build Coastguard Worker 261*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 262*0e209d39SAndroid Build Coastguard Worker }; 263*0e209d39SAndroid Build Coastguard Worker 264*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs 265*0e209d39SAndroid Build Coastguard Worker { 266*0e209d39SAndroid Build Coastguard Worker public: 267*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_IBM420_ar(); 268*0e209d39SAndroid Build Coastguard Worker 269*0e209d39SAndroid Build Coastguard Worker const char *getLanguage() const override; 270*0e209d39SAndroid Build Coastguard Worker int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const override; 271*0e209d39SAndroid Build Coastguard Worker 272*0e209d39SAndroid Build Coastguard Worker }; 273*0e209d39SAndroid Build Coastguard Worker 274*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar { 275*0e209d39SAndroid Build Coastguard Worker public: 276*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_IBM420_ar_rtl(); 277*0e209d39SAndroid Build Coastguard Worker 278*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 279*0e209d39SAndroid Build Coastguard Worker 280*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 281*0e209d39SAndroid Build Coastguard Worker }; 282*0e209d39SAndroid Build Coastguard Worker 283*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar { 284*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_IBM420_ar_ltr(); 285*0e209d39SAndroid Build Coastguard Worker 286*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 287*0e209d39SAndroid Build Coastguard Worker 288*0e209d39SAndroid Build Coastguard Worker virtual UBool match(InputText *det, CharsetMatch *results) const override; 289*0e209d39SAndroid Build Coastguard Worker }; 290*0e209d39SAndroid Build Coastguard Worker #endif 291*0e209d39SAndroid Build Coastguard Worker 292*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 293*0e209d39SAndroid Build Coastguard Worker 294*0e209d39SAndroid Build Coastguard Worker #endif /* !UCONFIG_NO_CONVERSION */ 295*0e209d39SAndroid Build Coastguard Worker #endif /* __CSRSBCS_H */ 296