xref: /aosp_15_r20/external/icu/libicu/cts_headers/csrsbcs.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker  **********************************************************************
5*0e209d39SAndroid Build Coastguard Worker  *   Copyright (C) 2005-2015, International Business Machines
6*0e209d39SAndroid Build Coastguard Worker  *   Corporation and others.  All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker  **********************************************************************
8*0e209d39SAndroid Build Coastguard Worker  */
9*0e209d39SAndroid Build Coastguard Worker 
10*0e209d39SAndroid Build Coastguard Worker #ifndef __CSRSBCS_H
11*0e209d39SAndroid Build Coastguard Worker #define __CSRSBCS_H
12*0e209d39SAndroid Build Coastguard Worker 
13*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h"
14*0e209d39SAndroid Build Coastguard Worker 
15*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_CONVERSION
16*0e209d39SAndroid Build Coastguard Worker 
17*0e209d39SAndroid Build Coastguard Worker #include "csrecog.h"
18*0e209d39SAndroid Build Coastguard Worker 
19*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
20*0e209d39SAndroid Build Coastguard Worker 
21*0e209d39SAndroid Build Coastguard Worker class NGramParser : public UMemory
22*0e209d39SAndroid Build Coastguard Worker {
23*0e209d39SAndroid Build Coastguard Worker private:
24*0e209d39SAndroid Build Coastguard Worker     int32_t ngram;
25*0e209d39SAndroid Build Coastguard Worker     const int32_t *ngramList;
26*0e209d39SAndroid Build Coastguard Worker 
27*0e209d39SAndroid Build Coastguard Worker     int32_t ngramCount;
28*0e209d39SAndroid Build Coastguard Worker     int32_t hitCount;
29*0e209d39SAndroid Build Coastguard Worker 
30*0e209d39SAndroid Build Coastguard Worker protected:
31*0e209d39SAndroid Build Coastguard Worker 	int32_t byteIndex;
32*0e209d39SAndroid Build Coastguard Worker     const uint8_t *charMap;
33*0e209d39SAndroid Build Coastguard Worker 
34*0e209d39SAndroid Build Coastguard Worker 	void addByte(int32_t b);
35*0e209d39SAndroid Build Coastguard Worker 
36*0e209d39SAndroid Build Coastguard Worker public:
37*0e209d39SAndroid Build Coastguard Worker     NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
38*0e209d39SAndroid Build Coastguard Worker     virtual ~NGramParser();
39*0e209d39SAndroid Build Coastguard Worker 
40*0e209d39SAndroid Build Coastguard Worker private:
41*0e209d39SAndroid Build Coastguard Worker     /*
42*0e209d39SAndroid Build Coastguard Worker     * Binary search for value in table, which must have exactly 64 entries.
43*0e209d39SAndroid Build Coastguard Worker     */
44*0e209d39SAndroid Build Coastguard Worker     int32_t search(const int32_t *table, int32_t value);
45*0e209d39SAndroid Build Coastguard Worker 
46*0e209d39SAndroid Build Coastguard Worker     void lookup(int32_t thisNgram);
47*0e209d39SAndroid Build Coastguard Worker 
48*0e209d39SAndroid Build Coastguard Worker     virtual int32_t nextByte(InputText *det);
49*0e209d39SAndroid Build Coastguard Worker 	virtual void parseCharacters(InputText *det);
50*0e209d39SAndroid Build Coastguard Worker 
51*0e209d39SAndroid Build Coastguard Worker public:
52*0e209d39SAndroid Build Coastguard Worker     int32_t parse(InputText *det);
53*0e209d39SAndroid Build Coastguard Worker 
54*0e209d39SAndroid Build Coastguard Worker };
55*0e209d39SAndroid Build Coastguard Worker 
56*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_ONLY_HTML_CONVERSION
57*0e209d39SAndroid Build Coastguard Worker class NGramParser_IBM420 : public NGramParser
58*0e209d39SAndroid Build Coastguard Worker {
59*0e209d39SAndroid Build Coastguard Worker public:
60*0e209d39SAndroid Build Coastguard Worker     NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
61*0e209d39SAndroid Build Coastguard Worker     ~NGramParser_IBM420();
62*0e209d39SAndroid Build Coastguard Worker 
63*0e209d39SAndroid Build Coastguard Worker private:
64*0e209d39SAndroid Build Coastguard Worker     int32_t alef;
65*0e209d39SAndroid Build Coastguard Worker     int32_t isLamAlef(int32_t b);
66*0e209d39SAndroid Build Coastguard Worker     int32_t nextByte(InputText *det) override;
67*0e209d39SAndroid Build Coastguard Worker     void parseCharacters(InputText *det) override;
68*0e209d39SAndroid Build Coastguard Worker };
69*0e209d39SAndroid Build Coastguard Worker #endif
70*0e209d39SAndroid Build Coastguard Worker 
71*0e209d39SAndroid Build Coastguard Worker 
72*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_sbcs : public CharsetRecognizer
73*0e209d39SAndroid Build Coastguard Worker {
74*0e209d39SAndroid Build Coastguard Worker public:
75*0e209d39SAndroid Build Coastguard Worker     CharsetRecog_sbcs();
76*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_sbcs();
77*0e209d39SAndroid Build Coastguard Worker     virtual const char *getName() const override = 0;
78*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override = 0;
79*0e209d39SAndroid Build Coastguard Worker     virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
80*0e209d39SAndroid Build Coastguard Worker };
81*0e209d39SAndroid Build Coastguard Worker 
82*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_1 : public CharsetRecog_sbcs
83*0e209d39SAndroid Build Coastguard Worker {
84*0e209d39SAndroid Build Coastguard Worker public:
85*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_1();
86*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
87*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
88*0e209d39SAndroid Build Coastguard Worker };
89*0e209d39SAndroid Build Coastguard Worker 
90*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_2 : public CharsetRecog_sbcs
91*0e209d39SAndroid Build Coastguard Worker {
92*0e209d39SAndroid Build Coastguard Worker public:
93*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_2();
94*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
95*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
96*0e209d39SAndroid Build Coastguard Worker };
97*0e209d39SAndroid Build Coastguard Worker 
98*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_5 : public CharsetRecog_sbcs
99*0e209d39SAndroid Build Coastguard Worker {
100*0e209d39SAndroid Build Coastguard Worker public:
101*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_5();
102*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
103*0e209d39SAndroid Build Coastguard Worker };
104*0e209d39SAndroid Build Coastguard Worker 
105*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_6 : public CharsetRecog_sbcs
106*0e209d39SAndroid Build Coastguard Worker {
107*0e209d39SAndroid Build Coastguard Worker public:
108*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_6();
109*0e209d39SAndroid Build Coastguard Worker 
110*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
111*0e209d39SAndroid Build Coastguard Worker };
112*0e209d39SAndroid Build Coastguard Worker 
113*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_7 : public CharsetRecog_sbcs
114*0e209d39SAndroid Build Coastguard Worker {
115*0e209d39SAndroid Build Coastguard Worker public:
116*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_7();
117*0e209d39SAndroid Build Coastguard Worker 
118*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
119*0e209d39SAndroid Build Coastguard Worker };
120*0e209d39SAndroid Build Coastguard Worker 
121*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_8 : public CharsetRecog_sbcs
122*0e209d39SAndroid Build Coastguard Worker {
123*0e209d39SAndroid Build Coastguard Worker public:
124*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_8();
125*0e209d39SAndroid Build Coastguard Worker 
126*0e209d39SAndroid Build Coastguard Worker     virtual const char *getName() const override;
127*0e209d39SAndroid Build Coastguard Worker };
128*0e209d39SAndroid Build Coastguard Worker 
129*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_9 : public CharsetRecog_sbcs
130*0e209d39SAndroid Build Coastguard Worker {
131*0e209d39SAndroid Build Coastguard Worker public:
132*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_9();
133*0e209d39SAndroid Build Coastguard Worker 
134*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
135*0e209d39SAndroid Build Coastguard Worker };
136*0e209d39SAndroid Build Coastguard Worker 
137*0e209d39SAndroid Build Coastguard Worker 
138*0e209d39SAndroid Build Coastguard Worker 
139*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
140*0e209d39SAndroid Build Coastguard Worker {
141*0e209d39SAndroid Build Coastguard Worker public:
142*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_5_ru();
143*0e209d39SAndroid Build Coastguard Worker 
144*0e209d39SAndroid Build Coastguard Worker     const char *getLanguage() const override;
145*0e209d39SAndroid Build Coastguard Worker 
146*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
147*0e209d39SAndroid Build Coastguard Worker };
148*0e209d39SAndroid Build Coastguard Worker 
149*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
150*0e209d39SAndroid Build Coastguard Worker {
151*0e209d39SAndroid Build Coastguard Worker public:
152*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_6_ar();
153*0e209d39SAndroid Build Coastguard Worker 
154*0e209d39SAndroid Build Coastguard Worker     const char *getLanguage() const override;
155*0e209d39SAndroid Build Coastguard Worker 
156*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
157*0e209d39SAndroid Build Coastguard Worker };
158*0e209d39SAndroid Build Coastguard Worker 
159*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
160*0e209d39SAndroid Build Coastguard Worker {
161*0e209d39SAndroid Build Coastguard Worker public:
162*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_7_el();
163*0e209d39SAndroid Build Coastguard Worker 
164*0e209d39SAndroid Build Coastguard Worker     const char *getLanguage() const override;
165*0e209d39SAndroid Build Coastguard Worker 
166*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
167*0e209d39SAndroid Build Coastguard Worker };
168*0e209d39SAndroid Build Coastguard Worker 
169*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
170*0e209d39SAndroid Build Coastguard Worker {
171*0e209d39SAndroid Build Coastguard Worker public:
172*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_8_I_he();
173*0e209d39SAndroid Build Coastguard Worker 
174*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
175*0e209d39SAndroid Build Coastguard Worker 
176*0e209d39SAndroid Build Coastguard Worker     const char *getLanguage() const override;
177*0e209d39SAndroid Build Coastguard Worker 
178*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
179*0e209d39SAndroid Build Coastguard Worker };
180*0e209d39SAndroid Build Coastguard Worker 
181*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
182*0e209d39SAndroid Build Coastguard Worker {
183*0e209d39SAndroid Build Coastguard Worker public:
184*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_8_he ();
185*0e209d39SAndroid Build Coastguard Worker 
186*0e209d39SAndroid Build Coastguard Worker     const char *getLanguage() const override;
187*0e209d39SAndroid Build Coastguard Worker 
188*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
189*0e209d39SAndroid Build Coastguard Worker };
190*0e209d39SAndroid Build Coastguard Worker 
191*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
192*0e209d39SAndroid Build Coastguard Worker {
193*0e209d39SAndroid Build Coastguard Worker public:
194*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_8859_9_tr ();
195*0e209d39SAndroid Build Coastguard Worker 
196*0e209d39SAndroid Build Coastguard Worker     const char *getLanguage() const override;
197*0e209d39SAndroid Build Coastguard Worker 
198*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
199*0e209d39SAndroid Build Coastguard Worker };
200*0e209d39SAndroid Build Coastguard Worker 
201*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
202*0e209d39SAndroid Build Coastguard Worker {
203*0e209d39SAndroid Build Coastguard Worker public:
204*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_windows_1256();
205*0e209d39SAndroid Build Coastguard Worker 
206*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
207*0e209d39SAndroid Build Coastguard Worker 
208*0e209d39SAndroid Build Coastguard Worker     const char *getLanguage() const override;
209*0e209d39SAndroid Build Coastguard Worker 
210*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
211*0e209d39SAndroid Build Coastguard Worker };
212*0e209d39SAndroid Build Coastguard Worker 
213*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
214*0e209d39SAndroid Build Coastguard Worker {
215*0e209d39SAndroid Build Coastguard Worker public:
216*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_windows_1251();
217*0e209d39SAndroid Build Coastguard Worker 
218*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
219*0e209d39SAndroid Build Coastguard Worker 
220*0e209d39SAndroid Build Coastguard Worker     const char *getLanguage() const override;
221*0e209d39SAndroid Build Coastguard Worker 
222*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
223*0e209d39SAndroid Build Coastguard Worker };
224*0e209d39SAndroid Build Coastguard Worker 
225*0e209d39SAndroid Build Coastguard Worker 
226*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
227*0e209d39SAndroid Build Coastguard Worker {
228*0e209d39SAndroid Build Coastguard Worker public:
229*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_KOI8_R();
230*0e209d39SAndroid Build Coastguard Worker 
231*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
232*0e209d39SAndroid Build Coastguard Worker 
233*0e209d39SAndroid Build Coastguard Worker     const char *getLanguage() const override;
234*0e209d39SAndroid Build Coastguard Worker 
235*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
236*0e209d39SAndroid Build Coastguard Worker };
237*0e209d39SAndroid Build Coastguard Worker 
238*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_ONLY_HTML_CONVERSION
239*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
240*0e209d39SAndroid Build Coastguard Worker {
241*0e209d39SAndroid Build Coastguard Worker public:
242*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_IBM424_he();
243*0e209d39SAndroid Build Coastguard Worker 
244*0e209d39SAndroid Build Coastguard Worker     const char *getLanguage() const override;
245*0e209d39SAndroid Build Coastguard Worker };
246*0e209d39SAndroid Build Coastguard Worker 
247*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
248*0e209d39SAndroid Build Coastguard Worker public:
249*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_IBM424_he_rtl();
250*0e209d39SAndroid Build Coastguard Worker 
251*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
252*0e209d39SAndroid Build Coastguard Worker 
253*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
254*0e209d39SAndroid Build Coastguard Worker };
255*0e209d39SAndroid Build Coastguard Worker 
256*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
257*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_IBM424_he_ltr();
258*0e209d39SAndroid Build Coastguard Worker 
259*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
260*0e209d39SAndroid Build Coastguard Worker 
261*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
262*0e209d39SAndroid Build Coastguard Worker };
263*0e209d39SAndroid Build Coastguard Worker 
264*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
265*0e209d39SAndroid Build Coastguard Worker {
266*0e209d39SAndroid Build Coastguard Worker public:
267*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_IBM420_ar();
268*0e209d39SAndroid Build Coastguard Worker 
269*0e209d39SAndroid Build Coastguard Worker     const char *getLanguage() const override;
270*0e209d39SAndroid Build Coastguard Worker 	int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const override;
271*0e209d39SAndroid Build Coastguard Worker 
272*0e209d39SAndroid Build Coastguard Worker };
273*0e209d39SAndroid Build Coastguard Worker 
274*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
275*0e209d39SAndroid Build Coastguard Worker public:
276*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_IBM420_ar_rtl();
277*0e209d39SAndroid Build Coastguard Worker 
278*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
279*0e209d39SAndroid Build Coastguard Worker 
280*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
281*0e209d39SAndroid Build Coastguard Worker };
282*0e209d39SAndroid Build Coastguard Worker 
283*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
284*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_IBM420_ar_ltr();
285*0e209d39SAndroid Build Coastguard Worker 
286*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
287*0e209d39SAndroid Build Coastguard Worker 
288*0e209d39SAndroid Build Coastguard Worker     virtual UBool match(InputText *det, CharsetMatch *results) const override;
289*0e209d39SAndroid Build Coastguard Worker };
290*0e209d39SAndroid Build Coastguard Worker #endif
291*0e209d39SAndroid Build Coastguard Worker 
292*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
293*0e209d39SAndroid Build Coastguard Worker 
294*0e209d39SAndroid Build Coastguard Worker #endif /* !UCONFIG_NO_CONVERSION */
295*0e209d39SAndroid Build Coastguard Worker #endif /* __CSRSBCS_H */
296