xref: /aosp_15_r20/external/icu/libicu/cts_headers/csr2022.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker  **********************************************************************
5*0e209d39SAndroid Build Coastguard Worker  *   Copyright (C) 2005-2015, International Business Machines
6*0e209d39SAndroid Build Coastguard Worker  *   Corporation and others.  All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker  **********************************************************************
8*0e209d39SAndroid Build Coastguard Worker  */
9*0e209d39SAndroid Build Coastguard Worker 
10*0e209d39SAndroid Build Coastguard Worker #ifndef __CSR2022_H
11*0e209d39SAndroid Build Coastguard Worker #define __CSR2022_H
12*0e209d39SAndroid Build Coastguard Worker 
13*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
14*0e209d39SAndroid Build Coastguard Worker 
15*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_CONVERSION
16*0e209d39SAndroid Build Coastguard Worker 
17*0e209d39SAndroid Build Coastguard Worker #include "csrecog.h"
18*0e209d39SAndroid Build Coastguard Worker 
19*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
20*0e209d39SAndroid Build Coastguard Worker 
21*0e209d39SAndroid Build Coastguard Worker class CharsetMatch;
22*0e209d39SAndroid Build Coastguard Worker 
23*0e209d39SAndroid Build Coastguard Worker /**
24*0e209d39SAndroid Build Coastguard Worker  *  class CharsetRecog_2022  part of the ICU charset detection implementation.
25*0e209d39SAndroid Build Coastguard Worker  *                           This is a superclass for the individual detectors for
26*0e209d39SAndroid Build Coastguard Worker  *                           each of the detectable members of the ISO 2022 family
27*0e209d39SAndroid Build Coastguard Worker  *                           of encodings.
28*0e209d39SAndroid Build Coastguard Worker  *
29*0e209d39SAndroid Build Coastguard Worker  *                           The separate classes are nested within this class.
30*0e209d39SAndroid Build Coastguard Worker  *
31*0e209d39SAndroid Build Coastguard Worker  * @internal
32*0e209d39SAndroid Build Coastguard Worker  */
33*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_2022 : public CharsetRecognizer
34*0e209d39SAndroid Build Coastguard Worker {
35*0e209d39SAndroid Build Coastguard Worker 
36*0e209d39SAndroid Build Coastguard Worker public:
37*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_2022() = 0;
38*0e209d39SAndroid Build Coastguard Worker 
39*0e209d39SAndroid Build Coastguard Worker protected:
40*0e209d39SAndroid Build Coastguard Worker 
41*0e209d39SAndroid Build Coastguard Worker     /**
42*0e209d39SAndroid Build Coastguard Worker      * Matching function shared among the 2022 detectors JP, CN and KR
43*0e209d39SAndroid Build Coastguard Worker      * Counts up the number of legal an unrecognized escape sequences in
44*0e209d39SAndroid Build Coastguard Worker      * the sample of text, and computes a score based on the total number &
45*0e209d39SAndroid Build Coastguard Worker      * the proportion that fit the encoding.
46*0e209d39SAndroid Build Coastguard Worker      *
47*0e209d39SAndroid Build Coastguard Worker      *
48*0e209d39SAndroid Build Coastguard Worker      * @param text the byte buffer containing text to analyse
49*0e209d39SAndroid Build Coastguard Worker      * @param textLen  the size of the text in the byte.
50*0e209d39SAndroid Build Coastguard Worker      * @param escapeSequences the byte escape sequences to test for.
51*0e209d39SAndroid Build Coastguard Worker      * @return match quality, in the range of 0-100.
52*0e209d39SAndroid Build Coastguard Worker      */
53*0e209d39SAndroid Build Coastguard Worker     int32_t match_2022(const uint8_t *text,
54*0e209d39SAndroid Build Coastguard Worker                        int32_t textLen,
55*0e209d39SAndroid Build Coastguard Worker                        const uint8_t escapeSequences[][5],
56*0e209d39SAndroid Build Coastguard Worker                        int32_t escapeSequences_length) const;
57*0e209d39SAndroid Build Coastguard Worker 
58*0e209d39SAndroid Build Coastguard Worker };
59*0e209d39SAndroid Build Coastguard Worker 
60*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_2022JP :public CharsetRecog_2022
61*0e209d39SAndroid Build Coastguard Worker {
62*0e209d39SAndroid Build Coastguard Worker public:
63*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_2022JP();
64*0e209d39SAndroid Build Coastguard Worker 
65*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
66*0e209d39SAndroid Build Coastguard Worker 
67*0e209d39SAndroid Build Coastguard Worker     UBool match(InputText *textIn, CharsetMatch *results) const override;
68*0e209d39SAndroid Build Coastguard Worker };
69*0e209d39SAndroid Build Coastguard Worker 
70*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_ONLY_HTML_CONVERSION
71*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_2022KR :public CharsetRecog_2022 {
72*0e209d39SAndroid Build Coastguard Worker public:
73*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_2022KR();
74*0e209d39SAndroid Build Coastguard Worker 
75*0e209d39SAndroid Build Coastguard Worker     const char *getName() const override;
76*0e209d39SAndroid Build Coastguard Worker 
77*0e209d39SAndroid Build Coastguard Worker     UBool match(InputText *textIn, CharsetMatch *results) const override;
78*0e209d39SAndroid Build Coastguard Worker 
79*0e209d39SAndroid Build Coastguard Worker };
80*0e209d39SAndroid Build Coastguard Worker 
81*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_2022CN :public CharsetRecog_2022
82*0e209d39SAndroid Build Coastguard Worker {
83*0e209d39SAndroid Build Coastguard Worker public:
84*0e209d39SAndroid Build Coastguard Worker     virtual ~CharsetRecog_2022CN();
85*0e209d39SAndroid Build Coastguard Worker 
86*0e209d39SAndroid Build Coastguard Worker     const char* getName() const override;
87*0e209d39SAndroid Build Coastguard Worker 
88*0e209d39SAndroid Build Coastguard Worker     UBool match(InputText *textIn, CharsetMatch *results) const override;
89*0e209d39SAndroid Build Coastguard Worker };
90*0e209d39SAndroid Build Coastguard Worker #endif
91*0e209d39SAndroid Build Coastguard Worker 
92*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
93*0e209d39SAndroid Build Coastguard Worker 
94*0e209d39SAndroid Build Coastguard Worker #endif
95*0e209d39SAndroid Build Coastguard Worker #endif /* __CSR2022_H */
96