1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2005-2015, International Business Machines 6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 8*0e209d39SAndroid Build Coastguard Worker */ 9*0e209d39SAndroid Build Coastguard Worker 10*0e209d39SAndroid Build Coastguard Worker #ifndef __CSR2022_H 11*0e209d39SAndroid Build Coastguard Worker #define __CSR2022_H 12*0e209d39SAndroid Build Coastguard Worker 13*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 14*0e209d39SAndroid Build Coastguard Worker 15*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_CONVERSION 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #include "csrecog.h" 18*0e209d39SAndroid Build Coastguard Worker 19*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker class CharsetMatch; 22*0e209d39SAndroid Build Coastguard Worker 23*0e209d39SAndroid Build Coastguard Worker /** 24*0e209d39SAndroid Build Coastguard Worker * class CharsetRecog_2022 part of the ICU charset detection implementation. 25*0e209d39SAndroid Build Coastguard Worker * This is a superclass for the individual detectors for 26*0e209d39SAndroid Build Coastguard Worker * each of the detectable members of the ISO 2022 family 27*0e209d39SAndroid Build Coastguard Worker * of encodings. 28*0e209d39SAndroid Build Coastguard Worker * 29*0e209d39SAndroid Build Coastguard Worker * The separate classes are nested within this class. 30*0e209d39SAndroid Build Coastguard Worker * 31*0e209d39SAndroid Build Coastguard Worker * @internal 32*0e209d39SAndroid Build Coastguard Worker */ 33*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_2022 : public CharsetRecognizer 34*0e209d39SAndroid Build Coastguard Worker { 35*0e209d39SAndroid Build Coastguard Worker 36*0e209d39SAndroid Build Coastguard Worker public: 37*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_2022() = 0; 38*0e209d39SAndroid Build Coastguard Worker 39*0e209d39SAndroid Build Coastguard Worker protected: 40*0e209d39SAndroid Build Coastguard Worker 41*0e209d39SAndroid Build Coastguard Worker /** 42*0e209d39SAndroid Build Coastguard Worker * Matching function shared among the 2022 detectors JP, CN and KR 43*0e209d39SAndroid Build Coastguard Worker * Counts up the number of legal an unrecognized escape sequences in 44*0e209d39SAndroid Build Coastguard Worker * the sample of text, and computes a score based on the total number & 45*0e209d39SAndroid Build Coastguard Worker * the proportion that fit the encoding. 46*0e209d39SAndroid Build Coastguard Worker * 47*0e209d39SAndroid Build Coastguard Worker * 48*0e209d39SAndroid Build Coastguard Worker * @param text the byte buffer containing text to analyse 49*0e209d39SAndroid Build Coastguard Worker * @param textLen the size of the text in the byte. 50*0e209d39SAndroid Build Coastguard Worker * @param escapeSequences the byte escape sequences to test for. 51*0e209d39SAndroid Build Coastguard Worker * @return match quality, in the range of 0-100. 52*0e209d39SAndroid Build Coastguard Worker */ 53*0e209d39SAndroid Build Coastguard Worker int32_t match_2022(const uint8_t *text, 54*0e209d39SAndroid Build Coastguard Worker int32_t textLen, 55*0e209d39SAndroid Build Coastguard Worker const uint8_t escapeSequences[][5], 56*0e209d39SAndroid Build Coastguard Worker int32_t escapeSequences_length) const; 57*0e209d39SAndroid Build Coastguard Worker 58*0e209d39SAndroid Build Coastguard Worker }; 59*0e209d39SAndroid Build Coastguard Worker 60*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_2022JP :public CharsetRecog_2022 61*0e209d39SAndroid Build Coastguard Worker { 62*0e209d39SAndroid Build Coastguard Worker public: 63*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_2022JP(); 64*0e209d39SAndroid Build Coastguard Worker 65*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 66*0e209d39SAndroid Build Coastguard Worker 67*0e209d39SAndroid Build Coastguard Worker UBool match(InputText *textIn, CharsetMatch *results) const override; 68*0e209d39SAndroid Build Coastguard Worker }; 69*0e209d39SAndroid Build Coastguard Worker 70*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_ONLY_HTML_CONVERSION 71*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_2022KR :public CharsetRecog_2022 { 72*0e209d39SAndroid Build Coastguard Worker public: 73*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_2022KR(); 74*0e209d39SAndroid Build Coastguard Worker 75*0e209d39SAndroid Build Coastguard Worker const char *getName() const override; 76*0e209d39SAndroid Build Coastguard Worker 77*0e209d39SAndroid Build Coastguard Worker UBool match(InputText *textIn, CharsetMatch *results) const override; 78*0e209d39SAndroid Build Coastguard Worker 79*0e209d39SAndroid Build Coastguard Worker }; 80*0e209d39SAndroid Build Coastguard Worker 81*0e209d39SAndroid Build Coastguard Worker class CharsetRecog_2022CN :public CharsetRecog_2022 82*0e209d39SAndroid Build Coastguard Worker { 83*0e209d39SAndroid Build Coastguard Worker public: 84*0e209d39SAndroid Build Coastguard Worker virtual ~CharsetRecog_2022CN(); 85*0e209d39SAndroid Build Coastguard Worker 86*0e209d39SAndroid Build Coastguard Worker const char* getName() const override; 87*0e209d39SAndroid Build Coastguard Worker 88*0e209d39SAndroid Build Coastguard Worker UBool match(InputText *textIn, CharsetMatch *results) const override; 89*0e209d39SAndroid Build Coastguard Worker }; 90*0e209d39SAndroid Build Coastguard Worker #endif 91*0e209d39SAndroid Build Coastguard Worker 92*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 93*0e209d39SAndroid Build Coastguard Worker 94*0e209d39SAndroid Build Coastguard Worker #endif 95*0e209d39SAndroid Build Coastguard Worker #endif /* __CSR2022_H */ 96