xref: /aosp_15_r20/external/cronet/third_party/icu/patches/iso2022jp.patch (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp
2index 5989c1b4..9d6d111f 100644
3--- a/source/common/ucnv2022.cpp
4+++ b/source/common/ucnv2022.cpp
5@@ -513,7 +513,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
6                     ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
7             }
8             myConverterData->myConverterArray[JISX208] =
9-                ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
10+                ucnv_loadSharedData("EUC-JP", &stackPieces, &stackArgs, errorCode);
11             if(jpCharsetMasks[version]&CSM(JISX212)) {
12                 myConverterData->myConverterArray[JISX212] =
13                     ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
14@@ -1514,79 +1514,6 @@ jisx201FromU(uint32_t value) {
15     return 0xfffe;
16 }
17
18-/*
19- * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
20- * to JIS X 0208, and convert it to a pair of 21..7E bytes.
21- * Return 0 if the byte pair is out of range.
22- */
23-static inline uint32_t
24-_2022FromSJIS(uint32_t value) {
25-    uint8_t trail;
26-
27-    if(value > 0xEFFC) {
28-        return 0;  /* beyond JIS X 0208 */
29-    }
30-
31-    trail = (uint8_t)value;
32-
33-    value &= 0xff00;  /* lead byte */
34-    if(value <= 0x9f00) {
35-        value -= 0x7000;
36-    } else /* 0xe000 <= value <= 0xef00 */ {
37-        value -= 0xb000;
38-    }
39-    value <<= 1;
40-
41-    if(trail <= 0x9e) {
42-        value -= 0x100;
43-        if(trail <= 0x7e) {
44-            value |= trail - 0x1f;
45-        } else {
46-            value |= trail - 0x20;
47-        }
48-    } else /* trail <= 0xfc */ {
49-        value |= trail - 0x7e;
50-    }
51-    return value;
52-}
53-
54-/*
55- * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.
56- * If either byte is outside 21..7E make sure that the result is not valid
57- * for Shift-JIS so that the converter catches it.
58- * Some invalid byte values already turn into equally invalid Shift-JIS
59- * byte values and need not be tested explicitly.
60- */
61-static inline void
62-_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
63-    if(c1&1) {
64-        ++c1;
65-        if(c2 <= 0x5f) {
66-            c2 += 0x1f;
67-        } else if(c2 <= 0x7e) {
68-            c2 += 0x20;
69-        } else {
70-            c2 = 0;  /* invalid */
71-        }
72-    } else {
73-        if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
74-            c2 += 0x7e;
75-        } else {
76-            c2 = 0;  /* invalid */
77-        }
78-    }
79-    c1 >>= 1;
80-    if(c1 <= 0x2f) {
81-        c1 += 0x70;
82-    } else if(c1 <= 0x3f) {
83-        c1 += 0xb0;
84-    } else {
85-        c1 = 0;  /* invalid */
86-    }
87-    bytes[0] = (char)c1;
88-    bytes[1] = (char)c2;
89-}
90-
91 /*
92  * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
93  * Katakana.
94@@ -1857,8 +1784,13 @@ getTrail:
95                                 converterData->myConverterArray[cs0],
96                                 sourceChar, &value,
97                                 useFallback, MBCS_OUTPUT_2);
98-                    if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
99-                        value = _2022FromSJIS(value);
100+                    // Only accept DBCS chars (abs(len2) == 2).
101+                    // With the EUC-JP table for JIS X 0208, half-width Kana
102+                    // represented as DBCS starting with 0x8E have to be
103+                    // filtered out so that they can be converted with
104+                    // the hwkana_fb table.
105+                    if((len2 == 2 && ((value & 0xFF00) != 0x8E00)) || (len2 == -2 && len == 0)) {
106+                        value &= 0x7F7F;
107                         if(value != 0) {
108                             targetValue = value;
109                             len = len2;
110@@ -2250,18 +2182,13 @@ getTrailByte:
111                         if (leadIsOk && trailIsOk) {
112                             ++mySource;
113                             tmpSourceChar = (mySourceChar << 8) | trailByte;
114-                            if(cs == JISX208) {
115-                                _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
116-                                mySourceChar = tmpSourceChar;
117-                            } else {
118-                                /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
119-                                mySourceChar = tmpSourceChar;
120-                                if (cs == KSC5601) {
121-                                    tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
122-                                }
123-                                tempBuf[0] = (char)(tmpSourceChar >> 8);
124-                                tempBuf[1] = (char)(tmpSourceChar);
125+                            /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
126+                            mySourceChar = tmpSourceChar;
127+                            if (cs == JISX208 || cs == KSC5601) {
128+                                tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
129                             }
130+                            tempBuf[0] = (char)(tmpSourceChar >> 8);
131+                            tempBuf[1] = (char)(tmpSourceChar);
132                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, false);
133                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
134                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
135