1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2006-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.dev.test.charset; 11 12 import java.nio.BufferOverflowException; 13 import java.nio.ByteBuffer; 14 import java.nio.CharBuffer; 15 import java.nio.charset.CharacterCodingException; 16 import java.nio.charset.Charset; 17 import java.nio.charset.CharsetDecoder; 18 import java.nio.charset.CharsetEncoder; 19 import java.nio.charset.CoderMalfunctionError; 20 import java.nio.charset.CoderResult; 21 import java.nio.charset.CodingErrorAction; 22 import java.nio.charset.UnsupportedCharsetException; 23 import java.nio.charset.spi.CharsetProvider; 24 import java.util.ArrayList; 25 import java.util.Arrays; 26 import java.util.Iterator; 27 import java.util.MissingResourceException; 28 import java.util.Set; 29 import java.util.SortedMap; 30 31 import org.junit.Test; 32 import org.junit.runner.RunWith; 33 import org.junit.runners.JUnit4; 34 35 import com.ibm.icu.charset.CharsetCallback; 36 import com.ibm.icu.charset.CharsetDecoderICU; 37 import com.ibm.icu.charset.CharsetEncoderICU; 38 import com.ibm.icu.charset.CharsetICU; 39 import com.ibm.icu.charset.CharsetProviderICU; 40 import com.ibm.icu.dev.test.TestFmwk; 41 import com.ibm.icu.text.UTF16; 42 import com.ibm.icu.text.UnicodeSet; 43 import com.ibm.icu.text.UnicodeSetIterator; 44 45 @RunWith(JUnit4.class) 46 public class TestCharset extends TestFmwk { 47 @Test TestUTF16Converter()48 public void TestUTF16Converter(){ 49 CharsetProvider icu = new CharsetProviderICU(); 50 Charset cs1 = icu.charsetForName("UTF-16BE"); 51 CharsetEncoder e1 = cs1.newEncoder(); 52 CharsetDecoder d1 = cs1.newDecoder(); 53 54 Charset 
cs2 = icu.charsetForName("UTF-16LE"); 55 CharsetEncoder e2 = cs2.newEncoder(); 56 CharsetDecoder d2 = cs2.newDecoder(); 57 58 for(int i=0x0000; i<0x10FFFF; i+=0xFF){ 59 CharBuffer us = CharBuffer.allocate(0xFF*2); 60 ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8); 61 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 62 for(int j=0;j<0xFF; j++){ 63 int c = i+j; 64 65 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 66 continue; 67 } 68 69 if(c>0xFFFF){ 70 char lead = UTF16.getLeadSurrogate(c); 71 char trail = UTF16.getTrailSurrogate(c); 72 if(!UTF16.isLeadSurrogate(lead)){ 73 errln("lead is not lead!"+lead+" for cp: \\U"+Integer.toHexString(c)); 74 continue; 75 } 76 if(!UTF16.isTrailSurrogate(trail)){ 77 errln("trail is not trail!"+trail); 78 continue; 79 } 80 us.put(lead); 81 us.put(trail); 82 bs1.put((byte)(lead>>8)); 83 bs1.put((byte)(lead&0xFF)); 84 bs1.put((byte)(trail>>8)); 85 bs1.put((byte)(trail&0xFF)); 86 87 bs2.put((byte)(lead&0xFF)); 88 bs2.put((byte)(lead>>8)); 89 bs2.put((byte)(trail&0xFF)); 90 bs2.put((byte)(trail>>8)); 91 }else{ 92 93 if(c<0xFF){ 94 bs1.put((byte)0x00); 95 bs1.put((byte)(c)); 96 bs2.put((byte)(c)); 97 bs2.put((byte)0x00); 98 }else{ 99 bs1.put((byte)(c>>8)); 100 bs1.put((byte)(c&0xFF)); 101 102 bs2.put((byte)(c&0xFF)); 103 bs2.put((byte)(c>>8)); 104 } 105 us.put((char)c); 106 } 107 } 108 109 110 us.limit(us.position()); 111 us.position(0); 112 if(us.length()==0){ 113 continue; 114 } 115 116 117 bs1.limit(bs1.position()); 118 bs1.position(0); 119 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 120 //newBS.put((byte)0xFE); 121 //newBS.put((byte)0xFF); 122 newBS.put(bs1); 123 bs1.position(0); 124 smBufDecode(d1, "UTF-16", bs1, us); 125 smBufEncode(e1, "UTF-16", us, newBS); 126 127 bs2.limit(bs2.position()); 128 bs2.position(0); 129 newBS.clear(); 130 //newBS.put((byte)0xFF); 131 //newBS.put((byte)0xFE); 132 newBS.put(bs2); 133 bs2.position(0); 134 smBufDecode(d2, "UTF16-LE", bs2, us); 135 smBufEncode(e2, "UTF-16LE", us, newBS); 136 137 } 138 } 
139 140 @Test TestUTF32Converter()141 public void TestUTF32Converter(){ 142 CharsetProvider icu = new CharsetProviderICU(); 143 Charset cs1 = icu.charsetForName("UTF-32BE"); 144 CharsetEncoder e1 = cs1.newEncoder(); 145 CharsetDecoder d1 = cs1.newDecoder(); 146 147 Charset cs2 = icu.charsetForName("UTF-32LE"); 148 CharsetEncoder e2 = cs2.newEncoder(); 149 CharsetDecoder d2 = cs2.newDecoder(); 150 151 for(int i=0x000; i<0x10FFFF; i+=0xFF){ 152 CharBuffer us = CharBuffer.allocate(0xFF*2); 153 ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8); 154 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 155 for(int j=0;j<0xFF; j++){ 156 int c = i+j; 157 158 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 159 continue; 160 } 161 162 if(c>0xFFFF){ 163 char lead = UTF16.getLeadSurrogate(c); 164 char trail = UTF16.getTrailSurrogate(c); 165 166 us.put(lead); 167 us.put(trail); 168 }else{ 169 us.put((char)c); 170 } 171 bs1.put((byte) (c >>> 24)); 172 bs1.put((byte) (c >>> 16)); 173 bs1.put((byte) (c >>> 8)); 174 bs1.put((byte) (c & 0xFF)); 175 176 bs2.put((byte) (c & 0xFF)); 177 bs2.put((byte) (c >>> 8)); 178 bs2.put((byte) (c >>> 16)); 179 bs2.put((byte) (c >>> 24)); 180 } 181 bs1.limit(bs1.position()); 182 bs1.position(0); 183 bs2.limit(bs2.position()); 184 bs2.position(0); 185 us.limit(us.position()); 186 us.position(0); 187 if(us.length()==0){ 188 continue; 189 } 190 191 192 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 193 194 newBS.put((byte)0x00); 195 newBS.put((byte)0x00); 196 newBS.put((byte)0xFE); 197 newBS.put((byte)0xFF); 198 199 newBS.put(bs1); 200 bs1.position(0); 201 smBufDecode(d1, "UTF-32", bs1, us); 202 smBufEncode(e1, "UTF-32", us, newBS); 203 204 205 newBS.clear(); 206 207 newBS.put((byte)0xFF); 208 newBS.put((byte)0xFE); 209 newBS.put((byte)0x00); 210 newBS.put((byte)0x00); 211 212 newBS.put(bs2); 213 bs2.position(0); 214 smBufDecode(d2, "UTF-32LE", bs2, us); 215 smBufEncode(e2, "UTF-32LE", us, newBS); 216 217 } 218 } 219 220 @Test TestASCIIConverter()221 public void 
TestASCIIConverter() { 222 runTestASCIIBasedConverter("ASCII", 0x80); 223 } 224 225 @Test Test88591Converter()226 public void Test88591Converter() { 227 runTestASCIIBasedConverter("iso-8859-1", 0x100); 228 } 229 runTestASCIIBasedConverter(String converter, int limit)230 public void runTestASCIIBasedConverter(String converter, int limit){ 231 CharsetProvider icu = new CharsetProviderICU(); 232 Charset icuChar = icu.charsetForName(converter); 233 CharsetEncoder encoder = icuChar.newEncoder(); 234 CharsetDecoder decoder = icuChar.newDecoder(); 235 CoderResult cr; 236 237 /* test with and without array-backed buffers */ 238 239 byte[] bytes = new byte[0x10000]; 240 char[] chars = new char[0x10000]; 241 for (int j = 0; j <= 0xffff; j++) { 242 bytes[j] = (byte) j; 243 chars[j] = (char) j; 244 } 245 246 boolean fail = false; 247 boolean arrays = false; 248 boolean decoding = false; 249 int i; 250 251 // 0 thru limit - 1 252 ByteBuffer bs = ByteBuffer.wrap(bytes, 0, limit); 253 CharBuffer us = CharBuffer.wrap(chars, 0, limit); 254 smBufDecode(decoder, converter, bs, us, true); 255 smBufDecode(decoder, converter, bs, us, false); 256 smBufEncode(encoder, converter, us, bs, true); 257 smBufEncode(encoder, converter, us, bs, false); 258 for (i = 0; i < limit; i++) { 259 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 260 us = CharBuffer.wrap(chars, i, 1).slice(); 261 try { 262 decoding = true; 263 arrays = true; 264 smBufDecode(decoder, converter, bs, us, true, false, true); 265 266 decoding = true; 267 arrays = false; 268 smBufDecode(decoder, converter, bs, us, true, false, false); 269 270 decoding = false; 271 arrays = true; 272 smBufEncode(encoder, converter, us, bs, true, false, true); 273 274 decoding = false; 275 arrays = false; 276 smBufEncode(encoder, converter, us, bs, true, false, false); 277 278 } catch (Exception ex) { 279 errln("Failed to fail to " + (decoding ? "decode" : "encode") + " 0x" 280 + Integer.toHexString(i) + (arrays ? 
" with arrays" : " without arrays") + " in " + converter); 281 return; 282 } 283 } 284 285 // decode limit thru 255 286 for (i = limit; i <= 0xff; i++) { 287 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 288 us = CharBuffer.wrap(chars, i, 1).slice(); 289 try { 290 smBufDecode(decoder, converter, bs, us, true, false, true); 291 fail = true; 292 arrays = true; 293 break; 294 } catch (Exception ex) { 295 } 296 try { 297 smBufDecode(decoder, converter, bs, us, true, false, false); 298 fail = true; 299 arrays = false; 300 break; 301 } catch (Exception ex) { 302 } 303 } 304 if (fail) { 305 errln("Failed to fail to decode 0x" + Integer.toHexString(i) 306 + (arrays ? " with arrays" : " without arrays") + " in " + converter); 307 return; 308 } 309 310 // encode limit thru 0xffff, skipping through much of the 1ff to feff range to save 311 // time (it would take too much time to test every possible case) 312 for (i = limit; i <= 0xffff; i = ((i>=0x1ff && i<0xfeff) ? i+0xfd : i+1)) { 313 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 314 us = CharBuffer.wrap(chars, i, 1).slice(); 315 try { 316 smBufEncode(encoder, converter, us, bs, true, false, true); 317 fail = true; 318 arrays = true; 319 break; 320 } catch (Exception ex) { 321 } 322 try { 323 smBufEncode(encoder, converter, us, bs, true, false, false); 324 fail = true; 325 arrays = false; 326 break; 327 } catch (Exception ex) { 328 } 329 } 330 if (fail) { 331 errln("Failed to fail to encode 0x" + Integer.toHexString(i) 332 + (arrays ? 
" with arrays" : " without arrays") + " in " + converter); 333 return; 334 } 335 336 // test overflow / underflow edge cases 337 outer: for (int n = 1; n <= 3; n++) { 338 for (int m = 0; m < n; m++) { 339 // expecting underflow 340 try { 341 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 342 us = CharBuffer.wrap(chars, 'a', m).slice(); 343 smBufDecode(decoder, converter, bs, us, true, false, true); 344 smBufDecode(decoder, converter, bs, us, true, false, false); 345 smBufEncode(encoder, converter, us, bs, true, false, true); 346 smBufEncode(encoder, converter, us, bs, true, false, false); 347 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 348 us = CharBuffer.wrap(chars, 'a', n).slice(); 349 smBufDecode(decoder, converter, bs, us, true, false, true, m); 350 smBufDecode(decoder, converter, bs, us, true, false, false, m); 351 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 352 us = CharBuffer.wrap(chars, 'a', m).slice(); 353 smBufEncode(encoder, converter, us, bs, true, false, true, m); 354 smBufEncode(encoder, converter, us, bs, true, false, false, m); 355 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 356 us = CharBuffer.wrap(chars, 'a', n).slice(); 357 smBufDecode(decoder, converter, bs, us, true, false, true); 358 smBufDecode(decoder, converter, bs, us, true, false, false); 359 smBufEncode(encoder, converter, us, bs, true, false, true); 360 smBufEncode(encoder, converter, us, bs, true, false, false); 361 } catch (Exception ex) { 362 fail = true; 363 break outer; 364 } 365 366 // expecting overflow 367 try { 368 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 369 us = CharBuffer.wrap(chars, 'a', m).slice(); 370 smBufDecode(decoder, converter, bs, us, true, false, true); 371 fail = true; 372 break; 373 } catch (Exception ex) { 374 if (!(ex instanceof BufferOverflowException)) { 375 fail = true; 376 break outer; 377 } 378 } 379 try { 380 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 381 us = CharBuffer.wrap(chars, 'a', m).slice(); 382 smBufDecode(decoder, converter, bs, us, true, 
false, false); 383 fail = true; 384 } catch (Exception ex) { 385 if (!(ex instanceof BufferOverflowException)) { 386 fail = true; 387 break outer; 388 } 389 } 390 try { 391 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 392 us = CharBuffer.wrap(chars, 'a', n).slice(); 393 smBufEncode(encoder, converter, us, bs, true, false, true); 394 fail = true; 395 } catch (Exception ex) { 396 if (!(ex instanceof BufferOverflowException)) { 397 fail = true; 398 break outer; 399 } 400 } 401 try { 402 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 403 us = CharBuffer.wrap(chars, 'a', n).slice(); 404 smBufEncode(encoder, converter, us, bs, true, false, false); 405 fail = true; 406 } catch (Exception ex) { 407 if (!(ex instanceof BufferOverflowException)) { 408 fail = true; 409 break outer; 410 } 411 } 412 } 413 } 414 if (fail) { 415 errln("Incorrect result in " + converter + " for underflow / overflow edge cases"); 416 return; 417 } 418 419 // test surrogate combinations in encoding 420 String lead = "\ud888"; 421 String trail = "\udc88"; 422 String norm = "a"; 423 String ext = "\u0275"; // theta 424 String end = ""; 425 bs = ByteBuffer.wrap(new byte[] { 0 }); 426 String[] input = new String[] { // 427 lead + lead, // malf(1) 428 lead + trail, // unmap(2) 429 lead + norm, // malf(1) 430 lead + ext, // malf(1) 431 lead + end, // malf(1) 432 trail + norm, // malf(1) 433 trail + end, // malf(1) 434 ext + norm, // unmap(1) 435 ext + end, // unmap(1) 436 }; 437 CoderResult[] result = new CoderResult[] { 438 CoderResult.malformedForLength(1), 439 CoderResult.unmappableForLength(2), 440 CoderResult.malformedForLength(1), 441 CoderResult.malformedForLength(1), 442 CoderResult.malformedForLength(1), 443 CoderResult.malformedForLength(1), 444 CoderResult.malformedForLength(1), 445 CoderResult.unmappableForLength(1), 446 CoderResult.unmappableForLength(1), 447 }; 448 449 for (int index = 0; index < input.length; index++) { 450 CharBuffer source = CharBuffer.wrap(input[index]); 451 cr = 
encoder.encode(source, bs, true); 452 bs.rewind(); 453 encoder.reset(); 454 455 // if cr != results[x] 456 if (!((cr.isUnderflow() && result[index].isUnderflow()) 457 || (cr.isOverflow() && result[index].isOverflow()) 458 || (cr.isMalformed() && result[index].isMalformed()) 459 || (cr.isUnmappable() && result[index].isUnmappable())) 460 || (cr.isError() && cr.length() != result[index].length())) { 461 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 462 break; 463 } 464 465 source = CharBuffer.wrap(input[index].toCharArray()); 466 cr = encoder.encode(source, bs, true); 467 bs.rewind(); 468 encoder.reset(); 469 470 // if cr != results[x] 471 if (!((cr.isUnderflow() && result[index].isUnderflow()) 472 || (cr.isOverflow() && result[index].isOverflow()) 473 || (cr.isMalformed() && result[index].isMalformed()) 474 || (cr.isUnmappable() && result[index].isUnmappable())) 475 || (cr.isError() && cr.length() != result[index].length())) { 476 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 477 break; 478 } 479 } 480 } 481 482 @Test TestUTF8Converter()483 public void TestUTF8Converter() { 484 String converter = "UTF-8"; 485 CharsetProvider icu = new CharsetProviderICU(); 486 Charset icuChar = icu.charsetForName(converter); 487 CharsetEncoder encoder = icuChar.newEncoder(); 488 CharsetDecoder decoder = icuChar.newDecoder(); 489 ByteBuffer bs; 490 CharBuffer us; 491 CoderResult cr; 492 493 494 int[] size = new int[] { 1<<7, 1<<11, 1<<16 }; // # of 1,2,3 byte combinations 495 byte[] bytes = new byte[size[0] + size[1]*2 + size[2]*3]; 496 char[] chars = new char[size[0] + size[1] + size[2]]; 497 int i = 0; 498 int x, y; 499 500 // 0 to 1 << 7 (1 byters) 501 for (; i < size[0]; i++) { 502 bytes[i] = (byte) i; 503 chars[i] = (char) i; 504 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 505 us = CharBuffer.wrap(chars, i, 
1).slice(); 506 try { 507 smBufDecode(decoder, converter, bs, us, true, false, true); 508 smBufDecode(decoder, converter, bs, us, true, false, false); 509 smBufEncode(encoder, converter, us, bs, true, false, true); 510 smBufEncode(encoder, converter, us, bs, true, false, false); 511 } catch (Exception ex) { 512 errln("Incorrect result in " + converter + " for 0x" 513 + Integer.toHexString(i)); 514 break; 515 } 516 } 517 518 // 1 << 7 to 1 << 11 (2 byters) 519 for (; i < size[1]; i++) { 520 x = size[0] + i*2; 521 y = size[0] + i; 522 bytes[x + 0] = (byte) (0xc0 | ((i >> 6) & 0x1f)); 523 bytes[x + 1] = (byte) (0x80 | ((i >> 0) & 0x3f)); 524 chars[y] = (char) i; 525 bs = ByteBuffer.wrap(bytes, x, 2).slice(); 526 us = CharBuffer.wrap(chars, y, 1).slice(); 527 try { 528 smBufDecode(decoder, converter, bs, us, true, false, true); 529 smBufDecode(decoder, converter, bs, us, true, false, false); 530 smBufEncode(encoder, converter, us, bs, true, false, true); 531 smBufEncode(encoder, converter, us, bs, true, false, false); 532 } catch (Exception ex) { 533 errln("Incorrect result in " + converter + " for 0x" 534 + Integer.toHexString(i)); 535 break; 536 } 537 } 538 539 // 1 << 11 to 1 << 16 (3 byters and surrogates) 540 for (; i < size[2]; i++) { 541 x = size[0] + size[1] * 2 + i * 3; 542 y = size[0] + size[1] + i; 543 bytes[x + 0] = (byte) (0xe0 | ((i >> 12) & 0x0f)); 544 bytes[x + 1] = (byte) (0x80 | ((i >> 6) & 0x3f)); 545 bytes[x + 2] = (byte) (0x80 | ((i >> 0) & 0x3f)); 546 chars[y] = (char) i; 547 if (!UTF16.isSurrogate(i)) { 548 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 549 us = CharBuffer.wrap(chars, y, 1).slice(); 550 try { 551 smBufDecode(decoder, converter, bs, us, true, false, true); 552 smBufDecode(decoder, converter, bs, us, true, false, false); 553 smBufEncode(encoder, converter, us, bs, true, false, true); 554 smBufEncode(encoder, converter, us, bs, true, false, false); 555 } catch (Exception ex) { 556 errln("Incorrect result in " + converter + " for 0x" 557 
+ Integer.toHexString(i)); 558 break; 559 } 560 } else { 561 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 562 us = CharBuffer.wrap(chars, y, 1).slice(); 563 564 decoder.reset(); 565 cr = decoder.decode(bs, us, true); 566 bs.rewind(); 567 us.rewind(); 568 if (!cr.isMalformed() || cr.length() != 1) { 569 errln("Incorrect result in " + converter + " decoder for 0x" 570 + Integer.toHexString(i) + " received " + cr); 571 break; 572 } 573 encoder.reset(); 574 cr = encoder.encode(us, bs, true); 575 bs.rewind(); 576 us.rewind(); 577 if (!cr.isMalformed() || cr.length() != 1) { 578 errln("Incorrect result in " + converter + " encoder for 0x" 579 + Integer.toHexString(i) + " received " + cr); 580 break; 581 } 582 583 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 584 us = CharBuffer.wrap(new String(chars, y, 1)); 585 586 decoder.reset(); 587 cr = decoder.decode(bs, us, true); 588 bs.rewind(); 589 us.rewind(); 590 if (!cr.isMalformed() || cr.length() != 1) { 591 errln("Incorrect result in " + converter + " decoder for 0x" 592 + Integer.toHexString(i) + " received " + cr); 593 break; 594 } 595 encoder.reset(); 596 cr = encoder.encode(us, bs, true); 597 bs.rewind(); 598 us.rewind(); 599 if (!cr.isMalformed() || cr.length() != 1) { 600 errln("Incorrect result in " + converter + " encoder for 0x" 601 + Integer.toHexString(i) + " received " + cr); 602 break; 603 } 604 605 606 } 607 } 608 if (true) 609 return; 610 } 611 612 @Test TestHZ()613 public void TestHZ() { 614 /* test input */ 615 char[] in = new char[] { 616 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 617 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 618 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 619 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 620 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 621 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 
0x2494, 0x2495, 0x2496, 622 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 623 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 624 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 625 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 626 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 627 0x005A, 0x005B, 0x005C, 0x000A 628 }; 629 630 String converter = "HZ"; 631 CharsetProvider icu = new CharsetProviderICU(); 632 Charset icuChar = icu.charsetForName(converter); 633 CharsetEncoder encoder = icuChar.newEncoder(); 634 CharsetDecoder decoder = icuChar.newDecoder(); 635 try { 636 CharBuffer start = CharBuffer.wrap(in); 637 ByteBuffer bytes = encoder.encode(start); 638 CharBuffer finish = decoder.decode(bytes); 639 640 if (!equals(start, finish)) { 641 errln(converter + " roundtrip test failed: start does not match finish"); 642 643 char[] finishArray = new char[finish.limit()]; 644 for (int i=0; i<finishArray.length; i++) 645 finishArray[i] = finish.get(i); 646 647 logln("start: " + hex(in)); 648 logln("finish: " + hex(finishArray)); 649 } 650 } catch (CharacterCodingException ex) { 651 // Android patch: Skip tests that fail with customized data. 652 logln(converter + " roundtrip test failed: " + ex.getMessage()); 653 // Android patch end. 
654 ex.printStackTrace(System.err); 655 } 656 657 /* For better code coverage */ 658 CoderResult result = CoderResult.UNDERFLOW; 659 byte byteout[] = { 660 (byte)0x7e, (byte)0x7d, (byte)0x41, 661 (byte)0x7e, (byte)0x7b, (byte)0x21, 662 }; 663 char charin[] = { 664 (char)0x0041, (char)0x0042, (char)0x3000 665 }; 666 ByteBuffer bb = ByteBuffer.wrap(byteout); 667 CharBuffer cb = CharBuffer.wrap(charin); 668 int testLoopSize = 5; 669 int bbLimits[] = { 0, 1, 3, 4, 6}; 670 int bbPositions[] = { 0, 0, 0, 3, 3 }; 671 int ccPositions[] = { 0, 0, 0, 2, 2 }; 672 for (int i = 0; i < testLoopSize; i++) { 673 encoder.reset(); 674 bb.limit(bbLimits[i]); 675 bb.position(bbPositions[i]); 676 cb.position(ccPositions[i]); 677 result = encoder.encode(cb, bb, true); 678 679 if (i < 3) { 680 if (!result.isOverflow()) { 681 errln("Overflow buffer error should have occurred while encoding HZ (" + i + ")"); 682 } 683 } else { 684 if (result.isError()) { 685 // Android patch: Skip tests that fail with customized data. 686 logln("Error should not have occurred while encoding HZ.(" + i + ")"); 687 // Android patch end. 
688 } 689 } 690 } 691 } 692 693 @Test TestUTF8Surrogates()694 public void TestUTF8Surrogates() { 695 byte[][] in = new byte[][] { 696 { (byte)0x61, }, 697 { (byte)0xc2, (byte)0x80, }, 698 { (byte)0xe0, (byte)0xa0, (byte)0x80, }, 699 { (byte)0xf0, (byte)0x90, (byte)0x80, (byte)0x80, }, 700 { (byte)0xf4, (byte)0x84, (byte)0x8c, (byte)0xa1, }, 701 { (byte)0xf0, (byte)0x90, (byte)0x90, (byte)0x81, }, 702 }; 703 704 /* expected test results */ 705 char[][] results = new char[][] { 706 /* number of bytes read, code point */ 707 { '\u0061', }, 708 { '\u0080', }, 709 { '\u0800', }, 710 { '\ud800', '\udc00', }, // 10000 711 { '\udbd0', '\udf21', }, // 104321 712 { '\ud801', '\udc01', }, // 10401 713 }; 714 715 /* error test input */ 716 byte[][] in2 = new byte[][] { 717 { (byte)0x61, }, 718 { (byte)0xc0, (byte)0x80, /* illegal non-shortest form */ 719 (byte)0xe0, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 720 (byte)0xf0, (byte)0x80, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 721 (byte)0xc0, (byte)0xc0, /* illegal trail byte */ 722 (byte)0xf4, (byte)0x90, (byte)0x80, (byte)0x80, /* 0x110000 out of range */ 723 (byte)0xf8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, /* too long */ 724 (byte)0xfe, /* illegal byte altogether */ 725 (byte)0x62, }, 726 }; 727 728 /* expected error test results */ 729 char[][] results2 = new char[][] { 730 /* number of bytes read, code point */ 731 { '\u0062', }, 732 { '\u0062', }, 733 }; 734 735 String converter = "UTF-8"; 736 CharsetProvider icu = new CharsetProviderICU(); 737 Charset icuChar = icu.charsetForName(converter); 738 CharsetDecoder decoder = icuChar.newDecoder(); 739 740 int i; 741 try { 742 for (i = 0; i < in.length; i++) { 743 ByteBuffer source = ByteBuffer.wrap(in[i]); 744 CharBuffer expected = CharBuffer.wrap(results[i]); 745 smBufDecode(decoder, converter, source, expected, true, false, 746 true); 747 smBufDecode(decoder, converter, source, expected, true, false, 748 false); 749 } 750 } catch 
(Exception ex) { 751 errln("Incorrect result in " + converter); 752 } 753 try { 754 for (i = 0; i < in2.length; i++) { 755 ByteBuffer source = ByteBuffer.wrap(in2[i]); 756 CharBuffer expected = CharBuffer.wrap(results2[i]); 757 decoder.onMalformedInput(CodingErrorAction.IGNORE); 758 smBufDecode(decoder, converter, source, expected, true, false, 759 true); 760 smBufDecode(decoder, converter, source, expected, true, false, 761 false); 762 } 763 } catch (Exception ex) { 764 errln("Incorrect result in " + converter); 765 } 766 } 767 768 @Test TestSurrogateBehavior()769 public void TestSurrogateBehavior() { 770 CharsetProviderICU icu = new CharsetProviderICU(); 771 772 // get all the converters into an array 773 Object[] converters = CharsetProviderICU.getAvailableNames(); 774 775 String norm = "a"; 776 String ext = "\u0275"; // theta 777 String lead = "\ud835"; 778 String trail = "\udd04"; 779 // lead + trail = \U1d504 (fraktur capital A) 780 781 String input = 782 // error position 783 ext // unmap(1) 1 784 + lead // under 1 785 + lead // malf(1) 2 786 + trail // unmap(2) 4 787 + trail // malf(1) 5 788 + ext // unmap(1) 6 789 + norm // unmap(1) 7 790 ; 791 CoderResult[] results = new CoderResult[] { 792 CoderResult.unmappableForLength(1), // or underflow 793 CoderResult.UNDERFLOW, 794 CoderResult.malformedForLength(1), 795 CoderResult.unmappableForLength(2), // or underflow 796 CoderResult.malformedForLength(1), 797 CoderResult.unmappableForLength(1), // or underflow 798 CoderResult.unmappableForLength(1), // or underflow 799 }; 800 int[] positions = new int[] { 1,1,2,4,5,6,7 }; 801 int n = positions.length; 802 803 int badcount = 0; 804 int goodcount = 0; 805 int[] uhohindices = new int[n]; 806 int[] badposindices = new int[n]; 807 int[] malfindices = new int[n]; 808 int[] unmapindices = new int[n]; 809 ArrayList pass = new ArrayList(); 810 ArrayList exempt = new ArrayList(); 811 812 outer: for (int conv=0; conv<converters.length; conv++) { 813 String converter = 
(String)converters[conv]; 814 if (converter.equals("x-IMAP-mailbox-name") || converter.equals("UTF-7") || converter.equals("CESU-8") || converter.equals("BOCU-1") || 815 converter.equals("x-LMBCS-1")) { 816 exempt.add(converter); 817 continue; 818 } 819 820 boolean currentlybad = false; 821 Charset icuChar = icu.charsetForName(converter); 822 CharsetEncoder encoder = icuChar.newEncoder(); 823 CoderResult cr; 824 825 CharBuffer source = CharBuffer.wrap(input); 826 ByteBuffer target = ByteBuffer.allocate(30); 827 ByteBuffer expected = null; 828 try { 829 encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); 830 encoder.onMalformedInput(CodingErrorAction.IGNORE); 831 expected = encoder.encode(CharBuffer.wrap(ext + lead + trail + ext + norm)); 832 encoder.reset(); 833 } catch (CharacterCodingException ex) { 834 errln("Unexpected CharacterCodingException: " + ex.getMessage()); 835 return; 836 // Android patch: Skip tests that fail with customized data. 837 } catch (RuntimeException | CoderMalfunctionError ex) { 838 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 839 logln(converter + " " + ex.getClass().getName() + ": " + ex.getMessage()); 840 continue outer; 841 } 842 // Android patch end. 
843 844 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 845 encoder.onMalformedInput(CodingErrorAction.REPORT); 846 for (int i=0; i<n; i++) { 847 source.limit(i+1); 848 cr = encoder.encode(source, target, i == n - 1); 849 if (!(equals(cr, results[i]) 850 || (results[i].isUnmappable() && cr.isUnderflow()) // mappability depends on the converter 851 )) { 852 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 853 if (results[i].isMalformed() && cr.isMalformed()) { 854 malfindices[i]++; 855 } else if (results[i].isUnmappable() && cr.isUnmappable()) { 856 unmapindices[i]++; 857 } else { 858 uhohindices[i]++; 859 } 860 errln("(index=" + i + ") " + converter + " Received: " + cr + " Expected: " + results[i]); 861 } 862 if (source.position() != positions[i]) { 863 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 864 badposindices[i]++; 865 errln("(index=" + i + ") " + converter + " Received: " + source.position() + " Expected: " + positions[i]); 866 } 867 868 } 869 encoder.reset(); 870 871 //System.out.println("\n" + hex(target.array())); 872 //System.out.println(hex(expected.array()) + "\n" + expected.limit()); 873 if (!(equals(target, expected, expected.limit()) && target.position() == expected.limit())) { 874 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 875 errln(converter + " Received: \"" + hex(target.array()) + "\" Expected: \"" + hex(expected.array()) + "\""); 876 } 877 878 if (!currentlybad) { 879 goodcount++; 880 pass.add(converter); 881 } 882 } 883 884 logln("\n" + badcount + " / " + (converters.length - exempt.size()) + " (" + goodcount + " good, " + badcount + " bad)"); 885 log("index\t"); for (int i=0; i<n; i++) log(i + "\t"); logln(""); 886 log("unmap\t"); for (int i=0; i<n; i++) log(unmapindices[i] + "\t"); logln(""); 887 log("malf \t"); for (int i=0; i<n; i++) log(malfindices[i] + "\t"); logln(""); 888 log("pos \t"); for (int i=0; i<n; i++) log(badposindices[i] + "\t"); logln(""); 889 log("uhoh 
\t"); for (int i=0; i<n; i++) log(uhohindices[i] + "\t"); logln(""); 890 logln(""); 891 log("The few that passed: "); for (int i=0; i<pass.size(); i++) log(pass.get(i) + ", "); logln(""); 892 log("The few that are exempt: "); for (int i=0; i<exempt.size(); i++) log(exempt.get(i) + ", "); logln(""); 893 } 894 895 // public void TestCharsetCallback() { 896 // String currentTest = "initialization"; 897 // try { 898 // Class[] params; 899 // 900 // // get the classes 901 // Class CharsetCallback = Class.forName("com.ibm.icu.charset.CharsetCallback"); 902 // Class Decoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Decoder"); 903 // Class Encoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Encoder"); 904 // 905 // // set up encoderCall 906 // params = new Class[] {CharsetEncoderICU.class, Object.class, 907 // CharBuffer.class, ByteBuffer.class, IntBuffer.class, 908 // char[].class, int.class, int.class, CoderResult.class }; 909 // Method encoderCall = Encoder.getDeclaredMethod("call", params); 910 // 911 // // set up decoderCall 912 // params = new Class[] {CharsetDecoderICU.class, Object.class, 913 // ByteBuffer.class, CharBuffer.class, IntBuffer.class, 914 // char[].class, int.class, CoderResult.class}; 915 // Method decoderCall = Decoder.getDeclaredMethod("call", params); 916 // 917 // // get relevant fields 918 // Object SUB_STOP_ON_ILLEGAL = getFieldValue(CharsetCallback, "SUB_STOP_ON_ILLEGAL", null); 919 // 920 // // set up a few arguments 921 // CharsetProvider provider = new CharsetProviderICU(); 922 // Charset charset = provider.charsetForName("UTF-8"); 923 // CharsetEncoderICU encoder = (CharsetEncoderICU)charset.newEncoder(); 924 // CharsetDecoderICU decoder = (CharsetDecoderICU)charset.newDecoder(); 925 // CharBuffer chars = CharBuffer.allocate(10); 926 // chars.put('o'); 927 // chars.put('k'); 928 // ByteBuffer bytes = ByteBuffer.allocate(10); 929 // bytes.put((byte)'o'); 930 // bytes.put((byte)'k'); 931 // IntBuffer offsets = 
IntBuffer.allocate(10); 932 // offsets.put(0); 933 // offsets.put(1); 934 // char[] buffer = null; 935 // Integer length = 2; 936 // Integer cp = 0; 937 // CoderResult unmap = CoderResult.unmappableForLength(2); 938 // CoderResult malf = CoderResult.malformedForLength(2); 939 // CoderResult under = CoderResult.UNDERFLOW; 940 // 941 // // set up error arrays 942 // Integer invalidCharLength = 1; 943 // Byte subChar1 = (byte)0; 944 // Byte subChar1_alternate = (byte)1; // for TO_U_CALLBACK_SUBSTITUTE 945 // 946 // // set up chars and bytes backups and expected values for certain cases 947 // CharBuffer charsBackup = bufferCopy(chars); 948 // ByteBuffer bytesBackup = bufferCopy(bytes); 949 // IntBuffer offsetsBackup = bufferCopy(offsets); 950 // CharBuffer encoderCharsExpected = bufferCopy(chars); 951 // ByteBuffer encoderBytesExpected = bufferCopy(bytes); 952 // IntBuffer encoderOffsetsExpected = bufferCopy(offsets); 953 // CharBuffer decoderCharsExpected1 = bufferCopy(chars); 954 // CharBuffer decoderCharsExpected2 = bufferCopy(chars); 955 // IntBuffer decoderOffsetsExpected1 = bufferCopy(offsets); 956 // IntBuffer decoderOffsetsExpected2 = bufferCopy(offsets); 957 // 958 // // initialize fields to obtain expected data 959 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength); 960 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1); 961 // 962 // // run cbFromUWriteSub 963 // Method cbFromUWriteSub = CharsetEncoderICU.class.getDeclaredMethod("cbFromUWriteSub", new Class[] { CharsetEncoderICU.class, CharBuffer.class, ByteBuffer.class, IntBuffer.class}); 964 // cbFromUWriteSub.setAccessible(true); 965 // CoderResult encoderResultExpected = (CoderResult)cbFromUWriteSub.invoke(encoder, new Object[] {encoder, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected}); 966 // 967 // // run toUWriteUChars with normal data 968 // Method toUWriteUChars = 
CharsetDecoderICU.class.getDeclaredMethod("toUWriteUChars", new Class[] { CharsetDecoderICU.class, char[].class, int.class, int.class, CharBuffer.class, IntBuffer.class, int.class}); 969 // toUWriteUChars.setAccessible(true); 970 // CoderResult decoderResultExpected1 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0xFFFD}, Integer.valueOf(0), Integer.valueOf(1), decoderCharsExpected1, decoderOffsetsExpected1, Integer.valueOf(bytes.position())}); 971 // 972 // // reset certain fields 973 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength); 974 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1_alternate); 975 // 976 // // run toUWriteUChars again 977 // CoderResult decoderResultExpected2 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0x1A}, Integer.valueOf(0), Integer.valueOf(1), decoderCharsExpected2, decoderOffsetsExpected2, Integer.valueOf(bytes.position())}); 978 // 979 // // begin creating the tests array 980 // ArrayList tests = new ArrayList(); 981 // 982 // // create tests for FROM_U_CALLBACK_SKIP 0 983 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 984 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 985 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 986 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, 
length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 987 // 988 // // create tests for TO_U_CALLBACK_SKIP 4 989 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 990 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 991 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 992 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 993 // 994 // // create tests for FROM_U_CALLBACK_STOP 8 995 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 996 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 997 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 998 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + 
"xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 999 // 1000 // // create tests for TO_U_CALLBACK_STOP 12 1001 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 1002 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 1003 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 1004 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 1005 // 1006 // // create tests for FROM_U_CALLBACK_SUBSTITUTE 16 1007 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, encoderResultExpected, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1008 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1009 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, encoderResultExpected, encoderCharsExpected, 
encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1010 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1011 // 1012 // // create tests for TO_U_CALLBACK_SUBSTITUTE 20 1013 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected1, decoderCharsExpected1, bytesBackup, decoderOffsetsExpected1, new Object[] { invalidCharLength, subChar1 }}); 1014 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected2, decoderCharsExpected2, bytesBackup, decoderOffsetsExpected2, new Object[] { invalidCharLength, subChar1_alternate }}); 1015 // 1016 // Iterator iter = tests.iterator(); 1017 // for (int i=0; iter.hasNext(); i++) { 1018 // // get the data out of the map 1019 // Object[] next = (Object[])iter.next(); 1020 // 1021 // Method method = (Method)next[0]; 1022 // String fieldName = (String)next[1]; 1023 // Object field = getFieldValue(CharsetCallback, fieldName, null); 1024 // Object[] args = (Object[])next[2]; 1025 // CoderResult expected = (CoderResult)next[3]; 1026 // CharBuffer charsExpected = (CharBuffer)next[4]; 1027 // ByteBuffer bytesExpected = (ByteBuffer)next[5]; 1028 // IntBuffer offsetsExpected = (IntBuffer)next[6]; 1029 // 1030 // // set up error arrays and certain fields 1031 // Object[] values = (Object[])next[7]; 1032 // if (method == decoderCall) { 1033 // decoder.reset(); 1034 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, values[0]); 1035 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), values[1]); 1036 // } else if (method == encoderCall) { 1037 // encoder.reset(); 1038 // } 1039 // 
1040 // try { 1041 // // invoke the method 1042 // CoderResult actual = (CoderResult)method.invoke(field, args); 1043 // 1044 // // if expected != actual 1045 // if (!coderResultsEqual(expected, actual)) { 1046 // // case #i refers to the index in the arraylist tests 1047 // errln(fieldName + " failed to return the correct result for case #" + i + "."); 1048 // } 1049 // // if the expected buffers != actual buffers 1050 // else if (!(buffersEqual(chars, charsExpected) && 1051 // buffersEqual(bytes, bytesExpected) && 1052 // buffersEqual(offsets, offsetsExpected))) { 1053 // // case #i refers to the index in the arraylist tests 1054 // errln(fieldName + " did not perform the correct operation on the buffers for case #" + i + "."); 1055 // } 1056 // } catch (InvocationTargetException ex) { 1057 // // case #i refers to the index in the arraylist tests 1058 // errln(fieldName + " threw an exception for case #" + i + ": " + ex.getCause()); 1059 // //ex.getCause().printStackTrace(); 1060 // } 1061 // 1062 // // reset the buffers 1063 // System.arraycopy(bytesBackup.array(), 0, bytes.array(), 0, 10); 1064 // System.arraycopy(charsBackup.array(), 0, chars.array(), 0, 10); 1065 // System.arraycopy(offsetsBackup.array(), 0, offsets.array(), 0, 10); 1066 // bytes.position(bytesBackup.position()); 1067 // chars.position(charsBackup.position()); 1068 // offsets.position(offsetsBackup.position()); 1069 // } 1070 // 1071 // } catch (Exception ex) { 1072 // errln("TestCharsetCallback skipped due to " + ex.toString()); 1073 // ex.printStackTrace(); 1074 // } 1075 // } 1076 // 1077 // private Object getFieldValue(Class c, String name, Object instance) throws Exception { 1078 // Field field = c.getDeclaredField(name); 1079 // field.setAccessible(true); 1080 // return field.get(instance); 1081 // } 1082 // private void setFieldValue(Class c, String name, Object instance, Object value) throws Exception { 1083 // Field field = c.getDeclaredField(name); 1084 // field.setAccessible(true); 
1085 // if (value instanceof Boolean) 1086 // field.setBoolean(instance, ((Boolean)value).booleanValue()); 1087 // else if (value instanceof Byte) 1088 // field.setByte(instance, ((Byte)value).byteValue()); 1089 // else if (value instanceof Character) 1090 // field.setChar(instance, ((Character)value).charValue()); 1091 // else if (value instanceof Double) 1092 // field.setDouble(instance, ((Double)value).doubleValue()); 1093 // else if (value instanceof Float) 1094 // field.setFloat(instance, ((Float)value).floatValue()); 1095 // else if (value instanceof Integer) 1096 // field.setInt(instance, ((Integer)value).intValue()); 1097 // else if (value instanceof Long) 1098 // field.setLong(instance, ((Long)value).longValue()); 1099 // else if (value instanceof Short) 1100 // field.setShort(instance, ((Short)value).shortValue()); 1101 // else 1102 // field.set(instance, value); 1103 // } 1104 // private boolean coderResultsEqual(CoderResult a, CoderResult b) { 1105 // if (a == null && b == null) 1106 // return true; 1107 // if (a == null || b == null) 1108 // return false; 1109 // if ((a.isUnderflow() && b.isUnderflow()) || (a.isOverflow() && b.isOverflow())) 1110 // return true; 1111 // if (a.length() != b.length()) 1112 // return false; 1113 // if ((a.isMalformed() && b.isMalformed()) || (a.isUnmappable() && b.isUnmappable())) 1114 // return true; 1115 // return false; 1116 // } 1117 // private boolean buffersEqual(ByteBuffer a, ByteBuffer b) { 1118 // if (a.position() != b.position()) 1119 // return false; 1120 // int limit = a.position(); 1121 // for (int i=0; i<limit; i++) 1122 // if (a.get(i) != b.get(i)) 1123 // return false; 1124 // return true; 1125 // } 1126 // private boolean buffersEqual(CharBuffer a, CharBuffer b) { 1127 // if (a.position() != b.position()) 1128 // return false; 1129 // int limit = a.position(); 1130 // for (int i=0; i<limit; i++) 1131 // if (a.get(i) != b.get(i)) 1132 // return false; 1133 // return true; 1134 // } 1135 // private boolean 
buffersEqual(IntBuffer a, IntBuffer b) { 1136 // if (a.position() != b.position()) 1137 // return false; 1138 // int limit = a.position(); 1139 // for (int i=0; i<limit; i++) 1140 // if (a.get(i) != b.get(i)) 1141 // return false; 1142 // return true; 1143 // } 1144 // private ByteBuffer bufferCopy(ByteBuffer src) { 1145 // ByteBuffer dest = ByteBuffer.allocate(src.limit()); 1146 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1147 // dest.position(src.position()); 1148 // return dest; 1149 // } 1150 // private CharBuffer bufferCopy(CharBuffer src) { 1151 // CharBuffer dest = CharBuffer.allocate(src.limit()); 1152 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1153 // dest.position(src.position()); 1154 // return dest; 1155 // } 1156 // private IntBuffer bufferCopy(IntBuffer src) { 1157 // IntBuffer dest = IntBuffer.allocate(src.limit()); 1158 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1159 // dest.position(src.position()); 1160 // return dest; 1161 // } 1162 1163 1164 @Test TestAPISemantics( )1165 public void TestAPISemantics(/*String encoding*/) { 1166 String encoding = "UTF-16"; 1167 CharsetDecoder decoder = null; 1168 CharsetEncoder encoder = null; 1169 try { 1170 CharsetProviderICU provider = new CharsetProviderICU(); 1171 Charset charset = provider.charsetForName(encoding); 1172 decoder = charset.newDecoder(); 1173 encoder = charset.newEncoder(); 1174 } catch(MissingResourceException ex) { 1175 warnln("Could not load charset data: " + encoding); 1176 return; 1177 } 1178 1179 final String unistr = "abcd\ud800\udc00\u1234\u00a5\u3000\r\n"; 1180 final byte[] byteStr = { 1181 (byte) 0x00,(byte) 'a', 1182 (byte) 0x00,(byte) 'b', 1183 (byte) 0x00,(byte) 'c', 1184 (byte) 0x00,(byte) 'd', 1185 (byte) 0xd8,(byte) 0x00, 1186 (byte) 0xdc,(byte) 0x00, 1187 (byte) 0x12,(byte) 0x34, 1188 (byte) 0x00,(byte) 0xa5, 1189 (byte) 0x30,(byte) 0x00, 1190 (byte) 0x00,(byte) 0x0d, 1191 (byte) 0x00,(byte) 0x0a 1192 }; 
1193 final byte[] expectedByteStr = { 1194 (byte) 0xfe,(byte) 0xff, 1195 (byte) 0x00,(byte) 'a', 1196 (byte) 0x00,(byte) 'b', 1197 (byte) 0x00,(byte) 'c', 1198 (byte) 0x00,(byte) 'd', 1199 (byte) 0xd8,(byte) 0x00, 1200 (byte) 0xdc,(byte) 0x00, 1201 (byte) 0x12,(byte) 0x34, 1202 (byte) 0x00,(byte) 0xa5, 1203 (byte) 0x30,(byte) 0x00, 1204 (byte) 0x00,(byte) 0x0d, 1205 (byte) 0x00,(byte) 0x0a 1206 }; 1207 1208 ByteBuffer byes = ByteBuffer.wrap(byteStr); 1209 CharBuffer uniVal = CharBuffer.wrap(unistr); 1210 ByteBuffer expected = ByteBuffer.wrap(expectedByteStr); 1211 1212 int rc = 0; 1213 if(decoder==null){ 1214 warnln("Could not load decoder."); 1215 return; 1216 } 1217 decoder.reset(); 1218 /* Convert the whole buffer to Unicode */ 1219 try { 1220 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1221 CoderResult result = decoder.decode(byes, chars, false); 1222 1223 if (result.isError()) { 1224 errln("ToChars encountered Error"); 1225 rc = 1; 1226 } 1227 if (result.isOverflow()) { 1228 errln("ToChars encountered overflow exception"); 1229 rc = 1; 1230 } 1231 if (!equals(chars, unistr)) { 1232 errln("ToChars does not match"); 1233 printchars(chars); 1234 errln("Expected : "); 1235 printchars(unistr); 1236 rc = 2; 1237 } 1238 1239 } catch (Exception e) { 1240 errln("ToChars - exception in buffer"); 1241 rc = 5; 1242 } 1243 1244 /* Convert single bytes to Unicode */ 1245 try { 1246 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1247 ByteBuffer b = ByteBuffer.wrap(byteStr); 1248 decoder.reset(); 1249 CoderResult result=null; 1250 for (int i = 1; i <= byteStr.length; i++) { 1251 b.limit(i); 1252 result = decoder.decode(b, chars, false); 1253 if(result.isOverflow()){ 1254 errln("ToChars single threw an overflow exception"); 1255 } 1256 if (result.isError()) { 1257 errln("ToChars single the result is an error "+result.toString()); 1258 } 1259 } 1260 if (unistr.length() != (chars.limit())) { 1261 errln("ToChars single len does not match"); 1262 rc = 3; 
1263 } 1264 if (!equals(chars, unistr)) { 1265 errln("ToChars single does not match"); 1266 printchars(chars); 1267 rc = 4; 1268 } 1269 } catch (Exception e) { 1270 errln("ToChars - exception in single"); 1271 //e.printStackTrace(); 1272 rc = 6; 1273 } 1274 1275 /* Convert the buffer one at a time to Unicode */ 1276 try { 1277 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1278 decoder.reset(); 1279 byes.rewind(); 1280 for (int i = 1; i <= byteStr.length; i++) { 1281 byes.limit(i); 1282 CoderResult result = decoder.decode(byes, chars, false); 1283 if (result.isError()) { 1284 errln("Error while decoding: "+result.toString()); 1285 } 1286 if(result.isOverflow()){ 1287 errln("ToChars Simple threw an overflow exception"); 1288 } 1289 } 1290 if (chars.limit() != unistr.length()) { 1291 errln("ToChars Simple buffer len does not match"); 1292 rc = 7; 1293 } 1294 if (!equals(chars, unistr)) { 1295 errln("ToChars Simple buffer does not match"); 1296 printchars(chars); 1297 err(" Expected : "); 1298 printchars(unistr); 1299 rc = 8; 1300 } 1301 } catch (Exception e) { 1302 errln("ToChars - exception in single buffer"); 1303 //e.printStackTrace(System.err); 1304 rc = 9; 1305 } 1306 if (rc != 0) { 1307 errln("Test Simple ToChars for encoding : FAILED"); 1308 } 1309 1310 rc = 0; 1311 /* Convert the whole buffer from unicode */ 1312 try { 1313 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1314 encoder.reset(); 1315 CoderResult result = encoder.encode(uniVal, bytes, false); 1316 if (result.isError()) { 1317 errln("FromChars reported error: " + result.toString()); 1318 rc = 1; 1319 } 1320 if(result.isOverflow()){ 1321 errln("FromChars threw an overflow exception"); 1322 } 1323 bytes.position(0); 1324 if (!bytes.equals(expected)) { 1325 errln("FromChars does not match"); 1326 printbytes(bytes); 1327 printbytes(expected); 1328 rc = 2; 1329 } 1330 } catch (Exception e) { 1331 errln("FromChars - exception in buffer"); 1332 //e.printStackTrace(System.err); 
1333 rc = 5; 1334 } 1335 1336 /* Convert the buffer one char at a time to unicode */ 1337 try { 1338 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1339 CharBuffer c = CharBuffer.wrap(unistr); 1340 encoder.reset(); 1341 CoderResult result= null; 1342 for (int i = 1; i <= unistr.length(); i++) { 1343 c.limit(i); 1344 result = encoder.encode(c, bytes, false); 1345 if(result.isOverflow()){ 1346 errln("FromChars single threw an overflow exception"); 1347 } 1348 if(result.isError()){ 1349 errln("FromChars single threw an error: "+ result.toString()); 1350 } 1351 } 1352 if (expectedByteStr.length != bytes.limit()) { 1353 errln("FromChars single len does not match"); 1354 rc = 3; 1355 } 1356 1357 bytes.position(0); 1358 if (!bytes.equals(expected)) { 1359 errln("FromChars single does not match"); 1360 printbytes(bytes); 1361 printbytes(expected); 1362 rc = 4; 1363 } 1364 1365 } catch (Exception e) { 1366 errln("FromChars - exception in single"); 1367 //e.printStackTrace(System.err); 1368 rc = 6; 1369 } 1370 1371 /* Convert one char at a time to unicode */ 1372 try { 1373 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1374 encoder.reset(); 1375 char[] temp = unistr.toCharArray(); 1376 CoderResult result=null; 1377 for (int i = 0; i <= temp.length; i++) { 1378 uniVal.limit(i); 1379 result = encoder.encode(uniVal, bytes, false); 1380 if(result.isOverflow()){ 1381 errln("FromChars simple threw an overflow exception"); 1382 } 1383 if(result.isError()){ 1384 errln("FromChars simple threw an error: "+ result.toString()); 1385 } 1386 } 1387 if (bytes.limit() != expectedByteStr.length) { 1388 errln("FromChars Simple len does not match"); 1389 rc = 7; 1390 } 1391 if (!bytes.equals(byes)) { 1392 errln("FromChars Simple does not match"); 1393 printbytes(bytes); 1394 printbytes(byes); 1395 rc = 8; 1396 } 1397 } catch (Exception e) { 1398 errln("FromChars - exception in single buffer"); 1399 //e.printStackTrace(System.err); 1400 rc = 9; 1401 } 1402 if 
(rc != 0) { 1403 errln("Test Simple FromChars " + encoding + " --FAILED"); 1404 } 1405 } 1406 printchars(CharBuffer buf)1407 void printchars(CharBuffer buf) { 1408 int i; 1409 char[] chars = new char[buf.limit()]; 1410 //save the current position 1411 int pos = buf.position(); 1412 buf.position(0); 1413 buf.get(chars); 1414 //reset to old position 1415 buf.position(pos); 1416 for (i = 0; i < chars.length; i++) { 1417 err(hex(chars[i]) + " "); 1418 } 1419 errln(""); 1420 } printchars(String str)1421 void printchars(String str) { 1422 char[] chars = str.toCharArray(); 1423 for (int i = 0; i < chars.length; i++) { 1424 err(hex(chars[i]) + " "); 1425 } 1426 errln(""); 1427 } printbytes(ByteBuffer buf)1428 void printbytes(ByteBuffer buf) { 1429 int i; 1430 byte[] bytes = new byte[buf.limit()]; 1431 //save the current position 1432 int pos = buf.position(); 1433 buf.position(0); 1434 buf.get(bytes); 1435 //reset to old position 1436 buf.position(pos); 1437 for (i = 0; i < bytes.length; i++) { 1438 System.out.print(hex(bytes[i]) + " "); 1439 } 1440 errln(""); 1441 } 1442 equals(CoderResult a, CoderResult b)1443 public boolean equals(CoderResult a, CoderResult b) { 1444 return (a.isUnderflow() && b.isUnderflow()) 1445 || (a.isOverflow() && b.isOverflow()) 1446 || (a.isMalformed() && b.isMalformed() && a.length() == b.length()) 1447 || (a.isUnmappable() && b.isUnmappable() && a.length() == b.length()); 1448 } equals(CharBuffer buf, String str)1449 public boolean equals(CharBuffer buf, String str) { 1450 return equals(buf, str.toCharArray()); 1451 } equals(CharBuffer buf, CharBuffer str)1452 public boolean equals(CharBuffer buf, CharBuffer str) { 1453 if (buf.limit() != str.limit()) 1454 return false; 1455 int limit = buf.limit(); 1456 for (int i = 0; i < limit; i++) 1457 if (buf.get(i) != str.get(i)) 1458 return false; 1459 return true; 1460 } equals(CharBuffer buf, CharBuffer str, int limit)1461 public boolean equals(CharBuffer buf, CharBuffer str, int limit) { 1462 if 
(limit > buf.limit() || limit > str.limit()) 1463 return false; 1464 for (int i = 0; i < limit; i++) 1465 if (buf.get(i) != str.get(i)) 1466 return false; 1467 return true; 1468 } equals(CharBuffer buf, char[] compareTo)1469 public boolean equals(CharBuffer buf, char[] compareTo) { 1470 char[] chars = new char[buf.limit()]; 1471 //save the current position 1472 int pos = buf.position(); 1473 buf.position(0); 1474 buf.get(chars); 1475 //reset to old position 1476 buf.position(pos); 1477 return equals(chars, compareTo); 1478 } 1479 equals(char[] chars, char[] compareTo)1480 public boolean equals(char[] chars, char[] compareTo) { 1481 if (chars.length != compareTo.length) { 1482 errln( 1483 "Length does not match chars: " 1484 + chars.length 1485 + " compareTo: " 1486 + compareTo.length); 1487 return false; 1488 } else { 1489 boolean result = true; 1490 for (int i = 0; i < chars.length; i++) { 1491 if (chars[i] != compareTo[i]) { 1492 logln( 1493 "Got: " 1494 + hex(chars[i]) 1495 + " Expected: " 1496 + hex(compareTo[i]) 1497 + " At: " 1498 + i); 1499 result = false; 1500 } 1501 } 1502 return result; 1503 } 1504 } 1505 equals(ByteBuffer buf, byte[] compareTo)1506 public boolean equals(ByteBuffer buf, byte[] compareTo) { 1507 byte[] chars = new byte[buf.limit()]; 1508 //save the current position 1509 int pos = buf.position(); 1510 buf.position(0); 1511 buf.get(chars); 1512 //reset to old position 1513 buf.position(pos); 1514 return equals(chars, compareTo); 1515 } equals(ByteBuffer buf, ByteBuffer compareTo)1516 public boolean equals(ByteBuffer buf, ByteBuffer compareTo) { 1517 if (buf.limit() != compareTo.limit()) 1518 return false; 1519 int limit = buf.limit(); 1520 for (int i = 0; i < limit; i++) 1521 if (buf.get(i) != compareTo.get(i)) 1522 return false; 1523 return true; 1524 } equals(ByteBuffer buf, ByteBuffer compareTo, int limit)1525 public boolean equals(ByteBuffer buf, ByteBuffer compareTo, int limit) { 1526 if (limit > buf.limit() || limit > 
compareTo.limit()) 1527 return false; 1528 for (int i = 0; i < limit; i++) 1529 if (buf.get(i) != compareTo.get(i)) 1530 return false; 1531 return true; 1532 } equals(byte[] chars, byte[] compareTo)1533 public boolean equals(byte[] chars, byte[] compareTo) { 1534 if (false/*chars.length != compareTo.length*/) { 1535 errln( 1536 "Length does not match chars: " 1537 + chars.length 1538 + " compareTo: " 1539 + compareTo.length); 1540 return false; 1541 } else { 1542 boolean result = true; 1543 for (int i = 0; i < chars.length; i++) { 1544 if (chars[i] != compareTo[i]) { 1545 logln( 1546 "Got: " 1547 + hex(chars[i]) 1548 + " Expected: " 1549 + hex(compareTo[i]) 1550 + " At: " 1551 + i); 1552 result = false; 1553 } 1554 } 1555 return result; 1556 } 1557 } 1558 1559 // TODO 1560 /* 1561 @Test 1562 public void TestCallback(String encoding) throws Exception { 1563 1564 byte[] gbSource = 1565 { 1566 (byte) 0x81, 1567 (byte) 0x36, 1568 (byte) 0xDE, 1569 (byte) 0x36, 1570 (byte) 0x81, 1571 (byte) 0x36, 1572 (byte) 0xDE, 1573 (byte) 0x37, 1574 (byte) 0x81, 1575 (byte) 0x36, 1576 (byte) 0xDE, 1577 (byte) 0x38, 1578 (byte) 0xe3, 1579 (byte) 0x32, 1580 (byte) 0x9a, 1581 (byte) 0x36 }; 1582 1583 char[] subChars = { 'P', 'I' }; 1584 1585 decoder.reset(); 1586 1587 decoder.replaceWith(new String(subChars)); 1588 ByteBuffer mySource = ByteBuffer.wrap(gbSource); 1589 CharBuffer myTarget = CharBuffer.allocate(5); 1590 1591 decoder.decode(mySource, myTarget, true); 1592 char[] expectedResult = 1593 { '\u22A6', '\u22A7', '\u22A8', '\u0050', '\u0049', }; 1594 1595 if (!equals(myTarget, new String(expectedResult))) { 1596 errln("Test callback GB18030 to Unicode : FAILED"); 1597 } 1598 1599 } 1600 */ 1601 1602 @Test TestCanConvert( )1603 public void TestCanConvert(/*String encoding*/)throws Exception { 1604 char[] mySource = { 1605 '\ud800', '\udc00',/*surrogate pair */ 1606 '\u22A6','\u22A7','\u22A8','\u22A9','\u22AA', 1607 '\u22AB','\u22AC','\u22AD','\u22AE','\u22AF', 1608 
'\u22B0','\u22B1','\u22B2','\u22B3','\u22B4', 1609 '\ud800','\udc00',/*surrogate pair */ 1610 '\u22B5','\u22B6','\u22B7','\u22B8','\u22B9', 1611 '\u22BA','\u22BB','\u22BC','\u22BD','\u22BE' 1612 }; 1613 String encoding = "UTF-16"; 1614 CharsetEncoder encoder = null; 1615 try { 1616 CharsetProviderICU provider = new CharsetProviderICU(); 1617 Charset charset = provider.charsetForName(encoding); 1618 encoder = charset.newEncoder(); 1619 } catch(MissingResourceException ex) { 1620 warnln("Could not load charset data: " + encoding); 1621 return; 1622 } 1623 if (!encoder.canEncode(new String(mySource))) { 1624 errln("Test canConvert() " + encoding + " failed. "+encoder); 1625 } 1626 1627 } 1628 1629 @Test TestAvailableCharsets()1630 public void TestAvailableCharsets() { 1631 SortedMap map = Charset.availableCharsets(); 1632 Set keySet = map.keySet(); 1633 Iterator iter = keySet.iterator(); 1634 while(iter.hasNext()){ 1635 logln("Charset name: "+iter.next().toString()); 1636 } 1637 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1638 int mapSize = map.size(); 1639 if(mapSize < charsets.length){ 1640 errln("Charset.availableCharsets() returned a number less than the number returned by icu. ICU: " + charsets.length 1641 + " JDK: " + mapSize); 1642 } 1643 logln("Total Number of chasets = " + map.size()); 1644 } 1645 1646 @Test TestWindows936()1647 public void TestWindows936(){ 1648 CharsetProviderICU icu = new CharsetProviderICU(); 1649 Charset cs = icu.charsetForName("windows-936-2000"); 1650 String canonicalName = cs.name(); 1651 if(!canonicalName.equals("GBK")){ 1652 errln("Did not get the expected canonical name. 
Got: "+canonicalName); //get the canonical name 1653 } 1654 } 1655 1656 @Test TestICUAvailableCharsets()1657 public void TestICUAvailableCharsets() { 1658 CharsetProviderICU icu = new CharsetProviderICU(); 1659 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1660 for(int i=0;i<charsets.length;i++){ 1661 Charset cs = icu.charsetForName((String)charsets[i]); 1662 try{ 1663 CharsetEncoder encoder = cs.newEncoder(); 1664 if(encoder!=null){ 1665 logln("Creation of encoder succeeded. "+cs.toString()); 1666 } 1667 }catch(Exception ex){ 1668 errln("Could not instantiate encoder for "+charsets[i]+". Error: "+ex.toString()); 1669 } 1670 try{ 1671 CharsetDecoder decoder = cs.newDecoder(); 1672 if(decoder!=null){ 1673 logln("Creation of decoder succeeded. "+cs.toString()); 1674 } 1675 }catch(Exception ex){ 1676 errln("Could not instantiate decoder for "+charsets[i]+". Error: "+ex.toString()); 1677 } 1678 } 1679 } 1680 1681 /* jitterbug 4312 */ 1682 @Test TestUnsupportedCharset()1683 public void TestUnsupportedCharset(){ 1684 CharsetProvider icu = new CharsetProviderICU(); 1685 Charset icuChar = icu.charsetForName("impossible"); 1686 if(icuChar != null){ 1687 errln("ICU does not conform to the spec"); 1688 } 1689 } 1690 1691 @Test TestEncoderCreation()1692 public void TestEncoderCreation(){ 1693 // Use CharsetICU.forNameICU() so that we get the ICU version 1694 // even if the system or another provider also supports this charset. 
1695 String encoding = "GB_2312-80"; 1696 try{ 1697 Charset cs = CharsetICU.forNameICU(encoding); 1698 CharsetEncoder enc = cs.newEncoder(); 1699 if(enc!=null){ 1700 logln("Successfully created an encoder for " + encoding + ": " + enc); 1701 if(!(enc instanceof CharsetEncoderICU)) { 1702 errln("Expected " + encoding + 1703 " to be implemented by ICU but got an instance of " + enc.getClass()); 1704 } 1705 }else{ 1706 errln("Error creating charset encoder for " + encoding); 1707 } 1708 }catch(Exception e){ 1709 warnln("Error creating charset encoder for " + encoding + ": " + e); 1710 } 1711 // Use Charset.forName() which may return an ICU Charset or some other implementation. 1712 encoding = "x-ibm-971_P100-1995"; 1713 try{ 1714 Charset cs = Charset.forName(encoding); 1715 CharsetEncoder enc = cs.newEncoder(); 1716 if(enc!=null){ 1717 logln("Successfully created an encoder for " + encoding + ": " + enc + 1718 " which is implemented by ICU? " + (enc instanceof CharsetEncoderICU)); 1719 }else{ 1720 errln("Error creating charset encoder for " + encoding); 1721 } 1722 }catch(Exception e){ 1723 warnln("Error creating charset encoder for " + encoding + ": " + e); 1724 } 1725 } 1726 1727 @Test TestSubBytes()1728 public void TestSubBytes(){ 1729 try{ 1730 //create utf-8 decoder 1731 CharsetDecoder decoder = new CharsetProviderICU().charsetForName("utf-8").newDecoder(); 1732 1733 //create a valid byte array, which can be decoded to " buffer" 1734 byte[] unibytes = new byte[] { 0x0020, 0x0062, 0x0075, 0x0066, 0x0066, 0x0065, 0x0072 }; 1735 1736 ByteBuffer buffer = ByteBuffer.allocate(20); 1737 1738 //add a evil byte to make the byte buffer be malformed input 1739 buffer.put((byte)0xd8); 1740 1741 //put the valid byte array 1742 buffer.put(unibytes); 1743 1744 //reset position 1745 buffer.flip(); 1746 1747 decoder.onMalformedInput(CodingErrorAction.REPLACE); 1748 CharBuffer out = decoder.decode(buffer); 1749 String expected = "\ufffd buffer"; 1750 if(!expected.equals(new 
String(out.array()))){ 1751 errln("Did not get the expected result for substitution chars. Got: "+ 1752 new String(out.array()) + "("+ hex(out.array())+")"); 1753 } 1754 logln("Output: "+ new String(out.array()) + "("+ hex(out.array())+")"); 1755 }catch (CharacterCodingException ex){ 1756 errln("Unexpected exception: "+ex.toString()); 1757 } 1758 } 1759 /* 1760 1761 @Test 1762 public void TestImplFlushFailure(){ 1763 1764 try{ 1765 CharBuffer in = CharBuffer.wrap("\u3005\u3006\u3007\u30FC\u2015\u2010\uFF0F"); 1766 CharsetEncoder encoder = new CharsetProviderICU().charsetForName("iso-2022-jp").newEncoder(); 1767 ByteBuffer out = ByteBuffer.allocate(30); 1768 encoder.encode(in, out, true); 1769 encoder.flush(out); 1770 if(out.position()!= 20){ 1771 errln("Did not get the expected position from flush"); 1772 } 1773 1774 }catch (Exception ex){ 1775 errln("Could not create encoder for iso-2022-jp exception: "+ex.toString()); 1776 } 1777 } 1778 */ 1779 1780 @Test TestISO88591()1781 public void TestISO88591() { 1782 1783 Charset cs = new CharsetProviderICU().charsetForName("iso-8859-1"); 1784 if(cs!=null){ 1785 CharsetEncoder encoder = cs.newEncoder(); 1786 if(encoder!=null){ 1787 encoder.canEncode("\uc2a3"); 1788 }else{ 1789 errln("Could not create encoder for iso-8859-1"); 1790 } 1791 }else{ 1792 errln("Could not create Charset for iso-8859-1"); 1793 } 1794 1795 } 1796 1797 @Test TestUTF8Encode()1798 public void TestUTF8Encode() { 1799 // Test with a lead surrogate in the middle of the input text. 1800 // Java API behavior is unclear for surrogates at the end, see ticket #11546. 
1801 CharBuffer in = CharBuffer.wrap("\ud800a"); 1802 ByteBuffer out = ByteBuffer.allocate(30); 1803 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1804 CoderResult result = encoderICU.encode(in, out, true); 1805 1806 if (result.isMalformed()) { 1807 logln("\\ud800 is malformed for ICU4JNI utf-8 encoder"); 1808 } else if (result.isUnderflow()) { 1809 errln("FAIL: \\ud800 is OK for ICU4JNI utf-8 encoder"); 1810 } 1811 1812 in.position(0); 1813 out.clear(); 1814 1815 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1816 result = encoderJDK.encode(in, out, true); 1817 if (result.isMalformed()) { 1818 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1819 } else if (result.isUnderflow()) { 1820 errln("BAD: \\ud800 is OK for JDK utf-8 encoder"); 1821 } 1822 } 1823 1824 /* private void printCB(CharBuffer buf){ 1825 buf.rewind(); 1826 while(buf.hasRemaining()){ 1827 System.out.println(hex(buf.get())); 1828 } 1829 buf.rewind(); 1830 } 1831 */ 1832 1833 @Test TestUTF8()1834 public void TestUTF8() throws CharacterCodingException{ 1835 try{ 1836 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1837 encoderICU.encode(CharBuffer.wrap("\ud800")); 1838 errln("\\ud800 is OK for ICU4JNI utf-8 encoder"); 1839 }catch (Exception e) { 1840 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1841 //e.printStackTrace(); 1842 } 1843 1844 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1845 try { 1846 encoderJDK.encode(CharBuffer.wrap("\ud800")); 1847 errln("\\ud800 is OK for JDK utf-8 encoder"); 1848 } catch (Exception e) { 1849 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1850 //e.printStackTrace(); 1851 } 1852 } 1853 1854 @Test TestUTF16Bom()1855 public void TestUTF16Bom(){ 1856 1857 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-16"); 1858 char[] in = new char[] { 0x1122, 0x2211, 0x3344, 0x4433, 1859 0x5566, 0x6655, 0x7788, 0x8877, 0x9900 
}; 1860 CharBuffer inBuf = CharBuffer.allocate(in.length); 1861 inBuf.put(in); 1862 CharsetEncoder encoder = cs.newEncoder(); 1863 ByteBuffer outBuf = ByteBuffer.allocate(in.length*2+2); 1864 inBuf.rewind(); 1865 encoder.encode(inBuf, outBuf, true); 1866 outBuf.rewind(); 1867 if(outBuf.get(0)!= (byte)0xFE && outBuf.get(1)!= (byte)0xFF){ 1868 errln("The UTF16 encoder did not appended bom. Length returned: " + outBuf.remaining()); 1869 } 1870 while(outBuf.hasRemaining()){ 1871 logln("0x"+hex(outBuf.get())); 1872 } 1873 CharsetDecoder decoder = cs.newDecoder(); 1874 outBuf.rewind(); 1875 CharBuffer rt = CharBuffer.allocate(in.length); 1876 CoderResult cr = decoder.decode(outBuf, rt, true); 1877 if(cr.isError()){ 1878 errln("Decoding with BOM failed. Error: "+ cr.toString()); 1879 } 1880 equals(rt, in); 1881 { 1882 rt.clear(); 1883 outBuf.rewind(); 1884 Charset utf16 = Charset.forName("UTF-16"); 1885 CharsetDecoder dc = utf16.newDecoder(); 1886 cr = dc.decode(outBuf, rt, true); 1887 equals(rt, in); 1888 } 1889 } 1890 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush)1891 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1892 boolean throwException, boolean flush) throws BufferOverflowException, Exception { 1893 smBufDecode(decoder, encoding, source, target, throwException, flush, true); 1894 } 1895 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush, boolean backedByArray)1896 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1897 boolean throwException, boolean flush, boolean backedByArray) throws BufferOverflowException, Exception { 1898 smBufDecode(decoder, encoding, source, target, throwException, flush, backedByArray, -1); 1899 } 1900 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, 
CharBuffer target, boolean throwException, boolean flush, boolean backedByArray, int targetLimit)1901 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1902 boolean throwException, boolean flush, boolean backedByArray, int targetLimit) 1903 throws BufferOverflowException, Exception { 1904 ByteBuffer mySource; 1905 CharBuffer myTarget; 1906 if (backedByArray) { 1907 mySource = ByteBuffer.allocate(source.capacity()); 1908 myTarget = CharBuffer.allocate(target.capacity()); 1909 } else { 1910 // this does not guarantee by any means that mySource and myTarget 1911 // are not backed by arrays 1912 mySource = ByteBuffer.allocateDirect(source.capacity()); 1913 myTarget = ByteBuffer.allocateDirect(target.capacity() * 2).asCharBuffer(); 1914 } 1915 mySource.position(source.position()); 1916 for (int i = source.position(); i < source.limit(); i++) 1917 mySource.put(i, source.get(i)); 1918 1919 { 1920 decoder.reset(); 1921 myTarget.limit(target.limit()); 1922 mySource.limit(source.limit()); 1923 mySource.position(source.position()); 1924 CoderResult result = CoderResult.UNDERFLOW; 1925 result = decoder.decode(mySource, myTarget, true); 1926 if (flush) { 1927 result = decoder.flush(myTarget); 1928 } 1929 if (result.isError()) { 1930 if (throwException) { 1931 throw new Exception(); 1932 } 1933 errln("Test complete buffers while decoding failed. 
" + result.toString()); 1934 return; 1935 } 1936 if (result.isOverflow()) { 1937 if (throwException) { 1938 throw new BufferOverflowException(); 1939 } 1940 errln("Test complete buffers while decoding threw overflow exception"); 1941 return; 1942 } 1943 myTarget.limit(myTarget.position()); 1944 myTarget.position(0); 1945 target.position(0); 1946 if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) { 1947 errln(" Test complete buffers while decoding " + encoding + " TO Unicode--failed"); 1948 } 1949 } 1950 if (isQuick()) { 1951 return; 1952 } 1953 { 1954 decoder.reset(); 1955 myTarget.limit(target.position()); 1956 mySource.limit(source.position()); 1957 mySource.position(source.position()); 1958 myTarget.clear(); 1959 myTarget.position(0); 1960 1961 int inputLen = mySource.remaining(); 1962 1963 CoderResult result = CoderResult.UNDERFLOW; 1964 for (int i = 1; i <= inputLen; i++) { 1965 mySource.limit(i); 1966 if (i == inputLen) { 1967 result = decoder.decode(mySource, myTarget, true); 1968 } else { 1969 result = decoder.decode(mySource, myTarget, false); 1970 } 1971 if (result.isError()) { 1972 errln("Test small input buffers while decoding failed. 
" + result.toString()); 1973 break; 1974 } 1975 if (result.isOverflow()) { 1976 if (throwException) { 1977 throw new BufferOverflowException(); 1978 } 1979 errln("Test small input buffers while decoding threw overflow exception"); 1980 break; 1981 } 1982 1983 } 1984 if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) { 1985 errln("Test small input buffers while decoding " + encoding + " TO Unicode--failed"); 1986 } 1987 } 1988 { 1989 decoder.reset(); 1990 myTarget.limit(0); 1991 mySource.limit(0); 1992 mySource.position(source.position()); 1993 myTarget.clear(); 1994 while (true) { 1995 CoderResult result = decoder.decode(mySource, myTarget, false); 1996 if (result.isUnderflow()) { 1997 if (mySource.limit() < source.limit()) 1998 mySource.limit(mySource.limit() + 1); 1999 } else if (result.isOverflow()) { 2000 if (myTarget.limit() < target.limit()) 2001 myTarget.limit(myTarget.limit() + 1); 2002 else 2003 break; 2004 } else /*if (result.isError())*/ { 2005 errln("Test small output buffers while decoding " + result.toString()); 2006 } 2007 if (mySource.position() == mySource.limit()) { 2008 result = decoder.decode(mySource, myTarget, true); 2009 if (result.isError()) { 2010 errln("Test small output buffers while decoding " + result.toString()); 2011 } 2012 result = decoder.flush(myTarget); 2013 if (result.isError()) { 2014 errln("Test small output buffers while decoding " + result.toString()); 2015 } 2016 break; 2017 } 2018 } 2019 2020 if (!equals(myTarget, target, targetLimit)) { 2021 errln("Test small output buffers " + encoding + " TO Unicode failed"); 2022 } 2023 } 2024 } 2025 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush)2026 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2027 boolean throwException, boolean flush) throws Exception, BufferOverflowException { 2028 smBufEncode(encoder, encoding, source, 
target, throwException, flush, true); 2029 } 2030 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush, boolean backedByArray)2031 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2032 boolean throwException, boolean flush, boolean backedByArray) throws Exception, BufferOverflowException { 2033 smBufEncode(encoder, encoding, source, target, throwException, flush, true, -1); 2034 } 2035 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush, boolean backedByArray, int targetLimit)2036 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2037 boolean throwException, boolean flush, boolean backedByArray, int targetLimit) throws Exception, 2038 BufferOverflowException { 2039 logln("Running smBufEncode for " + encoding + " with class " + encoder); 2040 2041 CharBuffer mySource; 2042 ByteBuffer myTarget; 2043 if (backedByArray) { 2044 mySource = CharBuffer.allocate(source.capacity()); 2045 myTarget = ByteBuffer.allocate(target.capacity()); 2046 } else { 2047 mySource = ByteBuffer.allocateDirect(source.capacity() * 2).asCharBuffer(); 2048 myTarget = ByteBuffer.allocateDirect(target.capacity()); 2049 } 2050 mySource.position(source.position()); 2051 for (int i = source.position(); i < source.limit(); i++) 2052 mySource.put(i, source.get(i)); 2053 2054 myTarget.clear(); 2055 { 2056 logln("Running tests on small input buffers for " + encoding); 2057 encoder.reset(); 2058 myTarget.limit(target.limit()); 2059 mySource.limit(source.limit()); 2060 mySource.position(source.position()); 2061 CoderResult result = null; 2062 2063 result = encoder.encode(mySource, myTarget, true); 2064 if (flush) { 2065 result = encoder.flush(myTarget); 2066 } 2067 2068 if (result.isError()) { 2069 if (throwException) { 2070 throw new Exception(); 
2071 } 2072 errln("Test complete while encoding failed. " + result.toString()); 2073 } 2074 if (result.isOverflow()) { 2075 if (throwException) { 2076 throw new BufferOverflowException(); 2077 } 2078 errln("Test complete while encoding threw overflow exception"); 2079 } 2080 if (!equals(myTarget, target, targetLimit)) { 2081 errln("Test complete buffers while encoding for " + encoding + " failed"); 2082 2083 } else { 2084 logln("Tests complete buffers for " + encoding + " passed"); 2085 } 2086 } 2087 if (isQuick()) { 2088 return; 2089 } 2090 { 2091 logln("Running tests on small input buffers for " + encoding); 2092 encoder.reset(); 2093 myTarget.clear(); 2094 myTarget.limit(target.limit()); 2095 mySource.limit(source.limit()); 2096 mySource.position(source.position()); 2097 int inputLen = mySource.limit(); 2098 CoderResult result = null; 2099 for (int i = 1; i <= inputLen; i++) { 2100 mySource.limit(i); 2101 result = encoder.encode(mySource, myTarget, false); 2102 if (result.isError()) { 2103 errln("Test small input buffers while encoding failed. 
" + result.toString()); 2104 } 2105 if (result.isOverflow()) { 2106 if (throwException) { 2107 throw new BufferOverflowException(); 2108 } 2109 errln("Test small input buffers while encoding threw overflow exception"); 2110 } 2111 } 2112 if (!equals(myTarget, target, targetLimit)) { 2113 errln("Test small input buffers " + encoding + " From Unicode failed"); 2114 } else { 2115 logln("Tests on small input buffers for " + encoding + " passed"); 2116 } 2117 } 2118 { 2119 logln("Running tests on small output buffers for " + encoding); 2120 encoder.reset(); 2121 myTarget.clear(); 2122 myTarget.limit(target.limit()); 2123 mySource.limit(source.limit()); 2124 mySource.position(source.position()); 2125 mySource.position(0); 2126 myTarget.position(0); 2127 2128 logln("myTarget.limit: " + myTarget.limit() + " myTarget.capcity: " + myTarget.capacity()); 2129 2130 while (true) { 2131 int pos = myTarget.position(); 2132 2133 CoderResult result = encoder.encode(mySource, myTarget, false); 2134 logln("myTarget.Position: " + pos + " myTarget.limit: " + myTarget.limit()); 2135 logln("mySource.position: " + mySource.position() + " mySource.limit: " + mySource.limit()); 2136 2137 if (result.isError()) { 2138 errln("Test small output buffers while encoding " + result.toString()); 2139 } 2140 if (mySource.position() == mySource.limit()) { 2141 result = encoder.encode(mySource, myTarget, true); 2142 if (result.isError()) { 2143 errln("Test small output buffers while encoding " + result.toString()); 2144 } 2145 2146 myTarget.limit(myTarget.capacity()); 2147 result = encoder.flush(myTarget); 2148 if (result.isError()) { 2149 errln("Test small output buffers while encoding " + result.toString()); 2150 } 2151 break; 2152 } 2153 } 2154 if (!equals(myTarget, target, targetLimit)) { 2155 errln("Test small output buffers " + encoding + " From Unicode failed."); 2156 } 2157 logln("Tests on small output buffers for " + encoding + " passed"); 2158 } 2159 } 2160 2161 2162 //TODO 2163 /* 2164 @Test 
2165 public void TestString(ByteBuffer bSource, CharBuffer uSource) throws Exception { 2166 try { 2167 { 2168 String source = uSource.toString(); 2169 byte[] target = source.getBytes(m_encoding); 2170 if (!equals(target, bSource.array())) { 2171 errln("encode using string API failed"); 2172 } 2173 } 2174 { 2175 2176 String target = new String(bSource.array(), m_encoding); 2177 if (!equals(uSource, target.toCharArray())) { 2178 errln("decode using string API failed"); 2179 } 2180 } 2181 } catch (Exception e) { 2182 //e.printStackTrace(); 2183 errln(e.getMessage()); 2184 } 2185 } 2186 2187 /*private void fromUnicodeTest() throws Exception { 2188 2189 logln("Loaded Charset: " + charset.getClass().toString()); 2190 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2191 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2192 2193 ByteBuffer myTarget = ByteBuffer.allocate(gbSource.length); 2194 logln("Created ByteBuffer of length: " + uSource.length); 2195 CharBuffer mySource = CharBuffer.wrap(uSource); 2196 logln("Wrapped ByteBuffer with CharBuffer "); 2197 encoder.reset(); 2198 logln("Test Unicode to " + encoding ); 2199 encoder.encode(mySource, myTarget, true); 2200 if (!equals(myTarget, gbSource)) { 2201 errln("--Test Unicode to " + encoding + ": FAILED"); 2202 } 2203 logln("Test Unicode to " + encoding +" passed"); 2204 } 2205 2206 @Test 2207 public void TestToUnicode( ) throws Exception { 2208 2209 logln("Loaded Charset: " + charset.getClass().toString()); 2210 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2211 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2212 2213 CharBuffer myTarget = CharBuffer.allocate(uSource.length); 2214 ByteBuffer mySource = ByteBuffer.wrap(getByteArray(gbSource)); 2215 decoder.reset(); 2216 CoderResult result = decoder.decode(mySource, myTarget, true); 2217 if (result.isError()) { 2218 errln("Test ToUnicode -- FAILED"); 2219 } 2220 if (!equals(myTarget, uSource)) { 
2221 errln("--Test " + encoding + " to Unicode :FAILED"); 2222 } 2223 } 2224 2225 public static byte[] getByteArray(char[] source) { 2226 byte[] target = new byte[source.length]; 2227 int i = source.length; 2228 for (; --i >= 0;) { 2229 target[i] = (byte) source[i]; 2230 } 2231 return target; 2232 } 2233 /* 2234 private void smBufCharset(Charset charset) { 2235 try { 2236 ByteBuffer bTarget = charset.encode(CharBuffer.wrap(uSource)); 2237 CharBuffer uTarget = 2238 charset.decode(ByteBuffer.wrap(getByteArray(gbSource))); 2239 2240 if (!equals(uTarget, uSource)) { 2241 errln("Test " + charset.toString() + " to Unicode :FAILED"); 2242 } 2243 if (!equals(bTarget, gbSource)) { 2244 errln("Test " + charset.toString() + " from Unicode :FAILED"); 2245 } 2246 } catch (Exception ex) { 2247 errln("Encountered exception in smBufCharset"); 2248 } 2249 } 2250 2251 @Test 2252 public void TestMultithreaded() throws Exception { 2253 final Charset cs = Charset.forName(encoding); 2254 if (cs == charset) { 2255 errln("The objects are equal"); 2256 } 2257 smBufCharset(cs); 2258 try { 2259 final Thread t1 = new Thread() { 2260 public void run() { 2261 // commented out since the methods on 2262 // Charset API are supposed to be thread 2263 // safe ... 
to test it we don't sync 2264 2265 // synchronized(charset){ 2266 while (!interrupted()) { 2267 try { 2268 smBufCharset(cs); 2269 } catch (UnsupportedCharsetException ueEx) { 2270 errln(ueEx.toString()); 2271 } 2272 } 2273 2274 // } 2275 } 2276 }; 2277 final Thread t2 = new Thread() { 2278 public void run() { 2279 // synchronized(charset){ 2280 while (!interrupted()) { 2281 try { 2282 smBufCharset(cs); 2283 } catch (UnsupportedCharsetException ueEx) { 2284 errln(ueEx.toString()); 2285 } 2286 } 2287 2288 //} 2289 } 2290 }; 2291 t1.start(); 2292 t2.start(); 2293 int i = 0; 2294 for (;;) { 2295 if (i > 1000000000) { 2296 try { 2297 t1.interrupt(); 2298 } catch (Exception e) { 2299 } 2300 try { 2301 t2.interrupt(); 2302 } catch (Exception e) { 2303 } 2304 break; 2305 } 2306 i++; 2307 } 2308 } catch (Exception e) { 2309 throw e; 2310 } 2311 } 2312 2313 @Test 2314 public void TestSynchronizedMultithreaded() throws Exception { 2315 // Methods on CharsetDecoder and CharsetEncoder classes 2316 // are inherently unsafe if accessed by multiple concurrent 2317 // thread so we synchronize them 2318 final Charset charset = Charset.forName(encoding); 2319 final CharsetDecoder decoder = charset.newDecoder(); 2320 final CharsetEncoder encoder = charset.newEncoder(); 2321 try { 2322 final Thread t1 = new Thread() { 2323 public void run() { 2324 while (!interrupted()) { 2325 try { 2326 synchronized (encoder) { 2327 smBufEncode(encoder, encoding); 2328 } 2329 synchronized (decoder) { 2330 smBufDecode(decoder, encoding); 2331 } 2332 } catch (UnsupportedCharsetException ueEx) { 2333 errln(ueEx.toString()); 2334 } 2335 } 2336 2337 } 2338 }; 2339 final Thread t2 = new Thread() { 2340 public void run() { 2341 while (!interrupted()) { 2342 try { 2343 synchronized (encoder) { 2344 smBufEncode(encoder, encoding); 2345 } 2346 synchronized (decoder) { 2347 smBufDecode(decoder, encoding); 2348 } 2349 } catch (UnsupportedCharsetException ueEx) { 2350 errln(ueEx.toString()); 2351 } 2352 } 2353 } 
2354 }; 2355 t1.start(); 2356 t2.start(); 2357 int i = 0; 2358 for (;;) { 2359 if (i > 1000000000) { 2360 try { 2361 t1.interrupt(); 2362 } catch (Exception e) { 2363 } 2364 try { 2365 t2.interrupt(); 2366 } catch (Exception e) { 2367 } 2368 break; 2369 } 2370 i++; 2371 } 2372 } catch (Exception e) { 2373 throw e; 2374 } 2375 } 2376 */ 2377 2378 @Test TestMBCS()2379 public void TestMBCS(){ 2380 { 2381 // Encoder: from Unicode conversion 2382 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("ibm-971").newEncoder(); 2383 ByteBuffer out = ByteBuffer.allocate(6); 2384 encoderICU.onUnmappableCharacter(CodingErrorAction.REPLACE); 2385 CoderResult result = encoderICU.encode(CharBuffer.wrap("\u0131\u0061\u00a1"), out, true); 2386 if(!result.isError()){ 2387 byte[] expected = {(byte)0xA9, (byte)0xA5, (byte)0xAF, (byte)0xFE, (byte)0xA2, (byte)0xAE}; 2388 if(!equals(expected, out.array())){ 2389 // Android patch: Skip tests that fail with customized data. 2390 logln("Did not get the expected result for substitution bytes. Got: "+ 2391 hex(out.array())); 2392 // Android patch end. 2393 } 2394 logln("Output: "+ hex(out.array())); 2395 }else{ 2396 errln("Encode operation failed for encoder: "+encoderICU.toString()); 2397 } 2398 } 2399 { 2400 // Decoder: to Unicode conversion 2401 CharsetDecoder decoderICU = new CharsetProviderICU().charsetForName("ibm-971").newDecoder(); 2402 CharBuffer out = CharBuffer.allocate(3); 2403 decoderICU.onMalformedInput(CodingErrorAction.REPLACE); 2404 CoderResult result = decoderICU.decode(ByteBuffer.wrap(new byte[] { (byte)0xA2, (byte)0xAE, (byte)0x12, (byte)0x34, (byte)0xEF, (byte)0xDC }), out, true); 2405 if(!result.isError()){ 2406 char[] expected = {'\u00a1', '\ufffd', '\u6676'}; 2407 if(!equals(expected, out.array())){ 2408 errln("Did not get the expected result for substitution chars. 
Got: "+ 2409 hex(out.array())); 2410 } 2411 logln("Output: "+ hex(out.array())); 2412 }else{ 2413 errln("Decode operation failed for encoder: "+decoderICU.toString()); 2414 } 2415 } 2416 } 2417 2418 @Test TestJB4897()2419 public void TestJB4897(){ 2420 CharsetProviderICU provider = new CharsetProviderICU(); 2421 Charset charset = provider.charsetForName("x-abracadabra"); 2422 if(charset!=null && charset.canEncode()== true){ 2423 errln("provider.charsetForName() does not validate the charset names" ); 2424 } 2425 } 2426 2427 @Test TestJB5027()2428 public void TestJB5027() { 2429 CharsetProviderICU provider= new CharsetProviderICU(); 2430 2431 Charset fake = provider.charsetForName("doesNotExist"); 2432 if(fake != null){ 2433 errln("\"doesNotExist\" returned " + fake); 2434 } 2435 Charset xfake = provider.charsetForName("x-doesNotExist"); 2436 if(xfake!=null){ 2437 errln("\"x-doesNotExist\" returned " + xfake); 2438 } 2439 } 2440 2441 //test to make sure that number of aliases and canonical names are in the charsets that are in 2442 @Test TestAllNames()2443 public void TestAllNames() { 2444 2445 CharsetProviderICU provider= new CharsetProviderICU(); 2446 Object[] available = CharsetProviderICU.getAvailableNames(); 2447 for(int i=0; i<available.length;i++){ 2448 try{ 2449 String canon = CharsetProviderICU.getICUCanonicalName((String)available[i]); 2450 2451 // ',' is not allowed by Java's charset name checker 2452 if(canon.indexOf(',')>=0){ 2453 continue; 2454 } 2455 Charset cs = provider.charsetForName((String)available[i]); 2456 2457 Object[] javaAliases = cs.aliases().toArray(); 2458 //seach for ICU canonical name in javaAliases 2459 boolean inAliasList = false; 2460 for(int j=0; j<javaAliases.length; j++){ 2461 String java = (String) javaAliases[j]; 2462 if(java.equals(canon)){ 2463 logln("javaAlias: " + java + " canon: " + canon); 2464 inAliasList = true; 2465 } 2466 } 2467 if(inAliasList == false){ 2468 errln("Could not find ICU canonical name: "+canon+ " for 
java canonical name: "+ available[i]+ " "+ i); 2469 } 2470 }catch(UnsupportedCharsetException ex){ 2471 errln("could no load charset "+ available[i]+" "+ex.getMessage()); 2472 continue; 2473 } 2474 } 2475 } 2476 2477 @Test TestDecoderImplFlush()2478 public void TestDecoderImplFlush() { 2479 CharsetProviderICU provider = new CharsetProviderICU(); 2480 Charset ics = provider.charsetForName("UTF-16"); 2481 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2482 execDecoder(jcs); 2483 execDecoder(ics); 2484 } 2485 2486 @Test TestEncoderImplFlush()2487 public void TestEncoderImplFlush() { 2488 CharsetProviderICU provider = new CharsetProviderICU(); 2489 Charset ics = provider.charsetForName("UTF-16"); 2490 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2491 execEncoder(jcs); 2492 execEncoder(ics); 2493 } execDecoder(Charset cs)2494 private void execDecoder(Charset cs){ 2495 CharsetDecoder decoder = cs.newDecoder(); 2496 decoder.onMalformedInput(CodingErrorAction.REPORT); 2497 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2498 CharBuffer out = CharBuffer.allocate(10); 2499 CoderResult result = decoder.decode(ByteBuffer.wrap(new byte[] { -1, 2500 -2, 32, 0, 98 }), out, false); 2501 result = decoder.decode(ByteBuffer.wrap(new byte[] { 98 }), out, true); 2502 2503 logln(cs.getClass().toString()+ ":" +result.toString()); 2504 try { 2505 result = decoder.flush(out); 2506 logln(cs.getClass().toString()+ ":" +result.toString()); 2507 } catch (Exception e) { 2508 errln(e.getMessage()+" "+cs.getClass().toString()); 2509 } 2510 } execEncoder(Charset cs)2511 private void execEncoder(Charset cs){ 2512 CharsetEncoder encoder = cs.newEncoder(); 2513 encoder.onMalformedInput(CodingErrorAction.REPORT); 2514 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2515 ByteBuffer out = ByteBuffer.allocate(10); 2516 CoderResult result = encoder.encode(CharBuffer.wrap(new char[] { '\uFFFF', 2517 '\u2345', 32, 98 }), out, false); 2518 
logln(cs.getClass().toString()+ ":" +result.toString()); 2519 result = encoder.encode(CharBuffer.wrap(new char[] { 98 }), out, true); 2520 2521 logln(cs.getClass().toString()+ ":" +result.toString()); 2522 try { 2523 result = encoder.flush(out); 2524 logln(cs.getClass().toString()+ ":" +result.toString()); 2525 } catch (Exception e) { 2526 errln(e.getMessage()+" "+cs.getClass().toString()); 2527 } 2528 } 2529 2530 @Test TestDecodeMalformed()2531 public void TestDecodeMalformed() { 2532 CharsetProviderICU provider = new CharsetProviderICU(); 2533 Charset ics = provider.charsetForName("UTF-16BE"); 2534 //Use SUN's charset 2535 Charset jcs = Charset.forName("UTF-16"); 2536 CoderResult ir = execMalformed(ics); 2537 CoderResult jr = execMalformed(jcs); 2538 if(ir!=jr){ 2539 errln("ICU's decoder did not return the same result as Sun. ICU: "+ir.toString()+" Sun: "+jr.toString()); 2540 } 2541 } 2542 execMalformed(Charset cs)2543 private CoderResult execMalformed(Charset cs){ 2544 CharsetDecoder decoder = cs.newDecoder(); 2545 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2546 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2547 ByteBuffer in = ByteBuffer.wrap(new byte[] { 0x00, 0x41, 0x00, 0x42, 0x01 }); 2548 CharBuffer out = CharBuffer.allocate(3); 2549 return decoder.decode(in, out, true); 2550 } 2551 2552 @Test TestJavaUTF16Decoder()2553 public void TestJavaUTF16Decoder(){ 2554 CharsetProviderICU provider = new CharsetProviderICU(); 2555 Charset ics = provider.charsetForName("UTF-16BE"); 2556 //Use SUN's charset 2557 Charset jcs = Charset.forName("UTF-16"); 2558 Exception ie = execConvertAll(ics); 2559 Exception je = execConvertAll(jcs); 2560 if(ie!=je){ 2561 errln("ICU's decoder did not return the same result as Sun. 
ICU: "+ie.toString()+" Sun: "+je.toString()); 2562 } 2563 } execConvertAll(Charset cs)2564 private Exception execConvertAll(Charset cs){ 2565 ByteBuffer in = ByteBuffer.allocate(400); 2566 int i=0; 2567 while(in.position()!=in.capacity()){ 2568 in.put((byte)0xD8); 2569 in.put((byte)i); 2570 in.put((byte)0xDC); 2571 in.put((byte)i); 2572 i++; 2573 } 2574 in.limit(in.position()); 2575 in.position(0); 2576 CharsetDecoder decoder = cs.newDecoder(); 2577 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2578 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2579 try{ 2580 CharBuffer out = decoder.decode(in); 2581 if(out!=null){ 2582 logln(cs.toString()+" encoing succeeded as expected!"); 2583 } 2584 }catch ( Exception ex){ 2585 errln("Did not get expected exception for encoding: "+cs.toString()); 2586 return ex; 2587 } 2588 return null; 2589 } 2590 2591 @Test TestUTF32BOM()2592 public void TestUTF32BOM(){ 2593 2594 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-32"); 2595 char[] in = new char[] { 0xd800, 0xdc00, 2596 0xd801, 0xdc01, 2597 0xdbff, 0xdfff, 2598 0xd900, 0xdd00, 2599 0x0000, 0x0041, 2600 0x0000, 0x0042, 2601 0x0000, 0x0043}; 2602 2603 CharBuffer inBuf = CharBuffer.allocate(in.length); 2604 inBuf.put(in); 2605 CharsetEncoder encoder = cs.newEncoder(); 2606 ByteBuffer outBuf = ByteBuffer.allocate(in.length*4+4); 2607 inBuf.rewind(); 2608 encoder.encode(inBuf, outBuf, true); 2609 outBuf.rewind(); 2610 if(outBuf.get(0)!= (byte)0x00 && outBuf.get(1)!= (byte)0x00 && 2611 outBuf.get(2)!= (byte)0xFF && outBuf.get(3)!= (byte)0xFE){ 2612 errln("The UTF32 encoder did not appended bom. 
Length returned: " + outBuf.remaining()); 2613 } 2614 while(outBuf.hasRemaining()){ 2615 logln("0x"+hex(outBuf.get())); 2616 } 2617 CharsetDecoder decoder = cs.newDecoder(); 2618 outBuf.limit(outBuf.position()); 2619 outBuf.rewind(); 2620 CharBuffer rt = CharBuffer.allocate(in.length); 2621 CoderResult cr = decoder.decode(outBuf, rt, true); 2622 if(cr.isError()){ 2623 errln("Decoding with BOM failed. Error: "+ cr.toString()); 2624 } 2625 equals(rt, in); 2626 try{ 2627 rt.clear(); 2628 outBuf.rewind(); 2629 Charset utf16 = Charset.forName("UTF-32"); 2630 CharsetDecoder dc = utf16.newDecoder(); 2631 cr = dc.decode(outBuf, rt, true); 2632 equals(rt, in); 2633 }catch(UnsupportedCharsetException ex){ 2634 // swallow the expection. 2635 } 2636 } 2637 2638 /* 2639 * Michael Ow 2640 * Modified 070424 2641 */ 2642 /*The following two methods provides the option of exceptions when Decoding 2643 * and Encoding if needed for testing purposes. 2644 */ smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target)2645 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target) { 2646 smBufDecode(decoder, encoding, source, target, true); 2647 } smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean backedByArray)2648 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean backedByArray) { 2649 try { 2650 smBufDecode(decoder, encoding, source, target, false, false, backedByArray); 2651 } 2652 catch (Exception ex) { 2653 System.out.println("!exception!"); 2654 } 2655 } smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target)2656 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target) { 2657 smBufEncode(encoder, encoding, source, target, true); 2658 } smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer 
target, boolean backedByArray)2659 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean backedByArray) { 2660 try { 2661 smBufEncode(encoder, encoding, source, target, false, false); 2662 } 2663 catch (Exception ex) { 2664 System.out.println("!exception!"); 2665 } 2666 } 2667 2668 //Test CharsetICUProvider 2669 @Test TestNullCanonicalName()2670 public void TestNullCanonicalName() { 2671 String enc = null; 2672 String canonicalName = CharsetProviderICU.getICUCanonicalName(enc); 2673 2674 if (canonicalName != null) { 2675 errln("getICUCanonicalName return a non-null string for given null string"); 2676 } 2677 } 2678 2679 @Test TestGetAllNames()2680 public void TestGetAllNames() { 2681 String[] names = null; 2682 2683 names = CharsetProviderICU.getAllNames(); 2684 2685 if (names == null) { 2686 errln("getAllNames returned a null string."); 2687 } 2688 } 2689 2690 //Test CharsetICU 2691 @Test TestCharsetContains()2692 public void TestCharsetContains() { 2693 boolean test; 2694 2695 CharsetProvider provider = new CharsetProviderICU(); 2696 Charset cs1 = provider.charsetForName("UTF-32"); 2697 Charset cs2 = null; 2698 2699 test = cs1.contains(cs2); 2700 2701 if (test != false) { 2702 errln("Charset.contains returned true for a null charset."); 2703 } 2704 2705 cs2 = CharsetICU.forNameICU("UTF-32"); 2706 2707 test = cs1.contains(cs2); 2708 2709 if (test != true) { 2710 errln("Charset.contains returned false for an identical charset."); 2711 } 2712 2713 cs2 = provider.charsetForName("UTF-8"); 2714 2715 test = cs1.contains(cs2); 2716 2717 if (test != false) { 2718 errln("Charset.contains returned true for a different charset."); 2719 } 2720 } 2721 2722 @Test TestCharsetICUNullCharsetName()2723 public void TestCharsetICUNullCharsetName() { 2724 String charsetName = null; 2725 2726 try { 2727 CharsetICU.forNameICU(charsetName); 2728 errln("CharsetICU.forName should have thown an exception after getting a null 
charsetName."); 2729 } 2730 catch(Exception ex) { 2731 } 2732 } 2733 2734 //Test CharsetASCII 2735 @Test TestCharsetASCIIOverFlow()2736 public void TestCharsetASCIIOverFlow() { 2737 int byteBufferLimit; 2738 int charBufferLimit; 2739 2740 CharsetProvider provider = new CharsetProviderICU(); 2741 Charset cs = provider.charsetForName("ASCII"); 2742 CharsetEncoder encoder = cs.newEncoder(); 2743 CharsetDecoder decoder = cs.newDecoder(); 2744 2745 CharBuffer charBuffer = CharBuffer.allocate(0x90); 2746 ByteBuffer byteBuffer = ByteBuffer.allocate(0x90); 2747 2748 CharBuffer charBufferTest = CharBuffer.allocate(0xb0); 2749 ByteBuffer byteBufferTest = ByteBuffer.allocate(0xb0); 2750 2751 for(int j=0;j<=0x7f; j++){ 2752 charBuffer.put((char)j); 2753 byteBuffer.put((byte)j); 2754 } 2755 2756 byteBuffer.limit(byteBufferLimit = byteBuffer.position()); 2757 byteBuffer.position(0); 2758 charBuffer.limit(charBufferLimit = charBuffer.position()); 2759 charBuffer.position(0); 2760 2761 //test for overflow 2762 byteBufferTest.limit(byteBufferLimit - 5); 2763 byteBufferTest.position(0); 2764 charBufferTest.limit(charBufferLimit - 5); 2765 charBufferTest.position(0); 2766 try { 2767 smBufDecode(decoder, "ASCII", byteBuffer, charBufferTest, true, false); 2768 errln("Overflow exception while decoding ASCII should have been thrown."); 2769 } 2770 catch(Exception ex) { 2771 } 2772 try { 2773 smBufEncode(encoder, "ASCII", charBuffer, byteBufferTest, true, false); 2774 errln("Overflow exception while encoding ASCII should have been thrown."); 2775 } 2776 catch (Exception ex) { 2777 } 2778 2779 // For better code coverage 2780 /* For better code coverage */ 2781 byte byteout[] = { 2782 (byte)0x01 2783 }; 2784 char charin[] = { 2785 (char)0x0001, (char)0x0002 2786 }; 2787 ByteBuffer bb = ByteBuffer.wrap(byteout); 2788 CharBuffer cb = CharBuffer.wrap(charin); 2789 // Cast up to CharSequence to insulate against the CharBuffer.subSequence() return type change 2790 // which makes code compiled 
for a newer JDK not run on an older one. 2791 CharBuffer cb2 = CharBuffer.wrap(((CharSequence)cb).subSequence(0, 2)); 2792 encoder.reset(); 2793 if (!(encoder.encode(cb2, bb, true)).isOverflow()) { 2794 errln("Overflow error while encoding ASCII should have occurred."); 2795 } 2796 } 2797 2798 //Test CharsetUTF7 2799 @Test TestCharsetUTF7()2800 public void TestCharsetUTF7() { 2801 CoderResult result = CoderResult.UNDERFLOW; 2802 CharsetProvider provider = new CharsetProviderICU(); 2803 Charset cs = provider.charsetForName("UTF-7"); 2804 CharsetEncoder encoder = cs.newEncoder(); 2805 CharsetDecoder decoder = cs.newDecoder(); 2806 2807 CharBuffer us = CharBuffer.allocate(0x100); 2808 ByteBuffer bs = ByteBuffer.allocate(0x100); 2809 2810 /* Unicode : A<not equal to Alpha Lamda>. */ 2811 /* UTF7: AImIDkQ. */ 2812 us.put((char)0x41); us.put((char)0x2262); us.put((char)0x391); us.put((char)0x39B); us.put((char)0x2e); 2813 bs.put((byte)0x41); bs.put((byte)0x2b); bs.put((byte)0x49); bs.put((byte)0x6d); 2814 bs.put((byte)0x49); bs.put((byte)0x44); bs.put((byte)0x6b); bs.put((byte)0x51); 2815 bs.put((byte)0x4f); bs.put((byte)0x62); bs.put((byte)0x2e); 2816 2817 bs.limit(bs.position()); 2818 bs.position(0); 2819 us.limit(us.position()); 2820 us.position(0); 2821 2822 smBufDecode(decoder, "UTF-7", bs, us); 2823 smBufEncode(encoder, "UTF-7", us, bs); 2824 2825 /* Testing UTF-7 toUnicode with substitute callbacks */ 2826 { 2827 byte [] bytesTestErrorConsumption = { 2828 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. 
*/ 2829 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 2830 2831 }; 2832 char [] unicodeTestErrorConsumption = { 2833 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e 2834 }; 2835 bs = ByteBuffer.wrap(bytesTestErrorConsumption); 2836 us = CharBuffer.wrap(unicodeTestErrorConsumption); 2837 2838 CodingErrorAction savedMal = decoder.malformedInputAction(); 2839 CodingErrorAction savedUMap = decoder.unmappableCharacterAction(); 2840 decoder.onMalformedInput(CodingErrorAction.REPLACE); 2841 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 2842 smBufDecode(decoder, "UTF-7 DE Error Consumption", bs, us); 2843 decoder.onMalformedInput(savedMal); 2844 decoder.onUnmappableCharacter(savedUMap); 2845 } 2846 /* ticket 6151 */ 2847 CharBuffer smallus = CharBuffer.allocate(1); 2848 ByteBuffer bigbs = ByteBuffer.allocate(3); 2849 bigbs.put((byte)0x41); bigbs.put((byte)0x41); bigbs.put((byte)0x41); 2850 bigbs.position(0); 2851 try { 2852 smBufDecode(decoder, "UTF-7-DE-Overflow", bigbs, smallus, true, false); 2853 errln("Buffer Overflow exception should have been thrown while decoding UTF-7."); 2854 } catch (Exception ex) { 2855 } 2856 2857 //The rest of the code in this method is to provide better code coverage 2858 CharBuffer ccus = CharBuffer.allocate(0x10); 2859 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 2860 2861 //start of charset decoder code coverage code 2862 //test for accurate illegal and control character checking 2863 ccbs.put((byte)0x0D); ccbs.put((byte)0x05); 2864 ccus.put((char)0x0000); 2865 2866 ccbs.limit(ccbs.position()); 2867 ccbs.position(0); 2868 ccus.limit(ccus.position()); 2869 ccus.position(0); 2870 2871 try { 2872 smBufDecode(decoder, "UTF-7-CC-DE-1", ccbs, ccus, true, false); 2873 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2874 } 2875 catch 
(Exception ex) { 2876 } 2877 2878 ccbs.clear(); 2879 ccus.clear(); 2880 2881 //test for illegal base64 character 2882 ccbs.put((byte)0x2b); ccbs.put((byte)0xff); 2883 ccus.put((char)0x0000); 2884 2885 ccbs.limit(ccbs.position()); 2886 ccbs.position(0); 2887 ccus.limit(ccus.position()); 2888 ccus.position(0); 2889 2890 try { 2891 smBufDecode(decoder, "UTF-7-CC-DE-2", ccbs, ccus, true, false); 2892 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2893 } 2894 catch (Exception ex) { 2895 } 2896 2897 ccbs.clear(); 2898 ccus.clear(); 2899 2900 //test for illegal order of the base64 character sequence 2901 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 2902 ccus.put((char)0x0000); ccus.put((char)0x0000); 2903 2904 ccbs.limit(ccbs.position()); 2905 ccbs.position(0); 2906 ccus.limit(ccus.position()); 2907 ccus.position(0); 2908 2909 try { 2910 smBufDecode(decoder, "UTF-7-CC-DE-3", ccbs, ccus, true, false); 2911 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2912 } 2913 catch (Exception ex) { 2914 } 2915 2916 ccbs.clear(); 2917 ccus.clear(); 2918 2919 //test for illegal order of the base64 character sequence 2920 ccbs.put((byte)0x2b); ccbs.put((byte)0x0a); ccbs.put((byte)0x09); 2921 ccus.put((char)0x0000); 2922 2923 ccbs.limit(ccbs.position()); 2924 ccbs.position(0); 2925 ccus.limit(ccus.position()); 2926 ccus.position(0); 2927 2928 try { 2929 smBufDecode(decoder, "UTF-7-CC-DE-4", ccbs, ccus, true, false); 2930 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2931 } 2932 catch (Exception ex) { 2933 } 2934 2935 ccbs.clear(); 2936 ccus.clear(); 2937 2938 //test for illegal order of the base64 character sequence 2939 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x0a); 2940 ccus.put((char)0x0000); 2941 2942 ccbs.limit(ccbs.position()); 2943 ccbs.position(0); 2944 ccus.limit(ccus.position()); 
2945 ccus.position(0); 2946 2947 try { 2948 smBufDecode(decoder, "UTF-7-CC-DE-5", ccbs, ccus, true, false); 2949 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2950 } 2951 catch (Exception ex) { 2952 } 2953 2954 ccbs.clear(); 2955 ccus.clear(); 2956 2957 //test for illegal order of the base64 character sequence 2958 ccbs.put((byte)0x2b); ccbs.put((byte)0x00); 2959 ccus.put((char)0x0000); 2960 2961 ccbs.limit(ccbs.position()); 2962 ccbs.position(0); 2963 ccus.limit(ccus.position()); 2964 ccus.position(0); 2965 2966 try { 2967 smBufDecode(decoder, "UTF-7-CC-DE-6", ccbs, ccus, true, false); 2968 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2969 } 2970 catch (Exception ex) { 2971 } 2972 2973 ccbs.clear(); 2974 ccus.clear(); 2975 2976 //test for overflow buffer error 2977 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); 2978 2979 ccbs.limit(ccbs.position()); 2980 ccbs.position(0); 2981 ccus.limit(0); 2982 ccus.position(0); 2983 2984 try { 2985 smBufDecode(decoder, "UTF-7-CC-DE-7", ccbs, ccus, true, false); 2986 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2987 } 2988 catch (Exception ex) { 2989 } 2990 2991 ccbs.clear(); 2992 ccus.clear(); 2993 2994 //test for overflow buffer error 2995 ccbs.put((byte)0x0c); ccbs.put((byte)0x0c); 2996 2997 ccbs.limit(ccbs.position()); 2998 ccbs.position(0); 2999 ccus.limit(0); 3000 ccus.position(0); 3001 3002 try { 3003 smBufDecode(decoder, "UTF-7-CC-DE-8", ccbs, ccus, true, false); 3004 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 3005 } 3006 catch (Exception ex) { 3007 } 3008 //end of charset decoder code coverage code 3009 3010 //start of charset encoder code coverage code 3011 ccbs.clear(); 3012 ccus.clear(); 3013 //test for overflow buffer error 3014 ccus.put((char)0x002b); 3015 ccbs.put((byte)0x2b); 3016 3017 ccbs.limit(ccbs.position()); 3018 ccbs.position(0); 3019 
ccus.limit(ccus.position()); 3020 ccus.position(0); 3021 3022 try { 3023 smBufEncode(encoder, "UTF-7-CC-EN-1", ccus, ccbs, true, false); 3024 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3025 } 3026 catch (Exception ex) { 3027 } 3028 3029 ccbs.clear(); 3030 ccus.clear(); 3031 3032 //test for overflow buffer error 3033 ccus.put((char)0x002b); ccus.put((char)0x2262); 3034 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3035 3036 ccbs.limit(ccbs.position()); 3037 ccbs.position(0); 3038 ccus.limit(ccus.position()); 3039 ccus.position(0); 3040 3041 try { 3042 smBufEncode(encoder, "UTF-7-CC-EN-2", ccus, ccbs, true, false); 3043 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3044 } 3045 catch (Exception ex) { 3046 } 3047 3048 ccbs.clear(); 3049 ccus.clear(); 3050 3051 //test for overflow buffer error 3052 ccus.put((char)0x2262); ccus.put((char)0x0049); 3053 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3054 ccbs.limit(ccbs.position()); 3055 ccbs.position(0); 3056 ccus.limit(ccus.position()); 3057 ccus.position(0); 3058 3059 try { 3060 smBufEncode(encoder, "UTF-7-CC-EN-3", ccus, ccbs, true, false); 3061 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3062 } 3063 catch (Exception ex) { 3064 } 3065 3066 ccbs.clear(); 3067 ccus.clear(); 3068 3069 //test for overflow buffer error 3070 ccus.put((char)0x2262); ccus.put((char)0x0395); 3071 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3072 ccbs.limit(ccbs.position()); 3073 ccbs.position(0); 3074 ccus.limit(ccus.position()); 3075 ccus.position(0); 3076 3077 try { 3078 smBufEncode(encoder, "UTF-7-CC-EN-4", ccus, ccbs, true, false); 3079 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3080 } 3081 catch (Exception ex) { 3082 } 3083 3084 
ccbs.clear(); 3085 ccus.clear(); 3086 3087 //test for overflow buffer error 3088 ccus.put((char)0x2262); ccus.put((char)0x0395); 3089 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3090 ccbs.limit(ccbs.position()); 3091 ccbs.position(0); 3092 ccus.limit(ccus.position()); 3093 ccus.position(0); 3094 3095 try { 3096 smBufEncode(encoder, "UTF-7-CC-EN-5", ccus, ccbs, true, false); 3097 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3098 } 3099 catch (Exception ex) { 3100 } 3101 3102 ccbs.clear(); 3103 ccus.clear(); 3104 3105 //test for overflow buffer error 3106 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3107 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3108 ccbs.limit(ccbs.position()); 3109 ccbs.position(0); 3110 ccus.limit(ccus.position()); 3111 ccus.position(0); 3112 3113 try { 3114 smBufEncode(encoder, "UTF-7-CC-EN-6", ccus, ccbs, true, false); 3115 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3116 } 3117 catch (Exception ex) { 3118 } 3119 3120 ccbs.clear(); 3121 ccus.clear(); 3122 3123 //test for overflow buffer error 3124 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3125 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3126 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3127 ccbs.limit(ccbs.position()); 3128 ccbs.position(0); 3129 ccus.limit(ccus.position()); 3130 ccus.position(0); 3131 3132 try { 3133 smBufEncode(encoder, "UTF-7-CC-EN-7", ccus, ccbs, true, false); 3134 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3135 } 3136 catch (Exception ex) { 3137 } 3138 3139 ccbs.clear(); 3140 ccus.clear(); 3141 3142 //test for overflow buffer error 3143 
ccus.put((char)0x0049); ccus.put((char)0x0048); 3144 ccbs.put((byte)0x00); 3145 ccbs.limit(ccbs.position()); 3146 ccbs.position(0); 3147 ccus.limit(ccus.position()); 3148 ccus.position(0); 3149 3150 try { 3151 smBufEncode(encoder, "UTF-7-CC-EN-8", ccus, ccbs, true, false); 3152 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3153 } 3154 catch (Exception ex) { 3155 } 3156 3157 ccbs.clear(); 3158 ccus.clear(); 3159 3160 //test for overflow buffer error 3161 ccus.put((char)0x2262); 3162 ccbs.put((byte)0x00); 3163 ccbs.limit(ccbs.position()); 3164 ccbs.position(0); 3165 ccus.limit(ccus.position()); 3166 ccus.position(0); 3167 3168 try { 3169 smBufEncode(encoder, "UTF-7-CC-EN-9", ccus, ccbs, true, false); 3170 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3171 } 3172 catch (Exception ex) { 3173 } 3174 3175 ccbs.clear(); 3176 ccus.clear(); 3177 3178 //test for overflow buffer error 3179 ccus.put((char)0x2262); ccus.put((char)0x0049); 3180 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3181 ccbs.limit(ccbs.position()); 3182 ccbs.position(0); 3183 ccus.limit(ccus.position()); 3184 ccus.position(0); 3185 3186 try { 3187 smBufEncode(encoder, "UTF-7-CC-EN-10", ccus, ccbs, true, false); 3188 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3189 } 3190 catch (Exception ex) { 3191 } 3192 3193 ccbs.clear(); 3194 ccus.clear(); 3195 3196 //test for overflow buffer error 3197 ccus.put((char)0x2262); 3198 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x6d); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 3199 3200 ccbs.limit(ccbs.position()); 3201 ccbs.position(0); 3202 ccus.limit(ccus.position()); 3203 ccus.position(0); 3204 try { 3205 smBufEncode(encoder, "UTF-7-CC-EN-11", ccus, ccbs, false, true); 3206 } catch (Exception ex) { 3207 errln("Exception while encoding UTF-7 code coverage test should not have been thrown."); 
3208 } 3209 3210 ccbs.clear(); 3211 ccus.clear(); 3212 3213 //test for overflow buffer error 3214 encoder.reset(); 3215 ccus.put((char)0x3980); ccus.put((char)0x2715); 3216 ccbs.put((byte)0x2b); ccbs.put((byte)0x4f); ccbs.put((byte)0x59); ccbs.put((byte)0x2d); 3217 3218 ccbs.limit(ccbs.position()); 3219 ccbs.position(0); 3220 ccus.limit(ccus.position()); 3221 ccus.position(0); 3222 3223 result = encoder.encode(ccus, ccbs, true); 3224 result = encoder.flush(ccbs); 3225 if (!result.isOverflow()) { 3226 errln("Overflow buffer while encoding UTF-7 should have occurred."); 3227 } 3228 //end of charset encoder code coverage code 3229 } 3230 3231 @Test TestBug12956()3232 public void TestBug12956() { 3233 final CharsetProvider provider = new CharsetProviderICU(); 3234 final Charset cs_utf7 = provider.charsetForName("UTF-7"); 3235 final Charset cs_imap = provider.charsetForName("IMAP-mailbox-name"); 3236 final String test = "新"; 3237 final byte[] expected_utf7 = {0x2b, 0x5a, 0x62, 0x41, 0x2d}; 3238 final byte[] expected_imap = {0x26, 0x5a, 0x62, 0x41, 0x2d}; 3239 3240 byte[] bytes = test.getBytes(cs_utf7); 3241 if (!Arrays.equals(bytes, expected_utf7)) { 3242 errln("Incorrect UTF-7 conversion. Got " + new String(bytes) + " but expect " + 3243 new String(expected_utf7)); 3244 } 3245 3246 bytes = test.getBytes(cs_imap); 3247 if (!Arrays.equals(bytes, expected_imap)) { 3248 errln("Incorrect IMAP-mailbox-name conversion. 
Got " + new String(bytes) + 3249 " but expect " + new String(expected_imap)); 3250 } 3251 } 3252 3253 //Test Charset ISCII 3254 @Test TestCharsetISCII()3255 public void TestCharsetISCII() { 3256 CharsetProvider provider = new CharsetProviderICU(); 3257 Charset cs = provider.charsetForName("ISCII,version=0"); 3258 CharsetEncoder encoder = cs.newEncoder(); 3259 CharsetDecoder decoder = cs.newDecoder(); 3260 3261 CharBuffer us = CharBuffer.allocate(0x100); 3262 ByteBuffer bs = ByteBuffer.allocate(0x100); 3263 ByteBuffer bsr = ByteBuffer.allocate(0x100); 3264 3265 //test full range of Devanagari 3266 us.put((char)0x0901); us.put((char)0x0902); us.put((char)0x0903); us.put((char)0x0905); us.put((char)0x0906); us.put((char)0x0907); 3267 us.put((char)0x0908); us.put((char)0x0909); us.put((char)0x090A); us.put((char)0x090B); us.put((char)0x090E); us.put((char)0x090F); 3268 us.put((char)0x0910); us.put((char)0x090D); us.put((char)0x0912); us.put((char)0x0913); us.put((char)0x0914); us.put((char)0x0911); 3269 us.put((char)0x0915); us.put((char)0x0916); us.put((char)0x0917); us.put((char)0x0918); us.put((char)0x0919); us.put((char)0x091A); 3270 us.put((char)0x091B); us.put((char)0x091C); us.put((char)0x091D); us.put((char)0x091E); us.put((char)0x091F); us.put((char)0x0920); 3271 us.put((char)0x0921); us.put((char)0x0922); us.put((char)0x0923); us.put((char)0x0924); us.put((char)0x0925); us.put((char)0x0926); 3272 us.put((char)0x0927); us.put((char)0x0928); us.put((char)0x0929); us.put((char)0x092A); us.put((char)0x092B); us.put((char)0x092C); 3273 us.put((char)0x092D); us.put((char)0x092E); us.put((char)0x092F); us.put((char)0x095F); us.put((char)0x0930); us.put((char)0x0931); 3274 us.put((char)0x0932); us.put((char)0x0933); us.put((char)0x0934); us.put((char)0x0935); us.put((char)0x0936); us.put((char)0x0937); 3275 us.put((char)0x0938); us.put((char)0x0939); us.put((char)0x200D); us.put((char)0x093E); us.put((char)0x093F); us.put((char)0x0940); 3276 us.put((char)0x0941); 
us.put((char)0x0942); us.put((char)0x0943); us.put((char)0x0946); us.put((char)0x0947); us.put((char)0x0948); 3277 us.put((char)0x0945); us.put((char)0x094A); us.put((char)0x094B); us.put((char)0x094C); us.put((char)0x0949); us.put((char)0x094D); 3278 us.put((char)0x093D); us.put((char)0x0966); us.put((char)0x0967); us.put((char)0x0968); us.put((char)0x0969); us.put((char)0x096A); 3279 us.put((char)0x096B); us.put((char)0x096C); us.put((char)0x096D); us.put((char)0x096E); us.put((char)0x096F); 3280 3281 bs.put((byte)0xEF); bs.put((byte)0x42); 3282 bs.put((byte)0xA1); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xA4); bs.put((byte)0xA5); bs.put((byte)0xA6); 3283 bs.put((byte)0xA7); bs.put((byte)0xA8); bs.put((byte)0xA9); bs.put((byte)0xAA); bs.put((byte)0xAB); bs.put((byte)0xAC); 3284 bs.put((byte)0xAD); bs.put((byte)0xAE); bs.put((byte)0xAF); bs.put((byte)0xB0); bs.put((byte)0xB1); bs.put((byte)0xB2); 3285 bs.put((byte)0xB3); bs.put((byte)0xB4); bs.put((byte)0xB5); bs.put((byte)0xB6); bs.put((byte)0xB7); bs.put((byte)0xB8); 3286 bs.put((byte)0xB9); bs.put((byte)0xBA); bs.put((byte)0xBB); bs.put((byte)0xBC); bs.put((byte)0xBD); bs.put((byte)0xBE); 3287 bs.put((byte)0xBF); bs.put((byte)0xC0); bs.put((byte)0xC1); bs.put((byte)0xC2); bs.put((byte)0xC3); bs.put((byte)0xC4); 3288 bs.put((byte)0xC5); bs.put((byte)0xC6); bs.put((byte)0xC7); bs.put((byte)0xC8); bs.put((byte)0xC9); bs.put((byte)0xCA); 3289 bs.put((byte)0xCB); bs.put((byte)0xCC); bs.put((byte)0xCD); bs.put((byte)0xCE); bs.put((byte)0xCF); bs.put((byte)0xD0); 3290 bs.put((byte)0xD1); bs.put((byte)0xD2); bs.put((byte)0xD3); bs.put((byte)0xD4); bs.put((byte)0xD5); bs.put((byte)0xD6); 3291 bs.put((byte)0xD7); bs.put((byte)0xD8); bs.put((byte)0xD9); bs.put((byte)0xDA); bs.put((byte)0xDB); bs.put((byte)0xDC); 3292 bs.put((byte)0xDD); bs.put((byte)0xDE); bs.put((byte)0xDF); bs.put((byte)0xE0); bs.put((byte)0xE1); bs.put((byte)0xE2); 3293 bs.put((byte)0xE3); bs.put((byte)0xE4); bs.put((byte)0xE5); 
bs.put((byte)0xE6); bs.put((byte)0xE7); bs.put((byte)0xE8); 3294 bs.put((byte)0xEA); bs.put((byte)0xE9); bs.put((byte)0xF1); bs.put((byte)0xF2); bs.put((byte)0xF3); bs.put((byte)0xF4); 3295 bs.put((byte)0xF5); bs.put((byte)0xF6); bs.put((byte)0xF7); bs.put((byte)0xF8); bs.put((byte)0xF9); bs.put((byte)0xFA); 3296 3297 bsr.put((byte)0xA1); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xA4); bsr.put((byte)0xA5); bsr.put((byte)0xA6); 3298 bsr.put((byte)0xA7); bsr.put((byte)0xA8); bsr.put((byte)0xA9); bsr.put((byte)0xAA); bsr.put((byte)0xAB); bsr.put((byte)0xAC); 3299 bsr.put((byte)0xAD); bsr.put((byte)0xAE); bsr.put((byte)0xAF); bsr.put((byte)0xB0); bsr.put((byte)0xB1); bsr.put((byte)0xB2); 3300 bsr.put((byte)0xB3); bsr.put((byte)0xB4); bsr.put((byte)0xB5); bsr.put((byte)0xB6); bsr.put((byte)0xB7); bsr.put((byte)0xB8); 3301 bsr.put((byte)0xB9); bsr.put((byte)0xBA); bsr.put((byte)0xBB); bsr.put((byte)0xBC); bsr.put((byte)0xBD); bsr.put((byte)0xBE); 3302 bsr.put((byte)0xBF); bsr.put((byte)0xC0); bsr.put((byte)0xC1); bsr.put((byte)0xC2); bsr.put((byte)0xC3); bsr.put((byte)0xC4); 3303 bsr.put((byte)0xC5); bsr.put((byte)0xC6); bsr.put((byte)0xC7); bsr.put((byte)0xC8); bsr.put((byte)0xC9); bsr.put((byte)0xCA); 3304 bsr.put((byte)0xCB); bsr.put((byte)0xCC); bsr.put((byte)0xCD); bsr.put((byte)0xCE); bsr.put((byte)0xCF); bsr.put((byte)0xD0); 3305 bsr.put((byte)0xD1); bsr.put((byte)0xD2); bsr.put((byte)0xD3); bsr.put((byte)0xD4); bsr.put((byte)0xD5); bsr.put((byte)0xD6); 3306 bsr.put((byte)0xD7); bsr.put((byte)0xD8); bsr.put((byte)0xD9); bsr.put((byte)0xDA); bsr.put((byte)0xDB); bsr.put((byte)0xDC); 3307 bsr.put((byte)0xDD); bsr.put((byte)0xDE); bsr.put((byte)0xDF); bsr.put((byte)0xE0); bsr.put((byte)0xE1); bsr.put((byte)0xE2); 3308 bsr.put((byte)0xE3); bsr.put((byte)0xE4); bsr.put((byte)0xE5); bsr.put((byte)0xE6); bsr.put((byte)0xE7); bsr.put((byte)0xE8); 3309 bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xF1); bsr.put((byte)0xF2); bsr.put((byte)0xF3); 
bsr.put((byte)0xF4); 3310 bsr.put((byte)0xF5); bsr.put((byte)0xF6); bsr.put((byte)0xF7); bsr.put((byte)0xF8); bsr.put((byte)0xF9); bsr.put((byte)0xFA); 3311 3312 //test Soft Halant 3313 us.put((char)0x0915); us.put((char)0x094d); us.put((char)0x200D); 3314 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE9); 3315 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE9); 3316 3317 //test explicit halant 3318 us.put((char)0x0915); us.put((char)0x094D); us.put((char)0x200C); 3319 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE8); 3320 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE8); 3321 3322 //test double danda 3323 us.put((char)0x0965); 3324 bs.put((byte)0xEA); bs.put((byte)0xEA); 3325 bsr.put((byte)0xEA); bsr.put((byte)0xEA); 3326 3327 //test ASCII 3328 us.put((char)0x1B); us.put((char)0x24); us.put((char)0x29); us.put((char)0x47); us.put((char)0x0E); us.put((char)0x23); 3329 us.put((char)0x21); us.put((char)0x23); us.put((char)0x22); us.put((char)0x23); us.put((char)0x23); us.put((char)0x23); 3330 us.put((char)0x24); us.put((char)0x23); us.put((char)0x25); us.put((char)0x23); us.put((char)0x26); us.put((char)0x23); 3331 us.put((char)0x27); us.put((char)0x23); us.put((char)0x28); us.put((char)0x23); us.put((char)0x29); us.put((char)0x23); 3332 us.put((char)0x2A); us.put((char)0x23); us.put((char)0x2B); us.put((char)0x0F); us.put((char)0x2F); us.put((char)0x2A); 3333 3334 bs.put((byte)0x1B); bs.put((byte)0x24); bs.put((byte)0x29); bs.put((byte)0x47); bs.put((byte)0x0E); bs.put((byte)0x23); 3335 bs.put((byte)0x21); bs.put((byte)0x23); bs.put((byte)0x22); bs.put((byte)0x23); bs.put((byte)0x23); bs.put((byte)0x23); 3336 bs.put((byte)0x24); bs.put((byte)0x23); bs.put((byte)0x25); bs.put((byte)0x23); bs.put((byte)0x26); bs.put((byte)0x23); 3337 bs.put((byte)0x27); bs.put((byte)0x23); bs.put((byte)0x28); bs.put((byte)0x23); bs.put((byte)0x29); bs.put((byte)0x23); 3338 bs.put((byte)0x2A); bs.put((byte)0x23); bs.put((byte)0x2B); 
bs.put((byte)0x0F); bs.put((byte)0x2F); bs.put((byte)0x2A); 3339 3340 bsr.put((byte)0x1B); bsr.put((byte)0x24); bsr.put((byte)0x29); bsr.put((byte)0x47); bsr.put((byte)0x0E); bsr.put((byte)0x23); 3341 bsr.put((byte)0x21); bsr.put((byte)0x23); bsr.put((byte)0x22); bsr.put((byte)0x23); bsr.put((byte)0x23); bsr.put((byte)0x23); 3342 bsr.put((byte)0x24); bsr.put((byte)0x23); bsr.put((byte)0x25); bsr.put((byte)0x23); bsr.put((byte)0x26); bsr.put((byte)0x23); 3343 bsr.put((byte)0x27); bsr.put((byte)0x23); bsr.put((byte)0x28); bsr.put((byte)0x23); bsr.put((byte)0x29); bsr.put((byte)0x23); 3344 bsr.put((byte)0x2A); bsr.put((byte)0x23); bsr.put((byte)0x2B); bsr.put((byte)0x0F); bsr.put((byte)0x2F); bsr.put((byte)0x2A); 3345 3346 //test from Lotus 3347 //Some of the Lotus ISCII code points have been changed or commented out. 3348 us.put((char)0x0061); us.put((char)0x0915); us.put((char)0x000D); us.put((char)0x000A); us.put((char)0x0996); us.put((char)0x0043); 3349 us.put((char)0x0930); us.put((char)0x094D); us.put((char)0x200D); us.put((char)0x0901); us.put((char)0x000D); us.put((char)0x000A); 3350 us.put((char)0x0905); us.put((char)0x0985); us.put((char)0x0043); us.put((char)0x0915); us.put((char)0x0921); us.put((char)0x002B); 3351 us.put((char)0x095F); 3352 bs.put((byte)0x61); bs.put((byte)0xB3); 3353 bs.put((byte)0x0D); bs.put((byte)0x0A); 3354 bs.put((byte)0xEF); bs.put((byte)0x42); 3355 bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xB4); bs.put((byte)0x43); 3356 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xCF); bs.put((byte)0xE8); bs.put((byte)0xE9); bs.put((byte)0xA1); bs.put((byte)0x0D); bs.put((byte)0x0A); bs.put((byte)0xEF); bs.put((byte)0x42); 3357 bs.put((byte)0xA4); bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xA4); bs.put((byte)0x43); bs.put((byte)0xEF); 3358 bs.put((byte)0x42); bs.put((byte)0xB3); bs.put((byte)0xBF); bs.put((byte)0x2B); 3359 bs.put((byte)0xCE); 3360 bsr.put((byte)0x61); bsr.put((byte)0xEF); bsr.put((byte)0x42); 
bsr.put((byte)0xEF); bsr.put((byte)0x30); bsr.put((byte)0xB3); 3361 bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xB4); bsr.put((byte)0x43); 3362 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xCF); bsr.put((byte)0xE8); bsr.put((byte)0xD9); bsr.put((byte)0xEF); 3363 bsr.put((byte)0x42); bsr.put((byte)0xA1); bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3364 bsr.put((byte)0xA4); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xA4); bsr.put((byte)0x43); bsr.put((byte)0xEF); 3365 bsr.put((byte)0x42); bsr.put((byte)0xB3); bsr.put((byte)0xBF); bsr.put((byte)0x2B); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3366 bsr.put((byte)0xCE); 3367 //end of test from Lotus 3368 3369 //tamil range 3370 us.put((char)0x0B86); us.put((char)0x0B87); us.put((char)0x0B88); 3371 bs.put((byte)0xEF); bs.put((byte)0x44); bs.put((byte)0xA5); bs.put((byte)0xA6); bs.put((byte)0xA7); 3372 bsr.put((byte)0xEF); bsr.put((byte)0x44); bsr.put((byte)0xA5); bsr.put((byte)0xA6); bsr.put((byte)0xA7); 3373 3374 //telugu range 3375 us.put((char)0x0C05); us.put((char)0x0C02); us.put((char)0x0C03); us.put((char)0x0C31); 3376 bs.put((byte)0xEF); bs.put((byte)0x45); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xD0); 3377 bsr.put((byte)0xEF); bsr.put((byte)0x45); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xD0); 3378 3379 //kannada range 3380 us.put((char)0x0C85); us.put((char)0x0C82); us.put((char)0x0C83); 3381 bs.put((byte)0xEF); bs.put((byte)0x48); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); 3382 bsr.put((byte)0xEF); bsr.put((byte)0x48); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); 3383 3384 //test Abbr sign and Anudatta 3385 us.put((char)0x0970); us.put((char)0x0952); us.put((char)0x0960); us.put((char)0x0944); us.put((char)0x090C); us.put((char)0x0962); 3386 us.put((char)0x0961); us.put((char)0x0963); 
us.put((char)0x0950); us.put((char)0x093D); us.put((char)0x0958); us.put((char)0x0959); 3387 us.put((char)0x095A); us.put((char)0x095B); us.put((char)0x095C); us.put((char)0x095D); us.put((char)0x095E); us.put((char)0x0020); 3388 us.put((char)0x094D); us.put((char)0x0930); us.put((char)0x0000); us.put((char)0x00A0); 3389 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xF0); bs.put((byte)0xBF); bs.put((byte)0xF0); bs.put((byte)0xB8); 3390 bs.put((byte)0xAA); bs.put((byte)0xE9); bs.put((byte)0xDF); bs.put((byte)0xE9); bs.put((byte)0xA6); bs.put((byte)0xE9); 3391 bs.put((byte)0xDB); bs.put((byte)0xE9); bs.put((byte)0xA7); bs.put((byte)0xE9); bs.put((byte)0xDC); bs.put((byte)0xE9); 3392 bs.put((byte)0xA1); bs.put((byte)0xE9); bs.put((byte)0xEA); bs.put((byte)0xE9); bs.put((byte)0xB3); bs.put((byte)0xE9); 3393 bs.put((byte)0xB4); bs.put((byte)0xE9); bs.put((byte)0xB5); bs.put((byte)0xE9); bs.put((byte)0xBA); bs.put((byte)0xE9); 3394 bs.put((byte)0xBF); bs.put((byte)0xE9); bs.put((byte)0xC0); bs.put((byte)0xE9); bs.put((byte)0xC9); bs.put((byte)0xE9); 3395 bs.put((byte)0x20); bs.put((byte)0xE8); bs.put((byte)0xCF); bs.put((byte)0x00); bs.put((byte)0xA0); 3396 //bs.put((byte)0xEF); bs.put((byte)0x30); 3397 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xF0); bsr.put((byte)0xBF); bsr.put((byte)0xF0); bsr.put((byte)0xB8); 3398 bsr.put((byte)0xAA); bsr.put((byte)0xE9); bsr.put((byte)0xDF); bsr.put((byte)0xE9); bsr.put((byte)0xA6); bsr.put((byte)0xE9); 3399 bsr.put((byte)0xDB); bsr.put((byte)0xE9); bsr.put((byte)0xA7); bsr.put((byte)0xE9); bsr.put((byte)0xDC); bsr.put((byte)0xE9); 3400 bsr.put((byte)0xA1); bsr.put((byte)0xE9); bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xB3); bsr.put((byte)0xE9); 3401 bsr.put((byte)0xB4); bsr.put((byte)0xE9); bsr.put((byte)0xB5); bsr.put((byte)0xE9); bsr.put((byte)0xBA); bsr.put((byte)0xE9); 3402 bsr.put((byte)0xBF); bsr.put((byte)0xE9); bsr.put((byte)0xC0); bsr.put((byte)0xE9); bsr.put((byte)0xC9); 
bsr.put((byte)0xE9); 3403 bsr.put((byte)0xD9); bsr.put((byte)0xE8); bsr.put((byte)0xCF); bsr.put((byte)0x00); bsr.put((byte)0xA0); 3404 3405 bs.limit(bs.position()); 3406 bs.position(0); 3407 us.limit(us.position()); 3408 us.position(0); 3409 bsr.limit(bsr.position()); 3410 bsr.position(0); 3411 3412 //round trip test 3413 try { 3414 smBufDecode(decoder, "ISCII-part1", bsr, us, false, true); 3415 smBufEncode(encoder, "ISCII-part2", us, bs); 3416 smBufDecode(decoder, "ISCII-part3", bs, us, false, true); 3417 } catch (Exception ex) { 3418 errln("ISCII round trip test failed."); 3419 } 3420 3421 //Test new characters in the ISCII charset 3422 encoder = provider.charsetForName("ISCII,version=0").newEncoder(); 3423 decoder = provider.charsetForName("ISCII,version=0").newDecoder(); 3424 char u_pts[] = { 3425 /* DEV */ (char)0x0904, 3426 /* PNJ */ (char)0x0A01, (char)0x0A03, (char)0x0A33, (char)0x0A70 3427 }; 3428 byte b_pts[] = { 3429 (byte)0xef, (byte)0x42, 3430 /* DEV */ (byte)0xa4, (byte)0xe0, 3431 /* PNJ */ (byte)0xef, (byte)0x4b, (byte)0xa1, (byte)0xa3, (byte)0xd2, (byte)0xf0, (byte)0xbf 3432 }; 3433 us = CharBuffer.allocate(u_pts.length); 3434 bs = ByteBuffer.allocate(b_pts.length); 3435 us.put(u_pts); 3436 bs.put(b_pts); 3437 3438 bs.limit(bs.position()); 3439 bs.position(0); 3440 us.limit(us.position()); 3441 us.position(0); 3442 3443 try { 3444 smBufDecode(decoder, "ISCII-update", bs, us, true, true); 3445 bs.position(0); 3446 us.position(0); 3447 smBufEncode(encoder, "ISCII-update", us, bs, true, true); 3448 } catch (Exception ex) { 3449 errln("Error occurred while encoding/decoding ISCII with the new characters."); 3450 } 3451 3452 //The rest of the code in this method is to provide better code coverage 3453 CharBuffer ccus = CharBuffer.allocate(0x10); 3454 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 3455 3456 //start of charset decoder code coverage code 3457 //test overflow buffer 3458 ccbs.put((byte)0x49); 3459 3460 ccbs.limit(ccbs.position()); 3461 
ccbs.position(0); 3462 ccus.limit(0); 3463 ccus.position(0); 3464 3465 try { 3466 smBufDecode(decoder, "ISCII-CC-DE-1", ccbs, ccus, true, false); 3467 errln("Exception while decoding ISCII should have been thrown."); 3468 } 3469 catch (Exception ex) { 3470 } 3471 3472 ccbs.clear(); 3473 ccus.clear(); 3474 3475 //test atr overflow buffer 3476 ccbs.put((byte)0xEF); ccbs.put((byte)0x40); ccbs.put((byte)0xEF); ccbs.put((byte)0x20); 3477 ccus.put((char)0x00); 3478 3479 ccbs.limit(ccbs.position()); 3480 ccbs.position(0); 3481 ccus.limit(ccus.position()); 3482 ccus.position(0); 3483 3484 try { 3485 smBufDecode(decoder, "ISCII-CC-DE-2", ccbs, ccus, true, false); 3486 errln("Exception while decoding ISCII should have been thrown."); 3487 } 3488 catch (Exception ex) { 3489 } 3490 3491 //end of charset decoder code coverage code 3492 3493 ccbs.clear(); 3494 ccus.clear(); 3495 3496 //start of charset encoder code coverage code 3497 //test ascii overflow buffer 3498 ccus.put((char)0x41); 3499 3500 ccus.limit(ccus.position()); 3501 ccus.position(0); 3502 ccbs.limit(0); 3503 ccbs.position(0); 3504 3505 try { 3506 smBufEncode(encoder, "ISCII-CC-EN-1", ccus, ccbs, true, false); 3507 errln("Exception while encoding ISCII should have been thrown."); 3508 } 3509 catch (Exception ex) { 3510 } 3511 3512 ccbs.clear(); 3513 ccus.clear(); 3514 3515 //test ascii overflow buffer 3516 ccus.put((char)0x0A); ccus.put((char)0x0043); 3517 ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3518 3519 ccus.limit(ccus.position()); 3520 ccus.position(0); 3521 ccbs.limit(ccbs.position()); 3522 ccbs.position(0); 3523 3524 try { 3525 smBufEncode(encoder, "ISCII-CC-EN-2", ccus, ccbs, true, false); 3526 errln("Exception while encoding ISCII should have been thrown."); 3527 } 3528 catch (Exception ex) { 3529 } 3530 3531 ccbs.clear(); 3532 ccus.clear(); 3533 3534 //test surrogate malform 3535 ccus.put((char)0x06E3); 3536 ccbs.put((byte)0x00); 3537 3538 ccus.limit(ccus.position()); 3539 ccus.position(0); 3540 
ccbs.limit(ccbs.position()); 3541 ccbs.position(0); 3542 3543 try { 3544 smBufEncode(encoder, "ISCII-CC-EN-3", ccus, ccbs, true, false); 3545 errln("Exception while encoding ISCII should have been thrown."); 3546 } 3547 catch (Exception ex) { 3548 } 3549 3550 ccbs.clear(); 3551 ccus.clear(); 3552 3553 //test surrogate malform 3554 ccus.put((char)0xD801); ccus.put((char)0xDD01); 3555 ccbs.put((byte)0x00); 3556 3557 ccus.limit(ccus.position()); 3558 ccus.position(0); 3559 ccbs.limit(ccbs.position()); 3560 ccbs.position(0); 3561 3562 try { 3563 smBufEncode(encoder, "ISCII-CC-EN-4", ccus, ccbs, true, false); 3564 errln("Exception while encoding ISCII should have been thrown."); 3565 } 3566 catch (Exception ex) { 3567 } 3568 3569 ccbs.clear(); 3570 ccus.clear(); 3571 3572 //test trail surrogate malform 3573 ccus.put((char)0xDD01); 3574 ccbs.put((byte)0x00); 3575 3576 ccus.limit(ccus.position()); 3577 ccus.position(0); 3578 ccbs.limit(ccbs.position()); 3579 ccbs.position(0); 3580 3581 try { 3582 smBufEncode(encoder, "ISCII-CC-EN-5", ccus, ccbs, true, false); 3583 errln("Exception while encoding ISCII should have been thrown."); 3584 } 3585 catch (Exception ex) { 3586 } 3587 3588 ccbs.clear(); 3589 ccus.clear(); 3590 3591 //test lead surrogates malform 3592 ccus.put((char)0xD801); ccus.put((char)0xD802); 3593 ccbs.put((byte)0x00); 3594 3595 ccus.limit(ccus.position()); 3596 ccus.position(0); 3597 ccbs.limit(ccbs.position()); 3598 ccbs.position(0); 3599 3600 try { 3601 smBufEncode(encoder, "ISCII-CC-EN-6", ccus, ccbs, true, false); 3602 errln("Exception while encoding ISCII should have been thrown."); 3603 } 3604 catch (Exception ex) { 3605 } 3606 3607 ccus.clear(); 3608 ccbs.clear(); 3609 3610 //test overflow buffer 3611 ccus.put((char)0x0901); 3612 ccbs.put((byte)0x00); 3613 3614 ccus.limit(ccus.position()); 3615 ccus.position(0); 3616 ccbs.limit(ccbs.position()); 3617 ccbs.position(0); 3618 3619 cs = provider.charsetForName("ISCII,version=0"); 3620 encoder = 
cs.newEncoder(); 3621 3622 try { 3623 smBufEncode(encoder, "ISCII-CC-EN-7", ccus, ccbs, true, false); 3624 errln("Exception while encoding ISCII should have been thrown."); 3625 } 3626 catch (Exception ex) { 3627 } 3628 //end of charset encoder code coverage code 3629 } 3630 3631 //Test for the IMAP Charset 3632 @Test TestCharsetIMAP()3633 public void TestCharsetIMAP() { 3634 CharsetProvider provider = new CharsetProviderICU(); 3635 Charset cs = provider.charsetForName("IMAP-mailbox-name"); 3636 CharsetEncoder encoder = cs.newEncoder(); 3637 CharsetDecoder decoder = cs.newDecoder(); 3638 3639 CharBuffer us = CharBuffer.allocate(0x20); 3640 ByteBuffer bs = ByteBuffer.allocate(0x20); 3641 3642 us.put((char)0x00A3); us.put((char)0x2020); us.put((char)0x41); 3643 3644 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x4B); bs.put((byte)0x4D); bs.put((byte)0x67); bs.put((byte)0x49); 3645 bs.put((byte)0x41); bs.put((byte)0x2D); bs.put((byte)0x41); 3646 3647 3648 bs.limit(bs.position()); 3649 bs.position(0); 3650 us.limit(us.position()); 3651 us.position(0); 3652 3653 smBufDecode(decoder, "IMAP", bs, us); 3654 smBufEncode(encoder, "IMAP", us, bs); 3655 3656 //the rest of the code in this method is for better code coverage 3657 us.clear(); 3658 bs.clear(); 3659 3660 //start of charset encoder code coverage 3661 //test buffer overflow 3662 us.put((char)0x0026); us.put((char)0x17A9); 3663 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3664 3665 bs.limit(bs.position()); 3666 bs.position(0); 3667 us.limit(us.position()); 3668 us.position(0); 3669 3670 try { 3671 smBufEncode(encoder, "IMAP-EN-1", us, bs, true, false); 3672 errln("Exception while encoding IMAP (1) should have been thrown."); 3673 } catch(Exception ex) { 3674 } 3675 3676 us.clear(); 3677 bs.clear(); 3678 3679 //test buffer overflow 3680 us.put((char)0x17A9); us.put((char)0x0941); 3681 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 
bs.put((byte)0x00); 3682 3683 bs.limit(bs.position()); 3684 bs.position(0); 3685 us.limit(us.position()); 3686 us.position(0); 3687 3688 try { 3689 smBufEncode(encoder, "IMAP-EN-2", us, bs, true, false); 3690 errln("Exception while encoding IMAP (2) should have been thrown."); 3691 } catch(Exception ex) { 3692 } 3693 3694 us.clear(); 3695 bs.clear(); 3696 3697 //test buffer overflow 3698 us.put((char)0x17A9); us.put((char)0x0941); 3699 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3700 3701 bs.limit(bs.position()); 3702 bs.position(0); 3703 us.limit(us.position()); 3704 us.position(0); 3705 3706 try { 3707 smBufEncode(encoder, "IMAP-EN-3", us, bs, true, false); 3708 errln("Exception while encoding IMAP (3) should have been thrown."); 3709 } catch(Exception ex) { 3710 } 3711 3712 us.clear(); 3713 bs.clear(); 3714 3715 //test buffer overflow 3716 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3717 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3718 bs.put((byte)0x00); 3719 3720 bs.limit(bs.position()); 3721 bs.position(0); 3722 us.limit(us.position()); 3723 us.position(0); 3724 3725 try { 3726 smBufEncode(encoder, "IMAP-EN-4", us, bs, true, false); 3727 errln("Exception while encoding IMAP (4) should have been thrown."); 3728 } catch(Exception ex) { 3729 } 3730 3731 us.clear(); 3732 bs.clear(); 3733 3734 //test buffer overflow 3735 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3736 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3737 bs.put((byte)0x00); bs.put((byte)0x00); 3738 3739 bs.limit(bs.position()); 3740 bs.position(0); 3741 us.limit(us.position()); 3742 us.position(0); 3743 3744 try { 3745 smBufEncode(encoder, "IMAP-EN-5", us, bs, true, false); 3746 errln("Exception while encoding IMAP (5) should have been thrown."); 3747 } catch(Exception ex) 
{ 3748 } 3749 3750 us.clear(); 3751 bs.clear(); 3752 3753 //test buffer overflow 3754 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); us.put((char)0x0970); 3755 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3756 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3757 3758 bs.limit(bs.position()); 3759 bs.position(0); 3760 us.limit(us.position()); 3761 us.position(0); 3762 3763 try { 3764 smBufEncode(encoder, "IMAP-EN-6", us, bs, true, false); 3765 errln("Exception while encoding IMAP (6) should have been thrown."); 3766 } catch(Exception ex) { 3767 } 3768 3769 us.clear(); 3770 bs.clear(); 3771 3772 //test buffer overflow 3773 us.put((char)0x17A9); us.put((char)0x0941); 3774 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3775 bs.put((byte)0x00); 3776 3777 bs.limit(bs.position()); 3778 bs.position(0); 3779 us.limit(us.position()); 3780 us.position(0); 3781 3782 try { 3783 smBufEncode(encoder, "IMAP-EN-7", us, bs, true, true); 3784 errln("Exception while encoding IMAP (7) should have been thrown."); 3785 } catch(Exception ex) { 3786 } 3787 3788 us.clear(); 3789 bs.clear(); 3790 3791 //test flushing 3792 us.put((char)0x17A9); us.put((char)0x0941); 3793 bs.put((byte)0x26); bs.put((byte)0x46); bs.put((byte)0x36); bs.put((byte)0x6b); bs.put((byte)0x4a); bs.put((byte)0x51); 3794 bs.put((byte)0x51); bs.put((byte)0x2d); 3795 3796 bs.limit(bs.position()); 3797 bs.position(0); 3798 us.limit(us.position()); 3799 us.position(0); 3800 3801 try { 3802 smBufEncode(encoder, "IMAP-EN-8", us, bs, true, true); 3803 } catch(Exception ex) { 3804 errln("Exception while encoding IMAP (8) should not have been thrown."); 3805 } 3806 3807 us = CharBuffer.allocate(0x08); 3808 bs = ByteBuffer.allocate(0x08); 3809 3810 //test flushing buffer overflow 3811 us.put((char)0x0061); 3812 bs.put((byte)0x61); bs.put((byte)0x00); 
3813 3814 bs.limit(bs.position()); 3815 bs.position(0); 3816 us.limit(us.position()); 3817 us.position(0); 3818 3819 try { 3820 smBufEncode(encoder, "IMAP-EN-9", us, bs, true, true); 3821 } catch(Exception ex) { 3822 errln("Exception while encoding IMAP (9) should not have been thrown."); 3823 } 3824 //end of charset encoder code coverage 3825 3826 us = CharBuffer.allocate(0x10); 3827 bs = ByteBuffer.allocate(0x10); 3828 3829 //start of charset decoder code coverage 3830 //test malform case 2 3831 us.put((char)0x0000); us.put((char)0x0000); 3832 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x43); bs.put((byte)0x41); 3833 3834 bs.limit(bs.position()); 3835 bs.position(0); 3836 us.limit(us.position()); 3837 us.position(0); 3838 3839 try { 3840 smBufDecode(decoder, "IMAP-DE-1", bs, us, true, false); 3841 errln("Exception while decoding IMAP (1) should have been thrown."); 3842 } catch(Exception ex) { 3843 } 3844 3845 us.clear(); 3846 bs.clear(); 3847 3848 //test malform case 5 3849 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3850 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3851 bs.put((byte)0x41); bs.put((byte)0x49); bs.put((byte)0x41); 3852 3853 bs.limit(bs.position()); 3854 bs.position(0); 3855 us.limit(us.position()); 3856 us.position(0); 3857 3858 try { 3859 smBufDecode(decoder, "IMAP-DE-2", bs, us, true, false); 3860 errln("Exception while decoding IMAP (2) should have been thrown."); 3861 } catch(Exception ex) { 3862 } 3863 3864 us.clear(); 3865 bs.clear(); 3866 3867 //test malform case 7 3868 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3869 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3870 bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x42); 3871 bs.put((byte)0x41); 3872 3873 bs.limit(bs.position()); 3874 bs.position(0); 3875 us.limit(us.position()); 3876 us.position(0); 3877 3878 try { 3879 
smBufDecode(decoder, "IMAP-DE-3", bs, us, true, false); 3880 errln("Exception while decoding IMAP (3) should have been thrown."); 3881 } catch(Exception ex) { 3882 } 3883 //end of charset decoder coder coverage 3884 } 3885 3886 //Test for charset UTF32LE to provide better code coverage 3887 @Test TestCharsetUTF32LE()3888 public void TestCharsetUTF32LE() { 3889 CoderResult result = CoderResult.UNDERFLOW; 3890 CharsetProvider provider = new CharsetProviderICU(); 3891 Charset cs = provider.charsetForName("UTF-32LE"); 3892 CharsetEncoder encoder = cs.newEncoder(); 3893 //CharsetDecoder decoder = cs.newDecoder(); 3894 3895 CharBuffer us = CharBuffer.allocate(0x10); 3896 ByteBuffer bs = ByteBuffer.allocate(0x10); 3897 3898 3899 //test malform surrogate 3900 us.put((char)0xD901); 3901 bs.put((byte)0x00); 3902 3903 bs.limit(bs.position()); 3904 bs.position(0); 3905 us.limit(us.position()); 3906 us.position(0); 3907 3908 try { 3909 smBufEncode(encoder, "UTF32LE-EN-1", us, bs, true, false); 3910 errln("Exception while encoding UTF32LE (1) should have been thrown."); 3911 } catch (Exception ex) { 3912 } 3913 3914 bs.clear(); 3915 us.clear(); 3916 3917 //test malform surrogate 3918 us.put((char)0xD901); us.put((char)0xD902); 3919 bs.put((byte)0x00); 3920 3921 bs.limit(bs.position()); 3922 bs.position(0); 3923 us.limit(us.position()); 3924 us.position(0); 3925 3926 result = encoder.encode(us, bs, true); 3927 3928 if (!result.isError() && !result.isOverflow()) { 3929 errln("Error while encoding UTF32LE (2) should have occurred."); 3930 } 3931 3932 bs.clear(); 3933 us.clear(); 3934 3935 //test overflow trail surrogate 3936 us.put((char)0xDD01); us.put((char)0xDD0E); us.put((char)0xDD0E); 3937 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3938 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3939 3940 bs.limit(bs.position()); 3941 bs.position(0); 3942 us.limit(us.position()); 3943 us.position(0); 3944 3945 result = encoder.encode(us, 
bs, true); 3946 3947 if (!result.isError() && !result.isOverflow()) { 3948 errln("Error while encoding UTF32LE (3) should have occurred."); 3949 } 3950 3951 bs.clear(); 3952 us.clear(); 3953 3954 //test malform lead surrogate 3955 us.put((char)0xD90D); us.put((char)0xD90E); 3956 bs.put((byte)0x00); 3957 3958 bs.limit(bs.position()); 3959 bs.position(0); 3960 us.limit(us.position()); 3961 us.position(0); 3962 3963 try { 3964 smBufEncode(encoder, "UTF32LE-EN-4", us, bs, true, false); 3965 errln("Exception while encoding UTF32LE (4) should have been thrown."); 3966 } catch (Exception ex) { 3967 } 3968 3969 bs.clear(); 3970 us.clear(); 3971 3972 //test overflow buffer 3973 us.put((char)0x0061); 3974 bs.put((byte)0x00); 3975 3976 bs.limit(bs.position()); 3977 bs.position(0); 3978 us.limit(us.position()); 3979 us.position(0); 3980 3981 try { 3982 smBufEncode(encoder, "UTF32LE-EN-5", us, bs, true, false); 3983 errln("Exception while encoding UTF32LE (5) should have been thrown."); 3984 } catch (Exception ex) { 3985 } 3986 3987 bs.clear(); 3988 us.clear(); 3989 3990 //test malform trail surrogate 3991 us.put((char)0xDD01); 3992 bs.put((byte)0x00); 3993 3994 bs.limit(bs.position()); 3995 bs.position(0); 3996 us.limit(us.position()); 3997 us.position(0); 3998 3999 try { 4000 smBufEncode(encoder, "UTF32LE-EN-6", us, bs, true, false); 4001 errln("Exception while encoding UTF32LE (6) should have been thrown."); 4002 } catch (Exception ex) { 4003 } 4004 } 4005 4006 //Test for charset UTF16LE to provide better code coverage 4007 @Test TestCharsetUTF16LE()4008 public void TestCharsetUTF16LE() { 4009 CoderResult result = CoderResult.UNDERFLOW; 4010 CharsetProvider provider = new CharsetProviderICU(); 4011 Charset cs = provider.charsetForName("UTF-16LE"); 4012 CharsetEncoder encoder = cs.newEncoder(); 4013 //CharsetDecoder decoder = cs.newDecoder(); 4014 4015 // Test for malform and change fromUChar32 for next call 4016 char u_pts1[] = { 4017 (char)0xD805, 4018 (char)0xDC01, 
(char)0xDC02, (char)0xDC03, 4019 (char)0xD901, (char)0xD902 4020 }; 4021 byte b_pts1[] = { 4022 (byte)0x00, 4023 (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00 4024 }; 4025 4026 CharBuffer us = CharBuffer.allocate(u_pts1.length); 4027 ByteBuffer bs = ByteBuffer.allocate(b_pts1.length); 4028 4029 us.put(u_pts1); 4030 bs.put(b_pts1); 4031 4032 us.limit(1); 4033 us.position(0); 4034 bs.limit(1); 4035 bs.position(0); 4036 4037 result = encoder.encode(us, bs, true); 4038 4039 if (!result.isMalformed()) { 4040 // LE should not output BOM, so this should be malformed 4041 errln("Malformed while encoding UTF-16LE (1) should have occured."); 4042 } 4043 4044 // Test for malform surrogate from previous buffer 4045 us.limit(4); 4046 us.position(1); 4047 bs.limit(7); 4048 bs.position(1); 4049 4050 result = encoder.encode(us, bs, true); 4051 4052 if (!result.isMalformed()) { 4053 errln("Error while encoding UTF-16LE (2) should have occured."); 4054 } 4055 4056 // Test for malform trail surrogate 4057 encoder.reset(); 4058 4059 us.limit(1); 4060 us.position(0); 4061 bs.limit(1); 4062 bs.position(0); 4063 4064 result = encoder.encode(us, bs, true); 4065 4066 us.limit(6); 4067 us.position(4); 4068 bs.limit(4); 4069 bs.position(1); 4070 4071 result = encoder.encode(us, bs, true); 4072 4073 if (!result.isMalformed()) { 4074 errln("Error while encoding UTF-16LE (3) should have occured."); 4075 } 4076 } 4077 4078 //provide better code coverage for the generic charset UTF32 4079 @Test TestCharsetUTF32()4080 public void TestCharsetUTF32() { 4081 CoderResult result = CoderResult.UNDERFLOW; 4082 CharsetProvider provider = new CharsetProviderICU(); 4083 Charset cs = provider.charsetForName("UTF-32"); 4084 CharsetDecoder decoder = cs.newDecoder(); 4085 CharsetEncoder encoder = cs.newEncoder(); 4086 4087 //start of decoding code coverage 4088 char us_array[] = { 4089 0x0000, 0x0000, 0x0000, 0x0000, 4090 }; 4091 4092 byte bs_array1[] = { 4093 (byte)0x00, (byte)0x00, 
(byte)0xFE, (byte)0xFF, 4094 (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x43, 4095 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4096 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4097 }; 4098 4099 byte bs_array2[] = { 4100 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4101 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4102 }; 4103 4104 CharBuffer us = CharBuffer.allocate(us_array.length); 4105 ByteBuffer bs = ByteBuffer.allocate(bs_array1.length); 4106 4107 us.put(us_array); 4108 bs.put(bs_array1); 4109 4110 us.limit(us.position()); 4111 us.position(0); 4112 bs.limit(bs.position()); 4113 bs.position(0); 4114 4115 try { 4116 smBufDecode(decoder, "UTF32-DE-1", bs, us, true, false); 4117 errln("Malform exception while decoding UTF32 charset (1) should have been thrown."); 4118 } catch (Exception ex) { 4119 } 4120 4121 decoder = cs.newDecoder(); 4122 4123 bs = ByteBuffer.allocate(bs_array2.length); 4124 bs.put(bs_array2); 4125 4126 us.limit(4); 4127 us.position(0); 4128 bs.limit(bs.position()); 4129 bs.position(0); 4130 4131 try { 4132 smBufDecode(decoder, "UTF32-DE-2", bs, us, true, false); 4133 } catch (Exception ex) { 4134 // should recognize little endian BOM 4135 errln("Exception while decoding UTF32 charset (2) should not have been thrown."); 4136 } 4137 4138 //Test malform exception 4139 bs.clear(); 4140 us.clear(); 4141 4142 bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); bs.put((byte)0x00); bs.put((byte)0x00); 4143 us.put((char)0x0000); 4144 4145 us.limit(us.position()); 4146 us.position(0); 4147 bs.limit(bs.position()); 4148 bs.position(0); 4149 4150 try { 4151 smBufDecode(decoder, "UTF32-DE-3", bs, us, true, false); 4152 errln("Malform exception while decoding UTF32 charset (3) should have been thrown."); 4153 } catch (Exception ex) { 4154 } 4155 4156 //Test BOM testing 4157 bs.clear(); 4158 us.clear(); 4159 4160 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFF); bs.put((byte)0xFE); 4161 us.put((char)0x0000); 4162 4163 
us.limit(us.position()); 4164 us.position(0); 4165 bs.limit(bs.position()); 4166 bs.position(0); 4167 4168 try { 4169 smBufDecode(decoder, "UTF32-DE-4", bs, us, true, false); 4170 } catch (Exception ex) { 4171 // should recognize big endian BOM 4172 errln("Exception while decoding UTF32 charset (4) should not have been thrown."); 4173 } 4174 //end of decoding code coverage 4175 4176 //start of encoding code coverage 4177 us = CharBuffer.allocate(0x10); 4178 bs = ByteBuffer.allocate(0x10); 4179 4180 //test wite BOM overflow error 4181 us.put((char)0xDC01); 4182 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4183 4184 us.limit(us.position()); 4185 us.position(0); 4186 bs.limit(bs.position()); 4187 bs.position(0); 4188 4189 result = encoder.encode(us, bs, true); 4190 // must try to output BOM first for UTF-32 (not UTF-32BE or UTF-32LE) 4191 if (!result.isOverflow()) { 4192 errln("Buffer overflow error while encoding UTF32 charset (1) should have occurred."); 4193 } 4194 4195 us.clear(); 4196 bs.clear(); 4197 4198 //test malform surrogate and store value in fromChar32 4199 us.put((char)0xD801); us.put((char)0xD802); 4200 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4201 4202 us.limit(us.position()); 4203 us.position(0); 4204 bs.limit(bs.position()); 4205 bs.position(0); 4206 4207 result = encoder.encode(us, bs, true); 4208 if (!result.isMalformed()) { 4209 errln("Malformed error while encoding UTF32 charset (2) should have occurred."); 4210 } 4211 4212 us.clear(); 4213 bs.clear(); 4214 4215 //test malform surrogate 4216 us.put((char)0x0000); us.put((char)0xD902); 4217 4218 us.limit(us.position()); 4219 us.position(0); 4220 bs.limit(bs.position()); 4221 bs.position(0); 4222 4223 result = encoder.encode(us, bs, true); 4224 if (!result.isOverflow()) { 4225 errln("Overflow error while encoding UTF32 charset (3) should have occurred."); 4226 } 4227 4228 us.clear(); 4229 bs.clear(); 4230 
4231 //test malform surrogate 4232 encoder.reset(); 4233 us.put((char)0xD801); 4234 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4235 4236 us.limit(us.position()); 4237 us.position(0); 4238 bs.limit(bs.position()); 4239 bs.position(0); 4240 4241 result = encoder.encode(us, bs, true); 4242 if (!result.isMalformed()) { 4243 errln("Malform error while encoding UTF32 charset (4) should have occurred."); 4244 } 4245 4246 us.clear(); 4247 bs.clear(); 4248 4249 //test overflow surrogate 4250 us.put((char)0x0000); us.put((char)0xDDE1); us.put((char)0xD915); us.put((char)0xDDF2); 4251 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4252 4253 us.limit(us.position()); 4254 us.position(0); 4255 bs.limit(bs.position()); 4256 bs.position(0); 4257 4258 result = encoder.encode(us, bs, true); 4259 if (!result.isOverflow()) { 4260 errln("Overflow error while encoding UTF32 charset (5) should have occurred."); 4261 } 4262 4263 us.clear(); 4264 bs.clear(); 4265 4266 //test malform surrogate 4267 encoder.reset(); 4268 us.put((char)0xDDE1); 4269 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4270 4271 us.limit(us.position()); 4272 us.position(0); 4273 bs.limit(bs.position()); 4274 bs.position(0); 4275 4276 result = encoder.encode(us, bs, true); 4277 if (!result.isMalformed()) { 4278 errln("Malform error while encoding UTF32 charset (6) should have occurred."); 4279 } 4280 //end of encoding code coverage 4281 } 4282 4283 //this method provides better code coverage decoding UTF32 LE/BE 4284 @Test TestDecodeUTF32LEBE()4285 public void TestDecodeUTF32LEBE() { 4286 CoderResult result = CoderResult.UNDERFLOW; 4287 CharsetProvider provider = new CharsetProviderICU(); 4288 CharsetDecoder decoder; 4289 CharBuffer us = CharBuffer.allocate(0x10); 4290 ByteBuffer bs = ByteBuffer.allocate(0x10); 4291 4292 //decode UTF32LE 
4293 decoder = provider.charsetForName("UTF-32LE").newDecoder(); 4294 //test overflow buffer 4295 bs.put((byte)0x41); bs.put((byte)0xFF); bs.put((byte)0x01); bs.put((byte)0x00); 4296 us.put((char)0x0000); 4297 4298 us.limit(us.position()); 4299 us.position(0); 4300 bs.limit(bs.position()); 4301 bs.position(0); 4302 4303 try { 4304 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4305 errln("Overflow exception while decoding UTF32LE (1) should have been thrown."); 4306 } catch (Exception ex) { 4307 } 4308 // test overflow buffer handling in CharsetDecoderICU 4309 bs.position(0); 4310 us.position(0); 4311 decoder.reset(); 4312 result = decoder.decode(bs, us, true); 4313 if (result.isOverflow()) { 4314 result = decoder.decode(bs, us, true); 4315 if (!result.isOverflow()) { 4316 errln("Overflow buffer error while decoding UTF32LE should have occurred."); 4317 } 4318 } else { 4319 errln("Overflow buffer error while decoding UTF32LE should have occurred."); 4320 } 4321 4322 us.clear(); 4323 bs.clear(); 4324 //test malform buffer 4325 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4326 us.put((char)0x0000); 4327 4328 us.limit(us.position()); 4329 us.position(0); 4330 bs.limit(bs.position()); 4331 bs.position(0); 4332 4333 try { 4334 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4335 errln("Malform exception while decoding UTF32LE (2) should have been thrown."); 4336 } catch (Exception ex) { 4337 } 4338 4339 us.clear(); 4340 bs.clear(); 4341 //test malform buffer 4342 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4343 bs.put((byte)0xFF); bs.put((byte)0xDF); bs.put((byte)0x10); 4344 us.put((char)0x0000); 4345 4346 us.limit(us.position()); 4347 us.position(0); 4348 bs.limit(bs.position()); 4349 bs.position(0); 4350 4351 try { 4352 // must flush in order to exhibit malformed behavior 4353 smBufDecode(decoder, "UTF-32LE", bs, us, true, true); 4354 errln("Malform exception while decoding UTF32LE 
(3) should have been thrown."); 4355 } catch (Exception ex) { 4356 } 4357 4358 us.clear(); 4359 bs.clear(); 4360 //test malform buffer 4361 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4362 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4363 us.put((char)0x0000); 4364 4365 us.limit(us.position()); 4366 us.position(0); 4367 bs.limit(bs.position()); 4368 bs.position(0); 4369 4370 try { 4371 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4372 errln("Malform exception while decoding UTF32LE (4) should have been thrown."); 4373 } catch (Exception ex) { 4374 } 4375 4376 us.clear(); 4377 bs.clear(); 4378 //test overflow buffer 4379 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4380 bs.put((byte)0xDD); bs.put((byte)0xFF); bs.put((byte)0x10); bs.put((byte)0x00); 4381 us.put((char)0x0000); 4382 4383 us.limit(us.position()); 4384 us.position(0); 4385 bs.limit(bs.position()); 4386 bs.position(0); 4387 4388 try { 4389 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4390 errln("Overflow exception while decoding UTF32LE (5) should have been thrown."); 4391 } catch (Exception ex) { 4392 } 4393 //end of decode UTF32LE 4394 4395 bs.clear(); 4396 us.clear(); 4397 4398 //decode UTF32BE 4399 decoder = provider.charsetForName("UTF-32BE").newDecoder(); 4400 //test overflow buffer 4401 bs.put((byte)0x00); bs.put((byte)0x01); bs.put((byte)0xFF); bs.put((byte)0x41); 4402 us.put((char)0x0000); 4403 4404 us.limit(us.position()); 4405 us.position(0); 4406 bs.limit(bs.position()); 4407 bs.position(0); 4408 4409 try { 4410 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4411 errln("Overflow exception while decoding UTF32BE (1) should have been thrown."); 4412 } catch (Exception ex) { 4413 } 4414 4415 bs.clear(); 4416 us.clear(); 4417 //test malform buffer 4418 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xD9); bs.put((byte)0x02); 4419 us.put((char)0x0000); 4420 
4421 us.limit(us.position()); 4422 us.position(0); 4423 bs.limit(bs.position()); 4424 bs.position(0); 4425 4426 try { 4427 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4428 errln("Malform exception while decoding UTF32BE (2) should have been thrown."); 4429 } catch (Exception ex) { 4430 } 4431 4432 bs.clear(); 4433 us.clear(); 4434 //test malform buffer 4435 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); 4436 bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDF); 4437 us.put((char)0x0000); 4438 4439 us.limit(us.position()); 4440 us.position(0); 4441 bs.limit(bs.position()); 4442 bs.position(0); 4443 4444 try { 4445 // must flush to exhibit malformed behavior 4446 smBufDecode(decoder, "UTF-32BE", bs, us, true, true); 4447 errln("Malform exception while decoding UTF32BE (3) should have been thrown."); 4448 } catch (Exception ex) { 4449 } 4450 4451 bs.clear(); 4452 us.clear(); 4453 //test overflow buffer 4454 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); 4455 bs.put((byte)0x00); bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDD); 4456 us.put((char)0x0000); 4457 4458 us.limit(us.position()); 4459 us.position(0); 4460 bs.limit(bs.position()); 4461 bs.position(0); 4462 4463 try { 4464 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4465 errln("Overflow exception while decoding UTF32BE (4) should have been thrown."); 4466 } catch (Exception ex) { 4467 } 4468 4469 bs.clear(); 4470 us.clear(); 4471 //test malform buffer 4472 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); 4473 us.put((char)0x0000); 4474 4475 us.limit(us.position()); 4476 us.position(0); 4477 bs.limit(bs.position()); 4478 bs.position(0); 4479 4480 try { 4481 // must flush to exhibit malformed behavior 4482 smBufDecode(decoder, "UTF-32BE", bs, us, true, true); 4483 errln("Malform exception while decoding UTF32BE (5) should have been thrown."); 4484 } catch (Exception ex) { 4485 } 4486 //end of decode 
UTF32BE 4487 } 4488 4489 //provide better code coverage for UTF8 4490 @Test TestCharsetUTF8()4491 public void TestCharsetUTF8() { 4492 CoderResult result = CoderResult.UNDERFLOW; 4493 CharsetProvider provider = new CharsetProviderICU(); 4494 CharsetDecoder decoder = provider.charsetForName("UTF-8").newDecoder(); 4495 CharsetEncoder encoder = provider.charsetForName("UTF-8").newEncoder(); 4496 4497 CharBuffer us = CharBuffer.allocate(0x10); 4498 ByteBuffer bs = ByteBuffer.allocate(0x10); 4499 ByteBuffer bs2; 4500 CharBuffer us2; 4501 int limit_us; 4502 int limit_bs; 4503 4504 //encode and decode using read only buffer 4505 encoder.reset(); 4506 decoder.reset(); 4507 us.put((char)0x0041); us.put((char)0x0081); us.put((char)0xEF65); us.put((char)0xD902); 4508 bs.put((byte)0x41); bs.put((byte)0xc2); bs.put((byte)0x81); bs.put((byte)0xee); bs.put((byte)0xbd); bs.put((byte)0xa5); 4509 bs.put((byte)0x00); 4510 limit_us = us.position(); 4511 limit_bs = bs.position(); 4512 4513 us.limit(limit_us); 4514 us.position(0); 4515 bs.limit(limit_bs); 4516 bs.position(0); 4517 bs2 = bs.asReadOnlyBuffer(); 4518 us2 = us.asReadOnlyBuffer(); 4519 4520 result = decoder.decode(bs2, us, true); 4521 if (!result.isUnderflow() || !equals(us, us2)) { 4522 errln("Error while decoding UTF-8 (1) should not have occured."); 4523 } 4524 4525 us2.limit(limit_us); 4526 us2.position(0); 4527 bs.limit(limit_bs); 4528 bs.position(0); 4529 4530 result = encoder.encode(us2, bs, true); 4531 if (!result.isUnderflow() || !equals(bs, bs2)) { 4532 errln("Error while encoding UTF-8 (1) should not have occured."); 4533 } 4534 4535 us.clear(); 4536 bs.clear(); 4537 4538 //test overflow buffer while encoding 4539 //readonly buffer 4540 encoder.reset(); 4541 us.put((char)0x0081); us.put((char)0xEF65); 4542 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4543 limit_us = us.position(); 4544 us2 = us.asReadOnlyBuffer(); 4545 us2.limit(limit_us); 4546 us2.position(0); 4547 bs.limit(1); 4548 bs.position(0); 
4549 result = encoder.encode(us2, bs, true); 4550 if (!result.isOverflow()) { 4551 errln("Overflow Error should have occured while encoding UTF-8 (2)."); 4552 } 4553 4554 encoder.reset(); 4555 4556 us2.limit(limit_us); 4557 us2.position(1); 4558 bs.limit(1); 4559 bs.position(0); 4560 result = encoder.encode(us2, bs, true); 4561 if (!result.isOverflow()) { 4562 errln("Overflow Error should have occured while encoding UTF-8 (3)."); 4563 } 4564 4565 encoder.reset(); 4566 4567 us2.limit(limit_us); 4568 us2.position(1); 4569 bs.limit(2); 4570 bs.position(0); 4571 result = encoder.encode(us2, bs, true); 4572 if (!result.isOverflow()) { 4573 errln("Overflow Error should have occured while encoding UTF-8 (4)."); 4574 } 4575 4576 encoder.reset(); 4577 4578 us2.limit(limit_us); 4579 us2.position(0); 4580 bs.limit(2); 4581 bs.position(0); 4582 result = encoder.encode(us2, bs, true); 4583 if (!result.isOverflow()) { 4584 errln("Overflow Error should have occured while encoding UTF-8 (5)."); 4585 } 4586 4587 //not readonly buffer 4588 encoder.reset(); 4589 4590 us.limit(limit_us); 4591 us.position(0); 4592 bs.limit(1); 4593 bs.position(0); 4594 result = encoder.encode(us, bs, true); 4595 if (!result.isOverflow()) { 4596 errln("Overflow Error should have occured while encoding UTF-8 (6)."); 4597 } 4598 4599 encoder.reset(); 4600 4601 us.limit(limit_us); 4602 us.position(0); 4603 bs.limit(3); 4604 bs.position(0); 4605 result = encoder.encode(us, bs, true); 4606 if (!result.isOverflow()) { 4607 errln("Overflow Error should have occured while encoding UTF-8 (7)."); 4608 } 4609 4610 encoder.reset(); 4611 4612 us.limit(limit_us); 4613 us.position(1); 4614 bs.limit(2); 4615 bs.position(0); 4616 result = encoder.encode(us, bs, true); 4617 if (!result.isOverflow()) { 4618 errln("Overflow Error should have occured while encoding UTF-8 (8)."); 4619 } 4620 4621 encoder.reset(); 4622 4623 us.limit(limit_us + 1); 4624 us.position(1); 4625 bs.limit(3); 4626 bs.position(0); 4627 result = 
encoder.encode(us, bs, true); 4628 if (!result.isOverflow()) { 4629 errln("Overflow Error should have occured while encoding UTF-8 (9)."); 4630 } 4631 4632 us.clear(); 4633 bs.clear(); 4634 4635 //test encoding 4 byte characters 4636 encoder.reset(); 4637 us.put((char)0xD902); us.put((char)0xDD02); us.put((char)0x0041); 4638 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4639 limit_us = us.position(); 4640 us2 = us.asReadOnlyBuffer(); 4641 us2.limit(limit_us); 4642 us2.position(0); 4643 bs.limit(1); 4644 bs.position(0); 4645 result = encoder.encode(us2, bs, true); 4646 if (!result.isOverflow()) { 4647 errln("Overflow Error should have occured while encoding UTF-8 (10)."); 4648 } 4649 4650 encoder.reset(); 4651 4652 us2.limit(limit_us); 4653 us2.position(0); 4654 bs.limit(2); 4655 bs.position(0); 4656 result = encoder.encode(us2, bs, true); 4657 if (!result.isOverflow()) { 4658 errln("Overflow Error should have occured while encoding UTF-8 (11)."); 4659 } 4660 4661 encoder.reset(); 4662 4663 us2.limit(limit_us); 4664 us2.position(0); 4665 bs.limit(3); 4666 bs.position(0); 4667 result = encoder.encode(us2, bs, true); 4668 if (!result.isOverflow()) { 4669 errln("Overflow Error should have occured while encoding UTF-8 (12)."); 4670 } 4671 4672 encoder.reset(); 4673 4674 us2.limit(limit_us); 4675 us2.position(0); 4676 bs.limit(4); 4677 bs.position(0); 4678 result = encoder.encode(us2, bs, true); 4679 if (!result.isOverflow()) { 4680 errln("Overflow Error should have occured while encoding UTF-8 (13)."); 4681 } 4682 4683 us.clear(); 4684 bs.clear(); 4685 4686 //decoding code coverage 4687 //test malform error 4688 decoder.reset(); 4689 bs.put((byte)0xC2); bs.put((byte)0xC2); 4690 us.put((char)0x0000); 4691 bs2 = bs.asReadOnlyBuffer(); 4692 4693 us.limit(1); 4694 us.position(0); 4695 bs2.limit(1); 4696 bs2.position(0); 4697 4698 result = decoder.decode(bs2, us, true); 4699 result = decoder.flush(us); 4700 if (!result.isMalformed()) { 4701 
errln("Malform error should have occurred while decoding UTF-8 (1)."); 4702 } 4703 4704 us.limit(1); 4705 us.position(0); 4706 bs2.limit(1); 4707 bs2.position(0); 4708 4709 decoder.reset(); 4710 4711 result = decoder.decode(bs2, us, true); 4712 us.limit(1); 4713 us.position(0); 4714 bs2.limit(2); 4715 bs2.position(0); 4716 result = decoder.decode(bs2, us, true); 4717 if (!result.isMalformed()) { 4718 errln("Malform error should have occurred while decoding UTF-8 (2)."); 4719 } 4720 4721 us.clear(); 4722 bs.clear(); 4723 4724 //test overflow buffer 4725 bs.put((byte)0x01); bs.put((byte)0x41); 4726 us.put((char)0x0000); 4727 bs2 = bs.asReadOnlyBuffer(); 4728 us.limit(1); 4729 us.position(0); 4730 bs2.limit(2); 4731 bs2.position(0); 4732 4733 result = decoder.decode(bs2, us, true); 4734 if (!result.isOverflow()) { 4735 errln("Overflow error should have occurred while decoding UTF-8 (3)."); 4736 } 4737 4738 us.clear(); 4739 bs.clear(); 4740 4741 //test malform string 4742 decoder.reset(); 4743 bs.put((byte)0xF5); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4744 us.put((char)0x0000); 4745 bs2 = bs.asReadOnlyBuffer(); 4746 us.limit(1); 4747 us.position(0); 4748 bs2.limit(4); 4749 bs2.position(0); 4750 4751 result = decoder.decode(bs2, us, true); 4752 if (!result.isMalformed()) { 4753 errln("Malform error should have occurred while decoding UTF-8 (4)."); 4754 } 4755 4756 bs.clear(); 4757 4758 //test overflow 4759 decoder.reset(); 4760 bs.put((byte)0xF3); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4761 bs2 = bs.asReadOnlyBuffer(); 4762 us.limit(1); 4763 us.position(0); 4764 bs2.limit(4); 4765 bs2.position(0); 4766 4767 result = decoder.decode(bs2, us, true); 4768 if (!result.isOverflow()) { 4769 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4770 } 4771 4772 //test overflow 4773 decoder.reset(); 4774 us.limit(2); 4775 us.position(0); 4776 bs2.limit(5); 4777 bs2.position(0); 4778 4779 result = decoder.decode(bs2, us, 
true); 4780 if (!result.isOverflow()) { 4781 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4782 } 4783 4784 //test overflow 4785 decoder.reset(); 4786 us.limit(1); 4787 us.position(0); 4788 bs.limit(5); 4789 bs.position(0); 4790 4791 result = decoder.decode(bs, us, true); 4792 if (!result.isOverflow()) { 4793 errln("Overflow error should have occurred while decoding UTF-8 (6)."); 4794 } 4795 4796 bs.clear(); 4797 4798 //test overflow 4799 decoder.reset(); 4800 bs.put((byte)0x41); bs.put((byte)0x42); 4801 us.limit(1); 4802 us.position(0); 4803 bs.limit(2); 4804 bs.position(0); 4805 4806 result = decoder.decode(bs, us, true); 4807 if (!result.isOverflow()) { 4808 errln("Overflow error should have occurred while decoding UTF-8 (7)."); 4809 } 4810 4811 } 4812 4813 //provide better code coverage for Charset UTF16 4814 @Test TestCharsetUTF16()4815 public void TestCharsetUTF16() { 4816 CoderResult result = CoderResult.UNDERFLOW; 4817 CharsetProvider provider = new CharsetProviderICU(); 4818 CharsetDecoder decoder = provider.charsetForName("UTF-16").newDecoder(); 4819 CharsetEncoder encoder = provider.charsetForName("UTF-16").newEncoder(); 4820 4821 CharBuffer us = CharBuffer.allocate(0x10); 4822 ByteBuffer bs = ByteBuffer.allocate(0x10); 4823 4824 //test flush buffer and malform string 4825 bs.put((byte)0xFF); 4826 us.put((char)0x0000); 4827 4828 us.limit(us.position()); 4829 us.position(0); 4830 bs.limit(bs.position()); 4831 bs.position(0); 4832 4833 result = decoder.decode(bs, us, true); 4834 result = decoder.flush(us); 4835 if (!result.isMalformed()) { 4836 errln("Malform error while decoding UTF-16 should have occurred."); 4837 } 4838 4839 us.clear(); 4840 bs.clear(); 4841 4842 us.put((char)0xD902); us.put((char)0xDD01); us.put((char)0x0041); 4843 4844 us.limit(1); 4845 us.position(0); 4846 bs.limit(4); 4847 bs.position(0); 4848 4849 result = encoder.encode(us, bs, true); 4850 us.limit(3); 4851 us.position(0); 4852 bs.limit(3); 4853 
bs.position(0); 4854 result = encoder.encode(us, bs, true); 4855 if (!result.isOverflow()) { 4856 errln("Overflow buffer while encoding UTF-16 should have occurred."); 4857 } 4858 4859 us.clear(); 4860 bs.clear(); 4861 4862 //test overflow buffer 4863 decoder.reset(); 4864 decoder = provider.charsetForName("UTF-16BE").newDecoder(); 4865 4866 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x41); 4867 4868 us.limit(0); 4869 us.position(0); 4870 bs.limit(3); 4871 bs.position(0); 4872 4873 result = decoder.decode(bs, us, true); 4874 if (!result.isOverflow()) { 4875 errln("Overflow buffer while decoding UTF-16 should have occurred."); 4876 } 4877 } 4878 4879 //provide better code coverage for Charset ISO-2022-KR 4880 @Test TestCharsetISO2022KR()4881 public void TestCharsetISO2022KR() { 4882 CoderResult result = CoderResult.UNDERFLOW; 4883 CharsetProvider provider = new CharsetProviderICU(); 4884 CharsetDecoder decoder = provider.charsetForName("ISO-2022-KR").newDecoder(); 4885 4886 byte bytearray[] = { 4887 (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x43, (byte)0x41, (byte)0x42, 4888 }; 4889 char chararray[] = { 4890 (char)0x0041 4891 }; 4892 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4893 CharBuffer cb = CharBuffer.wrap(chararray); 4894 4895 result = decoder.decode(bb, cb, true); 4896 4897 if (!result.isOverflow()) { 4898 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4899 } 4900 } 4901 4902 //provide better code coverage for Charset ISO-2022-JP 4903 @Test TestCharsetISO2022JP()4904 public void TestCharsetISO2022JP() { 4905 CoderResult result = CoderResult.UNDERFLOW; 4906 CharsetProvider provider = new CharsetProviderICU(); 4907 CharsetDecoder decoder = provider.charsetForName("ISO-2022-JP-2").newDecoder(); 4908 4909 byte bytearray[] = { 4910 (byte)0x1b, (byte)0x24, (byte)0x28, (byte)0x44, (byte)0x0A, (byte)0x41, 4911 }; 4912 char chararray[] = { 4913 (char)0x000A 4914 }; 4915 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4916 CharBuffer 
cb = CharBuffer.wrap(chararray); 4917 4918 result = decoder.decode(bb, cb, true); 4919 4920 if (!result.isOverflow()) { 4921 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4922 } 4923 } 4924 4925 //provide better code coverage for Charset ASCII 4926 @Test TestCharsetASCII()4927 public void TestCharsetASCII() { 4928 CoderResult result = CoderResult.UNDERFLOW; 4929 CharsetProvider provider = new CharsetProviderICU(); 4930 CharsetDecoder decoder = provider.charsetForName("US-ASCII").newDecoder(); 4931 4932 byte bytearray[] = { 4933 (byte)0x41 4934 }; 4935 char chararray[] = { 4936 (char)0x0041 4937 }; 4938 4939 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4940 CharBuffer cb = CharBuffer.wrap(chararray); 4941 4942 result = decoder.decode(bb, cb, true); 4943 result = decoder.flush(cb); 4944 4945 if (result.isError()) { 4946 errln("Error occurred while decoding US-ASCII."); 4947 } 4948 } 4949 4950 // provide better code coverage for Charset Callbacks 4951 /* Different aspects of callbacks are being tested including using different context available */ 4952 @Test TestCharsetCallbacks()4953 public void TestCharsetCallbacks() { 4954 CoderResult result = CoderResult.UNDERFLOW; 4955 CharsetProvider provider = new CharsetProviderICU(); 4956 CharsetEncoder encoder = provider.charsetForName("iso-2022-jp").newEncoder(); 4957 CharsetDecoder decoder = provider.charsetForName("iso-2022-jp").newDecoder(); 4958 4959 String context3[] = { 4960 "i", 4961 "J" 4962 }; 4963 4964 // Testing encoder escape callback 4965 String context1[] = { 4966 "J", 4967 "C", 4968 "D", 4969 null 4970 }; 4971 char chararray[] = { 4972 (char)0xd122 4973 }; 4974 ByteBuffer bb = ByteBuffer.allocate(20); 4975 CharBuffer cb = CharBuffer.wrap(chararray); 4976 4977 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.OVERFLOW, CharsetCallback.FROM_U_CALLBACK_ESCAPE, null); // This callback is not valid. 
4978 for (int i = 0; i < context1.length; i++) { 4979 encoder.reset(); 4980 cb.position(0); 4981 bb.position(0); 4982 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_ESCAPE, context1[i]); // This callback is valid. 4983 4984 result = encoder.encode(cb, bb, true); 4985 if (result.isError()) { 4986 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 4987 } 4988 } 4989 4990 // Testing encoder skip callback 4991 for (int i = 0; i < context3.length; i++) { 4992 encoder.reset(); 4993 cb.position(0); 4994 bb.position(0); 4995 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SKIP, context3[i]); 4996 4997 result = encoder.encode(cb, bb, true); 4998 if (result.isError() && i == 0) { 4999 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 5000 } 5001 } 5002 5003 // Testing encoder sub callback 5004 for (int i = 0; i < context3.length; i++) { 5005 encoder.reset(); 5006 cb.position(0); 5007 bb.position(0); 5008 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE, context3[i]); 5009 5010 result = encoder.encode(cb, bb, true); 5011 if (result.isError() && i == 0) { 5012 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 5013 } 5014 } 5015 5016 // Testing decoder escape callback 5017 String context2[] = { 5018 "X", 5019 "C", 5020 "D", 5021 null 5022 }; 5023 byte bytearray[] = { 5024 (byte)0x1b, (byte)0x2e, (byte)0x43 5025 }; 5026 bb = ByteBuffer.wrap(bytearray); 5027 cb = CharBuffer.allocate(20); 5028 5029 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_ESCAPE, null); // This callback is not valid. 
5030 for (int i = 0; i < context2.length; i++) { 5031 decoder.reset(); 5032 cb.position(0); 5033 bb.position(0); 5034 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_ESCAPE, context2[i]); // This callback is valid. 5035 5036 result = decoder.decode(bb, cb, true); 5037 if (result.isError()) { 5038 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder."); 5039 } 5040 } 5041 5042 // Testing decoder skip callback 5043 for (int i = 0; i < context3.length; i++) { 5044 decoder.reset(); 5045 cb.position(0); 5046 bb.position(0); 5047 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_SKIP, context3[i]); 5048 result = decoder.decode(bb, cb, true); 5049 if (!result.isError()) { 5050 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder should have occurred."); 5051 } 5052 } 5053 } 5054 5055 // Testing invalid input exceptions 5056 @Test TestInvalidInput()5057 public void TestInvalidInput() { 5058 CharsetProvider provider = new CharsetProviderICU(); 5059 Charset charset = provider.charsetForName("iso-2022-jp"); 5060 CharsetEncoder encoder = charset.newEncoder(); 5061 CharsetDecoder decoder = charset.newDecoder(); 5062 5063 try { 5064 encoder.encode(CharBuffer.allocate(10), null, true); 5065 errln("Illegal argument exception should have been thrown due to null target."); 5066 } catch (CoderMalfunctionError err) { 5067 // Java 16 updated handling of Exception thrown by encodeLoop(CharBuffer,ByteBuffer). 5068 // Previously when encodeLoop is called with null input/output buffer, it throws 5069 // IllegalArgumentException, and Java CharsetEncoder does not catch the exception. 5070 // In Java 16, a runtime exception thrown by encodeLoop implementation is caught 5071 // and wrapped by CoderMalfunctionError. This block is required because CoderMalfunctionError 5072 // is not an Exception. 
5073 } catch (Exception ex) { 5074 // IllegalArgumentException is thrown by encodeLoop(CharBuffer,ByteBuffer) implementation 5075 // is not wrapped by CharsetEncoder up to Java 15. 5076 } 5077 5078 try { 5079 decoder.decode(ByteBuffer.allocate(10), null, true); 5080 errln("Illegal argument exception should have been thrown due to null target."); 5081 } catch (CoderMalfunctionError err) { 5082 } catch (Exception ex) { 5083 } 5084 } 5085 5086 // Test java canonical names 5087 @Test TestGetICUJavaCanonicalNames()5088 public void TestGetICUJavaCanonicalNames() { 5089 // Ambiguous charset name. 5090 String javaCName = CharsetProviderICU.getJavaCanonicalName("windows-1250"); 5091 String icuCName = CharsetProviderICU.getICUCanonicalName("Windows-1250"); 5092 if (javaCName == null || icuCName == null) { 5093 errln("Unable to get Java or ICU canonical name from ambiguous alias"); 5094 } 5095 5096 } 5097 5098 // Port over from ICU4C for test conversion tables (mbcs version 5.x) 5099 // Provide better code coverage in CharsetMBCS, CharsetDecoderICU, and CharsetEncoderICU. 
5100 @Test TestCharsetTestData()5101 public void TestCharsetTestData() { 5102 CoderResult result = CoderResult.UNDERFLOW; 5103 String charsetName = "test4"; 5104 CharsetProvider provider = new CharsetProviderICU(); 5105 Charset charset = ((CharsetProviderICU)provider).charsetForName(charsetName, "com/ibm/icu/dev/data/testdata", 5106 this.getClass().getClassLoader()); 5107 CharsetEncoder encoder = charset.newEncoder(); 5108 CharsetDecoder decoder = charset.newDecoder(); 5109 5110 byte bytearray[] = { 5111 0x01, 0x02, 0x03, 0x0a, 5112 0x01, 0x02, 0x03, 0x0b, 5113 0x01, 0x02, 0x03, 0x0d, 5114 }; 5115 5116 // set the callback for overflow errors 5117 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_STOP, null); 5118 5119 ByteBuffer bb = ByteBuffer.wrap(bytearray); 5120 CharBuffer cb = CharBuffer.allocate(10); 5121 5122 bb.limit(4); 5123 cb.limit(1); // Overflow should occur and is expected 5124 result = decoder.decode(bb, cb, false); 5125 if (result.isError()) { 5126 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5127 } 5128 5129 bb.limit(8); 5130 result = decoder.decode(bb, cb, false); 5131 if (result.isError()) { 5132 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5133 } 5134 5135 bb.limit(12); 5136 result = decoder.decode(bb, cb, true); 5137 if (result.isError()) { 5138 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5139 } 5140 5141 char chararray[] = { 5142 0xDBC4,0xDE34,0xD900,0xDC05,/* \U00101234\U00050005 */ 5143 0xD940, /* first half of \U00060006 or \U00060007 */ 5144 0xDC07/* second half of \U00060007 */ 5145 }; 5146 5147 cb = CharBuffer.wrap(chararray); 5148 bb = ByteBuffer.allocate(10); 5149 5150 bb.limit(2); 5151 cb.limit(4); 5152 result = encoder.encode(cb, bb, false); 5153 if (result.isError()) { 5154 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5155 } 5156 
cb.limit(5); 5157 result = encoder.encode(cb, bb, false); 5158 if (result.isError()) { 5159 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5160 } 5161 cb.limit(6); 5162 result = encoder.encode(cb, bb, true); 5163 if (!result.isError()) { 5164 errln("Error should have occurred while encoding: " + charsetName); 5165 } 5166 } 5167 5168 /* Round trip test of SCSU converter*/ 5169 @Test TestSCSUConverter()5170 public void TestSCSUConverter(){ 5171 byte allFeaturesSCSU[]={ 5172 0x41,(byte) 0xdf, 0x12,(byte) 0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x1b, 0x03, 5173 (byte)0xdf, 0x1c,(byte) 0x88,(byte) 0x80, 0x0b, (byte)0xbf,(byte) 0xff,(byte) 0xff, 0x0d, 0x0a, 5174 0x41, 0x10, (byte)0xdf, 0x12, (byte)0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x13, 5175 (byte)0xdf, 0x14,(byte) 0x80, 0x15, (byte)0xff 5176 }; 5177 5178 char allFeaturesUTF16[]={ 5179 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 5180 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 5181 0x01df, 0xf000, 0xdbff, 0xdfff 5182 }; 5183 5184 5185 char germanUTF16[]={ 5186 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 5187 }; 5188 5189 byte germanSCSU[]={ 5190 (byte)0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65,(byte) 0xdf, 0x74 5191 }; 5192 5193 char russianUTF16[]={ 5194 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 5195 }; 5196 5197 byte russianSCSU[]={ 5198 0x12, (byte)0x9c,(byte)0xbe,(byte) 0xc1, (byte)0xba, (byte)0xb2, (byte)0xb0 5199 }; 5200 5201 char japaneseUTF16[]={ 5202 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 5203 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 5204 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 5205 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 5206 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 5207 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 5208 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 
0x30f3, 5209 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 5210 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 5211 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 5212 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 5213 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 5214 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 5215 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 5216 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 5217 }; 5218 5219 // SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 5220 //it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient 5221 byte japaneseSCSU[]={ 5222 0x08, 0x00, 0x1b, 0x4c,(byte) 0xea, 0x16, (byte)0xca, (byte)0xd3,(byte) 0x94, 0x0f, 0x53, (byte)0xef, 0x61, 0x1b, (byte)0xe5,(byte) 0x84, 5223 (byte)0xc4, 0x0f, (byte)0x53,(byte) 0xef, 0x61, 0x1b, (byte)0xe5, (byte)0x84, (byte)0xc4, 0x16, (byte)0xca, (byte)0xd3, (byte)0x94, 0x08, 0x02, 0x0f, 5224 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, (byte)0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41,(byte) 0x88, 0x4c, 5225 (byte) 0xe5,(byte) 0x97, (byte)0x9f, 0x08, 0x0c, 0x16,(byte) 0xca,(byte) 0xd3, (byte)0x94, 0x15, (byte)0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 5226 (byte) 0x8c, (byte)0xb4, (byte)0xa3,(byte) 0x9f,(byte) 0xca, (byte)0x99, (byte)0xcb,(byte) 0x8b, (byte)0xc2,(byte) 0x97,(byte) 0xcc,(byte) 0xaa,(byte) 0x84, 0x08, 0x02, 0x0e, 5227 0x7c, 0x73, (byte)0xe2, 0x16, (byte)0xa3,(byte) 0xb7, (byte)0xcb, (byte)0x93, (byte)0xd3,(byte) 0xb4,(byte) 0xc5, (byte)0xdc, (byte)0x9f, 0x0e, 0x79, 0x3e, 5228 0x06, (byte)0xae, (byte)0xb1, (byte)0x9d,(byte) 0x93, (byte)0xd3, 0x08, 0x0c, (byte)0xbe,(byte) 0xa3, (byte)0x8f, 0x08,(byte) 0x88,(byte) 0xbe,(byte) 0xa3,(byte) 0x8d, 5229 (byte)0xd3,(byte) 0xa8, (byte)0xa3, (byte)0x97,(byte) 0xc5, 0x17,(byte) 0x89, 0x08, 0x0d, 0x15,(byte) 0xd2, 0x08, 0x01, (byte)0x93, (byte)0xc8,(byte) 
0xaa, 5230 (byte)0x8f, 0x0e, 0x61, 0x1b, (byte)0x99,(byte) 0xcb, 0x0e, 0x4e, (byte)0xba, (byte)0x9f, (byte)0xa1,(byte) 0xae,(byte) 0x93, (byte)0xa8,(byte) 0xa0, 0x08, 5231 0x02, 0x08, 0x0c, (byte)0xe2, 0x16, (byte)0xa3, (byte)0xb7, (byte)0xcb, 0x0f, 0x4f,(byte) 0xe1,(byte) 0x80, 0x05,(byte) 0xec, 0x60, (byte)0x8d, 5232 (byte)0xea, 0x06,(byte) 0xd3,(byte) 0xe6, 0x0f,(byte) 0x8a, 0x00, 0x30, 0x44, 0x65,(byte) 0xb9, (byte)0xe4, (byte)0xfe,(byte) 0xe7,(byte) 0xc2, 0x06, 5233 (byte)0xcb, (byte)0x82 5234 }; 5235 5236 CharsetProviderICU cs = new CharsetProviderICU(); 5237 CharsetICU charset = (CharsetICU)cs.charsetForName("scsu"); 5238 CharsetDecoder decode = charset.newDecoder(); 5239 CharsetEncoder encode = charset.newEncoder(); 5240 5241 //String[] codePoints = {"allFeatures", "german","russian","japanese"}; 5242 byte[][] fromUnicode={allFeaturesSCSU,germanSCSU,russianSCSU,japaneseSCSU}; 5243 char[][] toUnicode = {allFeaturesUTF16, germanUTF16,russianUTF16,japaneseUTF16}; 5244 5245 for(int i=0;i<4;i++){ 5246 ByteBuffer decoderBuffer = ByteBuffer.wrap(fromUnicode[i]); 5247 CharBuffer encoderBuffer = CharBuffer.wrap(toUnicode[i]); 5248 5249 try{ 5250 // Decoding 5251 CharBuffer decoderResult = decode.decode(decoderBuffer); 5252 encoderBuffer.position(0); 5253 if(!decoderResult.equals(encoderBuffer)){ 5254 errln("Error occured while decoding "+ charset.name()); 5255 } 5256 // Encoding 5257 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5258 // RoundTrip Test 5259 ByteBuffer roundTrip = encoderResult; 5260 CharBuffer roundTripResult = decode.decode(roundTrip); 5261 encoderBuffer.position(0); 5262 if(!roundTripResult.equals(encoderBuffer)){ 5263 errln("Error occured while encoding "+ charset.name()); 5264 } 5265 // Test overflow for code coverage reasons 5266 if (i == 0) { 5267 ByteBuffer test = encoderResult; 5268 test.position(0); 5269 CharBuffer smallBuffer = CharBuffer.allocate(11); 5270 decode.reset(); 5271 CoderResult status = decode.decode(test, 
smallBuffer, true); 5272 if (status != CoderResult.OVERFLOW) { 5273 errln("Overflow buffer error should have been thrown."); 5274 } 5275 } 5276 }catch(Exception e){ 5277 errln("Exception while converting SCSU thrown: " + e); 5278 } 5279 } 5280 5281 /* Provide better code coverage */ 5282 /* testing illegal codepoints */ 5283 CoderResult illegalResult = CoderResult.UNDERFLOW; 5284 CharBuffer illegalDecoderTrgt = CharBuffer.allocate(10); 5285 5286 byte[] illegalDecoderSrc1 = { (byte)0x41, (byte)0xdf, (byte)0x0c }; 5287 decode.reset(); 5288 illegalResult = decode.decode(ByteBuffer.wrap(illegalDecoderSrc1), illegalDecoderTrgt, true); 5289 if (illegalResult == CoderResult.OVERFLOW || illegalResult == CoderResult.UNDERFLOW) { 5290 errln("Malformed error should have been returned for decoder " + charset.name()); 5291 } 5292 /* code coverage test from nucnvtst.c in ICU4C */ 5293 CoderResult ccResult = CoderResult.UNDERFLOW; 5294 int CCBufSize = 120 * 10; 5295 ByteBuffer trgt = ByteBuffer.allocate(CCBufSize); 5296 CharBuffer test = CharBuffer.allocate(CCBufSize); 5297 String [] ccSrc = { 5298 "\ud800\udc00", /* smallest surrogate*/ 5299 "\ud8ff\udcff", 5300 "\udBff\udFff", /* largest surrogate pair*/ 5301 "\ud834\udc00", 5302 //"\U0010FFFF", 5303 "Hello \u9292 \u9192 World!", 5304 "Hell\u0429o \u9292 \u9192 W\u00e4rld!", 5305 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5306 5307 "\u0648\u06c8", /* catch missing reset*/ 5308 "\u0648\u06c8", 5309 5310 "\u4444\uE001", /* lowest quotable*/ 5311 "\u4444\uf2FF", /* highest quotable*/ 5312 "\u4444\uf188\u4444", 5313 "\u4444\uf188\uf288", 5314 "\u4444\uf188abc\u0429\uf288", 5315 "\u9292\u2222", 5316 "Hell\u0429\u04230o \u9292 \u9292W\u00e4\u0192rld!", 5317 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5318 "Hello World!123456", 5319 "Hello W\u0081\u011f\u0082!", /* Latin 1 run*/ 5320 5321 "abc\u0301\u0302", /* uses SQn for u301 u302*/ 5322 "abc\u4411d", /* uses SQU*/ 5323 "abc\u4411\u4412d",/* uses SCU*/ 5324 
"abc\u0401\u0402\u047f\u00a5\u0405", /* uses SQn for ua5*/ 5325 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", /* SJIS like data*/ 5326 "\u9292\u2222", 5327 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", 5328 "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c", 5329 "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002", 5330 5331 "", /* empty input*/ 5332 "\u0000", /* smallest BMP character*/ 5333 "\uFFFF", /* largest BMP character*/ 5334 5335 /* regression tests*/ 5336 "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa", 5337 /*"\u00df\u01df\uf000\udbff\udfff\u000d\n\u0041\u00df\u0401\u015f\u00df\u01df\uf000\udbff\udfff",*/ 5338 "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c", 5339 "\u0041\u00df\u0401\u015f", 5340 "\u9066\u2123abc", 5341 //"\ud266\u43d7\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5", 5342 "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489", 5343 }; 5344 for (int i = 0; i < ccSrc.length; i++) { 5345 CharBuffer ubuf = CharBuffer.wrap(ccSrc[i]); 5346 encode.reset(); 5347 decode.reset(); 5348 trgt.clear(); 5349 test.clear(); 5350 ccResult = encode.encode(ubuf, trgt, true); 5351 if (ccResult.isError()) { 5352 errln("Error while encoding " + charset.name() + " in test for code coverage[" + i + "]."); 5353 } else { 5354 trgt.limit(trgt.position()); 5355 trgt.position(0); 5356 ccResult = decode.decode(trgt, test, true); 5357 if (ccResult.isError()) { 5358 errln("Error while decoding " + charset.name() + " in test for code coverage[" + i + "]."); 5359 } else 
{ 5360 ubuf.position(0); 5361 test.limit(test.position()); 5362 test.position(0); 5363 if (!equals(test, ubuf)) { 5364 errln("Roundtrip failed for " + charset.name() + " in test for code coverage[" + i + "]."); 5365 } 5366 } 5367 } 5368 } 5369 5370 /* Monkey test */ 5371 { 5372 char[] monkeyIn = { 5373 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 5374 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 5375 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 5376 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 5377 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 5378 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 5379 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 5380 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 5381 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 5382 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 5383 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 5384 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 5385 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 5386 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 5387 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 5388 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 5389 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 5390 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 5391 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 5392 /* test non-BMP code points */ 5393 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 
0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 5394 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 5395 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 5396 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 5397 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 5398 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 5399 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 5400 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 5401 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 5402 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 5403 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 5404 5405 5406 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 5407 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 5408 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 5409 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 5410 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 5411 }; 5412 encode.reset(); 5413 decode.reset(); 5414 CharBuffer monkeyCB = CharBuffer.wrap(monkeyIn); 5415 try { 5416 ByteBuffer monkeyBB = encode.encode(monkeyCB); 5417 /* CharBuffer monkeyEndResult =*/ decode.decode(monkeyBB); 5418 5419 } catch (Exception ex) { 5420 errln("Exception thrown while encoding/decoding monkey test in SCSU: " + ex); 5421 } 5422 } 5423 // Test malformed 5424 { 5425 char[] malformedSequence = { 5426 0xD899, 0xDC7F, 0xDC88, 0xDC88, 0xD888, 0xDDF9 5427 }; 5428 encode.reset(); 5429 CharBuffer malformedSrc = CharBuffer.wrap(malformedSequence); 5430 5431 try { 5432 encode.encode(malformedSrc); 5433 errln("Malformed error should have thrown an exception."); 5434 } catch (Exception ex) { 
5435 } 5436 } 5437 // Test overflow buffer 5438 { 5439 ByteBuffer overflowTest = ByteBuffer.wrap(allFeaturesSCSU); 5440 int sizes[] = { 8, 2, 11 }; 5441 for (int i = 0; i < sizes.length; i++) { 5442 try { 5443 decode.reset(); 5444 overflowTest.position(0); 5445 smBufDecode(decode, "SCSU overflow test", overflowTest, CharBuffer.allocate(sizes[i]), true, false); 5446 errln("Buffer overflow exception should have been thrown."); 5447 } catch (BufferOverflowException ex) { 5448 } catch (Exception ex) { 5449 errln("Buffer overflow exception should have been thrown."); 5450 } 5451 } 5452 5453 } 5454 } 5455 5456 /* Test for BOCU1 converter*/ 5457 @Test TestBOCU1Converter()5458 public void TestBOCU1Converter(){ 5459 char expected[]={ 5460 0xFEFF, 0x0061, 0x0062, 0x0020, // 0 5461 0x0063, 0x0061, 0x000D, 0x000A, 5462 5463 0x0020, 0x0000, 0x00DF, 0x00E6, // 8 5464 0x0930, 0x0020, 0x0918, 0x0909, 5465 5466 0x3086, 0x304D, 0x0020, 0x3053, // 16 5467 0x4000, 0x4E00, 0x7777, 0x0020, 5468 5469 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, // 24 5470 0x0020, 0xD7A3, 0xDC00, 0xD800, 5471 5472 0xD800, 0xDC00, 0xD845, 0xDDDD, // 32 5473 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 5474 5475 0xDFFF, 0x0001, 0x0E40, 0x0020, // 40 5476 0x0009 5477 }; 5478 5479 byte sampleText[]={ // from cintltst/bocu1tst.c/TestBOCU1 text 1 5480 (byte) 0xFB, 5481 (byte) 0xEE, 5482 0x28, // from source offset 0 5483 0x24, 0x1E, 0x52, (byte) 0xB2, 0x20, 5484 (byte) 0xB3, 5485 (byte) 0xB1, 5486 0x0D, 5487 0x0A, 5488 5489 0x20, // from 8 5490 0x00, (byte) 0xD0, 0x6C, (byte) 0xB6, (byte) 0xD8, (byte) 0xA5, 5491 0x20, 0x68, 5492 0x59, 5493 5494 (byte) 0xF9, 5495 0x28, // from 16 5496 0x6D, 0x20, 0x73, (byte) 0xE0, 0x2D, (byte) 0xDE, 0x43, 5497 (byte) 0xD0, 0x33, 0x20, 5498 5499 (byte) 0xFA, 5500 (byte) 0x83, // from 24 5501 0x25, 0x01, (byte) 0xFB, 0x16, (byte) 0x87, 0x4B, 0x16, 0x20, 5502 (byte) 0xE6, (byte) 0xBD, (byte) 0xEB, 0x5B, 0x4B, (byte) 0xCC, 5503 5504 (byte) 0xF9, 5505 (byte) 0xA2, // from 32 5506 (byte) 0xFC, 0x10, 0x3E, 
(byte) 0xFE, 0x16, 0x3A, (byte) 0x8C, 5507 0x20, (byte) 0xFC, 0x03, (byte) 0xAC, 5508 5509 0x01, /// from 41 5510 (byte) 0xDE, (byte) 0x83, 0x20, 0x09 5511 }; 5512 5513 CharsetProviderICU cs = new CharsetProviderICU(); 5514 CharsetICU charset = (CharsetICU)cs.charsetForName("BOCU-1"); 5515 CharsetDecoder decode = charset.newDecoder(); 5516 CharsetEncoder encode = charset.newEncoder(); 5517 5518 ByteBuffer decoderBuffer = ByteBuffer.wrap(sampleText); 5519 CharBuffer encoderBuffer = CharBuffer.wrap(expected); 5520 try{ 5521 // Decoding 5522 CharBuffer decoderResult = decode.decode(decoderBuffer); 5523 5524 encoderBuffer.position(0); 5525 if(!decoderResult.equals(encoderBuffer)){ 5526 errln("Error occured while decoding "+ charset.name()); 5527 } 5528 // Encoding 5529 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5530 // RoundTrip Test 5531 ByteBuffer roundTrip = encoderResult; 5532 CharBuffer roundTripResult = decode.decode(roundTrip); 5533 5534 encoderBuffer.position(0); 5535 if(!roundTripResult.equals(encoderBuffer)){ 5536 errln("Error occured while encoding "+ charset.name()); 5537 } 5538 }catch(Exception e){ 5539 errln("Exception while converting BOCU-1 thrown: " + e); 5540 } 5541 } 5542 5543 /* Test that ICU4C and ICU4J get the same ICU canonical name when given the same alias. */ 5544 @Test TestICUCanonicalNameConsistency()5545 public void TestICUCanonicalNameConsistency() { 5546 String[] alias = { 5547 "KSC_5601" 5548 }; 5549 String[] expected = { 5550 "windows-949-2000" 5551 }; 5552 5553 for (int i = 0; i < alias.length; i++) { 5554 String name = CharsetProviderICU.getICUCanonicalName(alias[i]); 5555 if (!name.equals(expected[i])) { 5556 errln("The ICU canonical name in ICU4J does not match that in ICU4C. 
Result: " + name + "Expected: " + expected[i]); 5557 } 5558 } 5559 } 5560 5561 /* Increase code coverage for CharsetICU and CharsetProviderICU*/ 5562 @Test TestCharsetICUCodeCoverage()5563 public void TestCharsetICUCodeCoverage() { 5564 CharsetProviderICU provider = new CharsetProviderICU(); 5565 5566 if (provider.charsetForName("UTF16", null) != null) { 5567 errln("charsetForName should have returned a null"); 5568 } 5569 5570 if (CharsetProviderICU.getJavaCanonicalName(null) != null) { 5571 errln("getJavaCanonicalName should have returned a null when null is given to it."); 5572 } 5573 5574 try { 5575 Charset testCharset = CharsetICU.forNameICU("bogus"); 5576 errln("UnsupportedCharsetException should be thrown for charset \"bogus\" - but got charset " + testCharset.name()); 5577 } catch (UnsupportedCharsetException ex) { 5578 logln("UnsupportedCharsetException was thrown for CharsetICU.forNameICU(\"bogus\")"); 5579 } 5580 5581 Charset charset = provider.charsetForName("UTF16"); 5582 5583 try { 5584 ((CharsetICU)charset).getUnicodeSet(null, 0); 5585 } catch (IllegalArgumentException ex) { 5586 return; 5587 } 5588 errln("IllegalArgumentException should have been thrown."); 5589 } 5590 5591 @Test TestCharsetLMBCS()5592 public void TestCharsetLMBCS() { 5593 String []lmbcsNames = { 5594 "LMBCS-1", 5595 "LMBCS-2", 5596 "LMBCS-3", 5597 "LMBCS-4", 5598 "LMBCS-5", 5599 "LMBCS-6", 5600 "LMBCS-8", 5601 "LMBCS-11", 5602 "LMBCS-16", 5603 "LMBCS-17", 5604 "LMBCS-18", 5605 "LMBCS-19" 5606 }; 5607 5608 char[] src = { 5609 0x0192, 0x0041, 0x0061, 0x00D0, 0x00F6, 0x0100, 0x0174, 0x02E4, 0x03F5, 0x03FB, 5610 0x05D3, 0x05D4, 0x05EA, 0x0684, 0x0685, 0x1801, 0x11B3, 0x11E8, 0x1F9A, 0x2EB4, 5611 0x3157, 0x3336, 0x3304, 0xD881, 0xDC88 5612 }; 5613 CharBuffer cbInput = CharBuffer.wrap(src); 5614 5615 CharsetProviderICU provider = new CharsetProviderICU(); 5616 5617 for (int i = 0; i < lmbcsNames.length; i++) { 5618 Charset charset = provider.charsetForName(lmbcsNames[i]); 5619 if 
(charset == null) { 5620 errln("Unable to create LMBCS charset: " + lmbcsNames[i]); 5621 return; 5622 } 5623 CharsetEncoder encoder = charset.newEncoder(); 5624 CharsetDecoder decoder = charset.newDecoder(); 5625 5626 try { 5627 cbInput.position(0); 5628 ByteBuffer bbTmp = encoder.encode(cbInput); 5629 CharBuffer cbOutput = decoder.decode(bbTmp); 5630 5631 if (!equals(cbInput, cbOutput)) { 5632 errln("Roundtrip test failed for charset: " + lmbcsNames[i]); 5633 } 5634 } catch (Exception ex) { 5635 if (i >= 8) { 5636 /* Expected exceptions */ 5637 continue; 5638 } 5639 errln("Exception thrown: " + ex + " while using charset: " + lmbcsNames[i]); 5640 } 5641 5642 } 5643 5644 // Test malformed 5645 CoderResult malformedResult = CoderResult.UNDERFLOW; 5646 byte[] malformedBytes = { 5647 (byte)0x61, (byte)0x01, (byte)0x29, (byte)0x81, (byte)0xa0, (byte)0x0f 5648 }; 5649 ByteBuffer malformedSrc = ByteBuffer.wrap(malformedBytes); 5650 CharBuffer malformedTrgt = CharBuffer.allocate(10); 5651 int[] malformedLimits = { 5652 2, 6 5653 }; 5654 CharsetDecoder malformedDecoderTest = provider.charsetForName("LMBCS-1").newDecoder(); 5655 for (int n = 0; n < malformedLimits.length; n++) { 5656 malformedDecoderTest.reset(); 5657 5658 malformedSrc.position(0); 5659 malformedSrc.limit(malformedLimits[n]); 5660 5661 malformedTrgt.clear(); 5662 5663 malformedResult = malformedDecoderTest.decode(malformedSrc,malformedTrgt, true); 5664 if (!malformedResult.isMalformed()) { 5665 errln("Malformed error should have resulted."); 5666 } 5667 } 5668 } 5669 5670 /* 5671 * This is a port of ICU4C TestAmbiguousConverter in cintltst. 5672 * Since there is no concept of ambiguous converters in ICU4J 5673 * this test is merely for code coverage reasons. 
*/
    @Test
    public void TestAmbiguousConverter() {
        byte [] inBytes = {
            0x61, 0x5b, 0x5c
        };
        ByteBuffer src = ByteBuffer.wrap(inBytes);
        CharBuffer trgt = CharBuffer.allocate(20);

        CoderResult result = CoderResult.UNDERFLOW;
        CharsetProviderICU provider = new CharsetProviderICU();
        String[] names = CharsetProviderICU.getAllNames();

        // Decode the same three bytes with every charset the provider can create.
        // No result is asserted; conversion errors are deliberately ignored.
        for (int i = 0; i < names.length; i++) {
            Charset charset = provider.charsetForName(names[i]);
            if (charset == null) {
                /* We don't care about any failures because not all converters are available. */
                continue;
            }
            CharsetDecoder decoder = charset.newDecoder();

            src.position(0);
            trgt.clear();

            result = decoder.decode(src, trgt, true);
            if (result.isError()) {
                /* We don't care about any failures. */
                continue;
            }
        }
    }

    /*
     * Checks CharsetICU.isFixedWidth() against one list of charsets expected to be
     * fixed width and one list expected not to be. A charset that cannot be created
     * is reported by name and skipped.
     */
    @Test
    public void TestIsFixedWidth() {
        String[] fixedWidth = {
            "US-ASCII",
            "UTF32",
            "ibm-5478_P100-1995"
        };

        String[] notFixedWidth = {
            "GB18030",
            "UTF8",
            "windows-949-2000",
            "UTF16"
        };
        CharsetProvider provider = new CharsetProviderICU();
        Charset charset;

        for (int i = 0; i < fixedWidth.length; i++) {
            charset = provider.charsetForName(fixedWidth[i]);
            // charsetForName returns null for a charset it cannot provide; report that
            // by name rather than failing with a NullPointerException below.
            if (charset == null) {
                errln("Unable to create charset: " + fixedWidth[i]);
                continue;
            }

            if (!((CharsetICU)charset).isFixedWidth()) {
                errln(fixedWidth[i] + " is a fixedWidth charset but returned false.");
            }
        }

        for (int i = 0; i < notFixedWidth.length; i++) {
            charset = provider.charsetForName(notFixedWidth[i]);
            if (charset == null) {
                errln("Unable to create charset: " + notFixedWidth[i]);
                continue;
            }

            if (((CharsetICU)charset).isFixedWidth()) {
                errln(notFixedWidth[i] + " is NOT a fixedWidth charset but returned true.");
            }
        }
    }

    /*
     * Checks CharsetEncoderICU.getMaxBytesForString() for a string length of 10,
     * using each charset's encoder maxBytesPerChar() truncated to an int.
     * charsets[i] is expected to yield expected[i].
     */
    @Test
    public void TestBytesLengthForString() {
        CharsetProviderICU provider = new CharsetProviderICU();
        String[] charsets = {
            "windows-949-2000",
            "ibm-1047_P100-1995,swaplfnl",
            "ibm-930_P120-1999",
            "ISCII,version=0",
            "ISO_2022,locale=ko,version=0"
        };

        int[] expected = {
            40,
            20,
            80, /* changed from 60 to 80 to reflect the updates by #9205 */
            80,
            160
        };

        int stringLength = 10;
        int length;
        int maxCharSize;

        for (int i = 0; i < charsets.length; i++) {
            Charset charset = provider.charsetForName(charsets[i]);
            // Report an unavailable charset by name instead of a NullPointerException.
            if (charset == null) {
                errln("Unable to create charset: " + charsets[i]);
                continue;
            }
            maxCharSize = (int)charset.newEncoder().maxBytesPerChar();
            length = CharsetEncoderICU.getMaxBytesForString(stringLength, maxCharSize);

            if (length != expected[i]) {
                errln("For charset " + charsets[i] + " with string length " + stringLength + ", expected max byte length is " + expected[i] + " but got " + length);
            }
        }
    }

    /*
     * When converting slices of a larger CharBuffer, Charset88591 and CharsetASCII does not handle the buffer correctly when
     * an unmappable character occurs.
     * Ticket #8729
     */
    @Test
    public void TestCharsetASCII8859BufferHandling() {
        String firstLine = "C077693790=|MEMO=|00=|022=|Blanche st and the driveway grate was fault and rotated under my car=|\r\n";
        String secondLine = "C077693790=|MEMO=|00=|023=|puncturing the fuel tank. I spoke to the store operator (Ram Reddi –=|\r\n";

        String charsetNames[] = {
            "ASCII",
            "ISO-8859-1"
        };

        CoderResult result = CoderResult.UNDERFLOW;

        CharsetEncoder encoder;

        ByteBuffer outBuffer = ByteBuffer.allocate(500);
        CharBuffer charBuffer = CharBuffer.allocate(firstLine.length() + secondLine.length());
        charBuffer.put(firstLine);
        charBuffer.put(secondLine);
        charBuffer.flip();

        for (int i = 0; i < charsetNames.length; i++) {
            encoder = CharsetICU.forNameICU(charsetNames[i]).newEncoder();

            // Start every charset with an empty output buffer so that bytes written
            // for a previous charset cannot use up the available space.
            outBuffer.clear();

            // Slice the second line out of the larger buffer, leaving off its
            // final two characters.
            charBuffer.position(firstLine.length());
            CharBuffer charBufferSlice = charBuffer.slice();
            charBufferSlice.limit(secondLine.length() - 2);


            try {
                result = encoder.encode(charBufferSlice, outBuffer, false);
                if (!result.isUnmappable()) {
                    errln("Result of encoding " + charsetNames[i] + " should be: \"Unmappable\". Instead got: " + result);
                }
            } catch (IllegalArgumentException ex) {
                errln("IllegalArgumentException should not have been thrown when encoding: " + charsetNames[i]);
            }
        }
    }

    /*
     * When converting with the String method getBytes(), buffer overflow exception is thrown because
     * of the way ICU4J is calculating the max bytes per char. This should be changed only on the ICU4J
     * side to match what the Java method is expecting. The ICU4C size will be left unchanged.
     * Ticket #9205
     */
    @Test
    public void TestBufferOverflowErrorUsingJavagetBytes() {
        String charsetName = "ibm-5035";
        String testCase = "\u7d42";

        // Only the absence of an exception is checked; the encoded bytes are discarded.
        try {
            testCase.getBytes(charsetName);
        } catch (Exception ex) {
            errln("Error calling getBytes(): " + ex);
        }

    }

    // Test that all code points which have the default ignorable Unicode property
    // are ignored if they have no mapping.
5838 // If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) 5839 // in CharsetCallback.java should be updated. 5840 // Keep in sync with ICU4C intltest/convtest.cpp. 5841 @Test TestDefaultIgnorableCallback()5842 public void TestDefaultIgnorableCallback() { 5843 String cnv_name = "euc-jp-2007"; 5844 String pattern_ignorable = "[:Default_Ignorable_Code_Point:]"; 5845 String pattern_not_ignorable = 5846 "[[:^Default_Ignorable_Code_Point:]" + 5847 // For test performance, skip large ranges that will likely remain unassigned 5848 // for a long time, and private use code points. 5849 "-[\\U00040000-\\U000DFFFF]-[:Co:]" + 5850 "]"; 5851 UnicodeSet set_ignorable = new UnicodeSet(pattern_ignorable); 5852 UnicodeSet set_not_ignorable = new UnicodeSet(pattern_not_ignorable); 5853 CharsetEncoder encoder = CharsetICU.forNameICU(cnv_name).newEncoder(); 5854 5855 // set callback for the converter 5856 encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 5857 encoder.onMalformedInput(CodingErrorAction.REPLACE); 5858 5859 // test ignorable code points are ignored 5860 UnicodeSetIterator iter = new UnicodeSetIterator(set_ignorable); 5861 while (iter.next()) { 5862 encoder.reset(); 5863 int c = iter.codepoint; 5864 try { 5865 if(encoder.encode(CharBuffer.wrap(Character.toChars(c))).limit() > 0) { 5866 errln("Callback should have ignore default ignorable: U+" + Integer.toHexString(c)); 5867 } 5868 } catch (Exception ex) { 5869 errln("Error received converting +" + Integer.toHexString(c)); 5870 } 5871 } 5872 5873 // test non-ignorable code points are not ignored 5874 iter.reset(set_not_ignorable); 5875 while (iter.next()) { 5876 encoder.reset(); 5877 int c = iter.codepoint; 5878 try { 5879 if(encoder.encode(CharBuffer.wrap(Character.toChars(c))).limit() == 0) { 5880 errln("Callback should not have ignored: U+" + Integer.toHexString(c)); 5881 } 5882 } catch (Exception ex) { 5883 errln("Error received converting U+" + Integer.toHexString(c)); 5884 } 5885 
} 5886 } 5887 } 5888