/*
 **********************************************************************
 * Copyright (c) 2002-2004, International Business Machines
 * Corporation and others. All Rights Reserved.
 **********************************************************************
 * Author: Mark Davis
 **********************************************************************
 */
package org.unicode.cldr.tool;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.dev.tool.shared.UOption;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.RuleBasedNumberFormat;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.ULocale;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.Comparator;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.tool.ShowData.DataShower;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdData.Attribute;
import org.unicode.cldr.util.DtdData.AttributeStatus;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.FileCopier;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.LanguageTagParser.Fields;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.PathHeader.PageId;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.StringId;
import org.unicode.cldr.util.TransliteratorUtilities;
import org.unicode.cldr.util.XPathParts;
import org.xml.sax.SAXException;

/**
 * This is a simple class that walks through the CLDR hierarchy. It gathers together all the items
 * from all the locales that share the same element chain, and thus presents a "sideways" view of
 * the data, in files called by_type/X.html, where X is a type. X may be the concatenation of more
 * than one element, where the file would otherwise be too large.
 *
 * @author medavis
 */
/*
 * Notes:
 * http://xml.apache.org/xerces2-j/faq-grammars.html#faq-3
 * http://developers.sun.com/dev/coolstuff/xml/readme.html
 * http://lists.xml.org/archives/xml-dev/200007/msg00284.html
 * http://java.sun.com/j2se/1.4.2/docs/api/org/xml/sax/DTDHandler.html
 */
public class GenerateSidewaysView {
    /** When true, {@link #writeHeader} never opens the TSV side file. */
    private static final boolean TOO_BIG_FOR_GITHUB = true;

    private static final String DIR_NAME = "by_type";
    // debug flags
    static final boolean DEBUG = false;
    static final boolean DEBUG2 = false;
    static final boolean DEBUG_SHOW_ADD = false;
    static final boolean DEBUG_ELEMENT = false;
    static final boolean DEBUG_SHOW_BAT = false;

    static final boolean FIX_ZONE_ALIASES = true;

    // Indexes into the options array below; the order must match the array's order.
    private static final int HELP1 = 0,
            HELP2 = 1,
            SOURCEDIR = 2,
            DESTDIR = 3,
            MATCH = 4,
            SKIP = 5,
            TZADIR = 6,
            NONVALIDATING = 7,
            SHOW_DTD = 8,
            TRANSLIT = 9,
            PATH = 10;

    private static final UOption[] options = {
        UOption.HELP_H(),
        UOption.HELP_QUESTION_MARK(),
        UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY),
        UOption.DESTDIR()
                .setDefault(
                        CLDRPaths.CHART_DIRECTORY
                                + DIR_NAME
                                + "/"), // C:/cvsdata/unicode/cldr/diff/by_type/
        UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
        UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"),
        UOption.create("tzadir", 't', UOption.REQUIRES_ARG)
                .setDefault("C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
        UOption.create("nonvalidating", 'n', UOption.NO_ARG),
        UOption.create("dtd", 'w', UOption.NO_ARG),
        UOption.create("transliterate", 'y', UOption.NO_ARG),
        UOption.create("path", 'p', UOption.REQUIRES_ARG),
    };

    private static final Matcher altProposedMatcher = CLDRFile.ALT_PROPOSED_PATTERN.matcher("");
    // private static final UnicodeSet ALL_CHARS = new UnicodeSet(0, 0x10FFFF);
    protected static final UnicodeSet COMBINING = new UnicodeSet("[[:m:]]").freeze();

    /**
     * Returns the script of the first code point in the set whose script is neither COMMON nor
     * INHERITED.
     *
     * @param exemplars the set to scan, in iteration order
     * @return a {@link UScript} code, or {@link UScript#COMMON} when no such code point is found
     */
    static int getFirstScript(UnicodeSet exemplars) {
        for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next(); ) {
            int script = UScript.getScript(it.codepoint);
            if (script == UScript.COMMON || script == UScript.INHERITED) {
                continue;
            }
            return script;
        }
        return UScript.COMMON;
    }

    /** Root collator (numeric, identical strength) with a code point comparison as tie-breaker. */
    static Comparator<Object> UCA;

    static {
        RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
        UCA2.setNumericCollation(true);
        UCA2.setStrength(Collator.IDENTICAL);
        UCA =
                new org.unicode.cldr.util.MultiComparator(
                        UCA2, new UTF16.StringComparator(true, false, 0));
    }

    /**
     * The gathered data: path header, then value, then the locales having that value. A locale ID
     * carries a trailing "*" when its value is marked draft (unconfirmed or provisional).
     */
    private static Map<PathHeader, Map<String, Set<String>>> path_value_locales = new TreeMap<>();

    private static long startTime = System.currentTimeMillis();

    static RuleBasedCollator standardCollation =
            (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH);

    static {
        standardCollation.setStrength(Collator.IDENTICAL);
        standardCollation.setNumericCollation(true);
    }

    private static CLDRFile english;
    // private static DataShower dataShower = new DataShower();
    private static Matcher pathMatcher;

    /**
     * A print sink that silently discards everything until a {@link #printWriter} has been
     * assigned.
     */
    static final class OptionalPrinter {
        /** The underlying writer; null means output is dropped. */
        PrintWriter printWriter;

        /** Prints the string to the underlying writer, if there is one. */
        public void print(String s) {
            if (printWriter != null) {
                // Delegate to the writer; calling print(s) here would recurse forever.
                printWriter.print(s);
            }
        }

        /** Prints a line feed to the underlying writer, if there is one. */
        public void println() {
            print("\n");
        }

        /** Closes the underlying writer, if there is one. */
        public void close() {
            if (printWriter != null) {
                // Delegate to the writer; calling close() here would recurse forever.
                printWriter.close();
            }
        }
    }

    /**
     * Entry point: loads every available locale, then writes the index page, one HTML page per
     * section/page combination, and the exemplar pages into the destination directory.
     *
     * @param args command line arguments, parsed into {@link #options}
     * @throws SAXException declared for callers; not thrown directly by the code in this method
     * @throws IOException if an output file cannot be written
     */
    public static void main(String[] args) throws SAXException, IOException {
        startTime = System.currentTimeMillis();
        ToolUtilities.registerExtraTransliterators();
        UOption.parseArgs(args, options);

        pathMatcher =
                options[PATH].value == null
                        ? null
                        : PatternCache.get(options[PATH].value).matcher("");

        File[] paths = {
            new File(CLDRPaths.MAIN_DIRECTORY),
            new File(CLDRPaths.ANNOTATIONS_DIRECTORY),
            new File(CLDRPaths.SUBDIVISIONS_DIRECTORY)
        };
        Factory cldrFactory = SimpleFactory.make(paths, options[MATCH].value);

        // Factory cldrFactory = Factory.make(options[SOURCEDIR].value, options[MATCH].value);
        english = cldrFactory.make("en", true);
        pathHeaderFactory = PathHeader.getFactory(english);

        FileCopier.ensureDirectoryExists(options[DESTDIR].value);
        FileCopier.copy(
                GenerateSidewaysView.class,
                "bytype-index.css",
                options[DESTDIR].value,
                "index.css");
        FormattedFileWriter.copyIncludeHtmls(options[DESTDIR].value);

        // now get the info

        loadInformation(cldrFactory);
        String oldMain = "";
        PrintWriter out = null;

        System.out.println("Getting types " + path_value_locales.size());
        // Set<String> types = new TreeSet<String>();
        // for (PathHeader path : path_value_locales.keySet()) {
        // String main = getFileName2(path);
        // if (!main.equals(oldMain)) {
        // oldMain = main;
        // types.add(main);
        // }
        // }
        String headerString = getHeader(path_value_locales.keySet());
        FileCopier.copyAndReplace(
                GenerateSidewaysView.class,
                "bytype-index.html",
                options[DESTDIR].value,
                "index.html",
                ImmutableMap.of(
                        "%header%",
                        headerString,
                        "%version%",
                        ToolConstants.CHART_DISPLAY_VERSION,
                        "%index%",
                        "../index.html",
                        "%index-title%",
                        "Main Charts Index",
                        "%date%",
                        CldrUtility.isoFormatDateOnly(new Date())));
        // FileUtilities.copyFile(GenerateSidewaysView.class, "bytype-index.html",
        // options[DESTDIR].value, "index.html",
        // new String[] { "%header%", headerString });

        System.out.println(
                "Printing files in " + new File(options[DESTDIR].value).getAbsolutePath());
        // Transliterator toLatin = Transliterator.getInstance("any-latin");
        toHTML = TransliteratorUtilities.toHTML;
        // UnicodeSet BIDI_R = new UnicodeSet("[[:Bidi_Class=R:][:Bidi_Class=AL:]]");

        String oldHeader = "";
        OptionalPrinter tsvFile = new OptionalPrinter();

        for (PathHeader path : path_value_locales.keySet()) {
            String main = getFileName2(path, null);
            if (!main.equals(oldMain)) {
                // new section/page: close the previous file and open the next one
                oldMain = main;
                out =
                        start(
                                out,
                                main,
                                headerString,
                                path.getSection() + ":" + path.getPage(),
                                tsvFile);
                out.println("<table class='table'>");
                oldHeader = "";
            }
            String key = path.getCode();
            String anchor = toHTML.transliterate(key);

            String originalPath = path.getOriginalPath(); // prettyPath.getOriginal(path);
            String englishValue = english.getStringValue(originalPath);
            if (englishValue != null) {
                englishValue = "English: ‹" + englishValue + "›";
            } else {
                englishValue = "";
            }

            String header = path.getHeader();
            if (!header.equals(oldHeader) && !header.equals("null")) {
                out.println(
                        "<tr><th colSpan='2' class='pathHeader'>"
                                + CldrUtility.getDoubleLinkedText(header)
                                + "</th></tr>");
                oldHeader = header;
            }
            String anchorId = Long.toHexString(StringId.getId(path.getOriginalPath()));
            out.println(
                    "<tr>"
                            + "<th class='path'>"
                            + CldrUtility.getDoubleLinkedText(anchorId, anchor)
                            + "</th>"
                            + "<th class='path'>"
                            + toHTML.transliterate(englishValue)
                            + "</th>"
                            + "</tr>");
            Map<String, Set<String>> value_locales = path_value_locales.get(path);
            for (String value : value_locales.keySet()) {
                // String outValue = toHTML.transliterate(value);
                // String transValue = value;
                // try {
                // transValue = toLatin.transliterate(value);
                // } catch (RuntimeException e) {
                // }
                // if (!transValue.equals(value)) {
                // outValue = "<span title='" + toHTML.transliterate(transValue) + "'>" + outValue +
                // "</span>";
                // }
                String valueClass = " class='value'";
                if (DataShower.getBidiStyle(value).length() != 0) {
                    valueClass = " class='rtl_value'";
                }
                out.println(
                        "<tr><th"
                                + valueClass
                                + ">"
                                + DataShower.getPrettyValue(value)
                                + "</th><td class='td'>");
                tsvFile.print(
                        path.getSection()
                                + "\t"
                                + path.getPage()
                                + "\t"
                                + path.getHeader()
                                + "\t"
                                + path.getCode()
                                + "\t"
                                + value
                                + "\t");

                Set<String> locales = value_locales.get(value);
                boolean first = true;
                boolean containsRoot = locales.contains("root");
                for (String locale : locales) {
                    if (first) first = false;
                    else out.print(" ");
                    if (locale.endsWith("*")) {
                        // draft value: strip the marker and show in italics
                        locale = locale.substring(0, locale.length() - 1);
                        out.print("<i>\u00B7" + locale + "\u00B7</i>");
                        tsvFile.print("\u00B7" + locale + "\u00B7");
                    } else if (!containsRoot) {
                        out.print("\u00B7" + locale + "\u00B7");
                        tsvFile.print("\u00B7" + locale + "\u00B7");
                    } else if (locale.contains("_")) {
                        // not same as root, but need to test for parent
                        // if the parent is not in the same list, then we include anyway.
                        // Cf http://unicode.org/cldr/trac/ticket/7228
                        String parent = LocaleIDParser.getParent(locale);
                        if (!locales.contains(parent)) {
                            out.print("<b>\u00B7" + locale + "\u00B7</b>");
                            tsvFile.print("\u00B7" + locale + "\u00B7");
                        }
                    }
                }
                if (containsRoot) {
                    out.print("<b>\u00B7all\u00B7others\u00B7</b>");
                    tsvFile.print("\u00B7all-others\u00B7");
                }
                out.println("</td></tr>");
                tsvFile.println();
            }
        }
        for (String[] pair : EXEMPLARS) {
            // showExemplars closes the writer it is given and opens a new one, so the returned
            // writer must be kept; otherwise the exemplar pages are never finished or closed.
            out = showExemplars(out, headerString, pair[0], pair[1], pair[2], tsvFile);
        }
        finish(out, tsvFile);
        finishAll(out, tsvFile);
        System.out.println(
                "Done in "
                        + new RuleBasedNumberFormat(
                                        new ULocale("en"), RuleBasedNumberFormat.DURATION)
                                .format((System.currentTimeMillis() - startTime) / 1000.0));
    }

    /** Exemplar pages to generate; each row is {path, file name suffix, page title}. */
    static final String[][] EXEMPLARS = {
        {"//ldml/characters/exemplarCharacters", "main", "Main Exemplars"},
        {
            "//ldml/characters/exemplarCharacters[@type=\"punctuation\"]",
            "punctuation",
            "Punctuation Exemplars"
        },
        {"//ldml/characters/exemplarCharacters[@type=\"index\"]", "index", "Index Exemplars"},
        // TODO look at numbers, auxiliary
    };

    /**
     * Writes one exemplar page: for each script, a table of locales against the characters in
     * their exemplar sets.
     *
     * @param out the currently open page, which is finished and closed first; may be null
     * @param headerString the navigation header printed at the top of the page
     * @param pathName the exemplar path whose gathered values are shown
     * @param variant file name suffix; "main" additionally adds the NFD forms of each item
     * @param title the page title
     * @param tsvFile the optional TSV sink, passed through to {@link #start}
     * @return the writer for the newly opened page; the caller is responsible for finishing it
     * @throws IOException if the page cannot be opened
     */
    private static PrintWriter showExemplars(
            PrintWriter out,
            String headerString,
            String pathName,
            String variant,
            String title,
            OptionalPrinter tsvFile)
            throws IOException {
        PathHeader ph = fixPath(pathName, null);
        String filename = getFileName2(ph, variant);
        out = start(out, filename, headerString, title, tsvFile);
        Map<String, Set<String>> value_locales = path_value_locales.get(ph);
        if (value_locales == null) {
            // Nothing was gathered for this path (for example it was excluded by the path
            // filter), so there are no tables to write.
            return out;
        }

        // TODO change logic so that aux characters characters work well.

        Map<String, UnicodeMap<Set<String>>> script_UnicodeMap = new TreeMap<>();
        // UnicodeMap mapping = new UnicodeMap();
        UnicodeSet stuffToSkip = new UnicodeSet("[:Han:]");

        // get the locale information
        UnicodeSet totalExemplars = new UnicodeSet();
        for (String value : value_locales.keySet()) {
            // flatten out UnicodeSet
            UnicodeSet exemplars = new UnicodeSet(value);
            if (variant.equals("main")) {
                UnicodeSet extras = new UnicodeSet();
                for (String item : exemplars) {
                    extras.addAll(Normalizer.normalize(item, Normalizer.NFD));
                }
                exemplars.addAll(extras);
            }
            totalExemplars.addAll(exemplars);
            exemplars.removeAll(stuffToSkip);

            Set<String> locales = value_locales.get(value);
            // String script = UScript.getName(getFirstScript(exemplars));
            for (String locale : locales) {
                checkTr(script_UnicodeMap);
                String key =
                        locale.endsWith("*") ? locale.substring(0, locale.length() - 1) : locale;
                String script = LOCALE_TO_SCRIPT.get(key);
                // try a few variants until we get the script
                if (script == null && key.contains("_")) {
                    String simpleParent = LanguageTagParser.getSimpleParent(key);
                    script = LOCALE_TO_SCRIPT.get(simpleParent);
                    if (script == null && simpleParent.contains("_")) {
                        simpleParent = LanguageTagParser.getSimpleParent(simpleParent);
                        script = LOCALE_TO_SCRIPT.get(simpleParent);
                    }
                }
                if (script == null) {
                    script = UScript.getName(UScript.UNKNOWN);
                }
                Set<String> temp = new HashSet<>();
                temp.add(locale);
                checkTr(script_UnicodeMap);
                UnicodeMap<Set<String>> mapping =
                        script_UnicodeMap.computeIfAbsent(script, k -> new UnicodeMap<>());
                checkTr(script_UnicodeMap);
                mapping.composeWith(exemplars, temp, setComposer);
                checkTr(script_UnicodeMap);
            }
        }
        System.out.println("@@@TOTAL:\t" + variant + "\t" + totalExemplars.toPattern(false));
        for (String script : script_UnicodeMap.keySet()) {
            UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
            writeCharToLocaleMapping(out, script, mapping);
        }
        return out;
    }

    /**
     * Debugging aid: prints "huh?" when the Cyrillic map lists locale "tr" for U+0021.
     *
     * @param script_UnicodeMap the per-script maps built so far
     */
    private static void checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap) {
        UnicodeMap<Set<String>> unicodeMap = script_UnicodeMap.get("Cyrillic");
        if (unicodeMap == null) {
            return;
        }
        Set<String> foo = unicodeMap.get(0x21);
        if (foo == null) {
            return;
        }
        if (foo.contains("tr")) {
            System.out.println("huh?");
        }
    }

    /**
     * Writes the table for one script: one row per locale, one column per single-character
     * exemplar, and a final "Clusters" column holding the multi-character exemplars. Nothing is
     * written for the script "Hangul".
     *
     * @param out the page to write to; flushed at the end
     * @param script the script name, used as the table caption
     * @param mapping exemplar item to the set of locales that contain it
     */
    private static void writeCharToLocaleMapping(
            PrintWriter out, String script, UnicodeMap<Set<String>> mapping) {
        BreakIterator charBreaks =
                BreakIterator.getCharacterInstance(ULocale.ROOT); // TODO, make default language for
        // script
        System.out.println("@@Exemplars for\t" + script + "\t" + mapping.keySet());
        if (script.equals("Hangul")) { // || script.equals("Common")
            return; // skip these
        }
        // find out all the locales and all the characters
        Set<String> allLocales = new TreeSet<>(UCA);
        Set<String> allChars = new TreeSet<>(UCA);
        Set<String> allStrings = new TreeSet<>(UCA);
        for (Set<String> locales : mapping.getAvailableValues()) {
            allLocales.addAll(locales);
            UnicodeSet unicodeSet = mapping.keySet(locales);
            for (String item : unicodeSet) {
                // an item that ends at the first character boundary is a single character;
                // anything longer goes into the clusters column
                charBreaks.setText(item);
                int endFirst = charBreaks.next();
                if (endFirst == item.length()) {
                    allChars.add(item);
                } else {
                    allStrings.add(item);
                }
            }
        }
        // get the columns, and show them
        out.println("<table class='table' style='width:1%'>");
        out.println("<caption>" + script + "</caption>");
        exemplarHeader(out, allChars);

        for (String locale : allLocales) {
            String headerHeader =
                    "<th class='head'>"
                            + cleanLocale(locale, false)
                            + "</th><td class='head nowrap left'>"
                            + cleanLocale(locale, true)
                            + "</td>";
            out.println("<tr>");
            out.println(headerHeader);

            for (String item : allChars) {
                // String exemplarsWithoutBrackets = displayExemplars(item);
                if (mapping.get(item).contains(locale)) {
                    out.println("<td class='cell'" + ">" + displayCharacter(item) + "</td>");
                } else {
                    out.println("<td class='empty'>\u00a0</td>");
                }
            }
            // now strings, if any
            StringBuilder strings = new StringBuilder();
            int lastLineStart = 0;
            for (String item : allStrings) {
                // String exemplarsWithoutBrackets = displayExemplars(item);
                if (mapping.get(item).contains(locale)) {
                    int str_len = strings.length();
                    if (str_len != 0) {
                        // start a new line once the current one is more than 20 chars long
                        if (str_len - lastLineStart > 20) {
                            strings.append(System.lineSeparator());
                            lastLineStart = str_len;
                        } else {
                            strings.append(' ');
                        }
                    }
                    strings.append(displayCharacter(item));
                }
            }
            if (strings.length() == 0) {
                out.println("<td class='empty'>\u00a0</td>");
            } else {
                // NOTE(review): each item was already passed through displayCharacter above, so
                // the joined text goes through toHTML a second time here; confirm that this
                // double transform is intended.
                out.println(
                        "<td class='cell nowrap'>"
                                + displayCharacter(strings.toString())
                                        .replace(System.lineSeparator(), "<br>")
                                + "</td>");
            }

            out.println(headerHeader);
            out.println("</tr>");
        }
        exemplarHeader(out, allChars);
        out.println("</table>");
        out.flush();
    }

    /**
     * Builds an HTML {@code title} attribute listing the code points of the item and their names.
     *
     * @param item the exemplar item
     * @return the attribute text, of the form {@code title='U+...'}
     */
    private static String characterTitle(String item) {
        return ("title='U+"
                + toHTML.transform(
                        Utility.hex(item, 4, ", U+", true, new StringBuilder())
                                + " "
                                + UCharacter.getName(item, ", "))
                + "'");
    }

    /**
     * Writes the header row of an exemplar table: a locale column, one column per character, the
     * clusters column, and the locale column again.
     *
     * @param out the page to write to
     * @param allChars the single-character exemplars, in column order
     */
    private static void exemplarHeader(PrintWriter out, Set<String> allChars) {
        out.println("<tr>");
        out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
        for (String item : allChars) {
            out.println(
                    "<th class='head' "
                            + characterTitle(item)
                            + ">"
                            + displayCharacter(item)
                            + "</th>");
        }
        out.println("<th class='head'>Clusters</th>");
        out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
        out.println("</tr>");
    }

    static final UnicodeSet NONSPACING =
            new UnicodeSet("[[:Mn:][:Me:][:default_ignorable_code_point:]]").freeze();

    /**
     * Converts an exemplar item to HTML. An item whose first code point is in {@link #NONSPACING}
     * is wrapped in no-break spaces before conversion.
     *
     * @param item the text to display
     * @return the HTML text, or {@code <i>none</i>} for the empty string
     */
    public static String displayCharacter(String item) {
        if (item.length() == 0) return "<i>none</i>";
        int ch = item.codePointAt(0);
        if (NONSPACING.contains(ch)) {
            item = "\u00a0" + item + "\u00a0";
        }
        String result = toHTML.transform(item);
        return result;
    }

    static LanguageTagParser cleanLocaleParser = new LanguageTagParser();
    static Set<Fields> allButScripts = EnumSet.allOf(Fields.class);

    static {
        allButScripts.remove(Fields.SCRIPT);
    }

    /**
     * Formats a locale ID for display, leaving out the script field.
     *
     * @param item the locale ID, optionally ending in "*" to mark a draft value; may be null
     * @param name if true, return the English name of the locale instead of its ID
     * @return HTML text, in italics when the item was marked draft; {@code <i>null</i>} when the
     *     item is null or no English name is available
     */
    private static String cleanLocale(String item, boolean name) {
        if (item == null) {
            return "<i>null</i>";
        }
        boolean draft = item.endsWith("*");
        if (draft) {
            item = item.substring(0, item.length() - 1);
        }
        cleanLocaleParser.set(item);
        item = cleanLocaleParser.toString(allButScripts);
        String core = item;
        item = toHTML.transform(item);
        if (name) {
            item = english.getName(core);
            item = item == null ? "<i>null</i>" : toHTML.transform(item);
        }
        if (draft) {
            item = "<i>" + item + "</i>";
        }
        return item;
    }

    // private static void showExemplarRow(PrintWriter out, Set<String> allLocales, UnicodeSet
    // lastChars, Set locales) {
    // String exemplarsWithoutBrackets = displayExemplars(lastChars);
    // out.println("<tr><th class='head'>" + exemplarsWithoutBrackets + "</th>");
    // for (String item : allLocales) {
    // String cleanItem;
    // if (locales.contains(item)) {
    // cleanItem = "<th class='value'>" + cleanLocale(item, false) + "</th>";
    // } else {
    // cleanItem = "<td class='value'>\u00a0</td>";
    // }
    // out.println(cleanItem);
    // }
    // out.println("</tr>");
    // }

    // private static final StringTransform MyTransform = new StringTransform() {
    //
    // public String transform(String source) {
    // StringBuilder builder = new StringBuilder();
    // int cp = 0;
    // builder.append("<span title='");
    // String prefix = "";
    // for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
    // cp = UTF16.charAt(source, i);
    // if (i == 0) {
    // if (COMBINING.contains(cp)) {
    // prefix = "\u25CC";
    // }
    // } else {
    // builder.append(" + ");
    // }
    // builder.append("U+").append(com.ibm.icu.impl.Utility.hex(cp,4)).append('
    // ').append(UCharacter.getExtendedName(cp));
    // }
    // builder.append("'>").append(prefix).append(source).append("</span>");
    // return builder.toString();
    // }
    //
    // };

    // private static String displayExemplars(UnicodeSet lastChars) {
    // String exemplarsWithoutBrackets = new PrettyPrinter()
    // .setOrdering(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT))
    // .setSpaceComparator(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT)
    // .setStrength2(Collator.PRIMARY))
    // .setCompressRanges(true)
    // .setToQuote(ALL_CHARS)
    // .setQuoter(MyTransform)
    // .format(lastChars);
    // exemplarsWithoutBrackets = exemplarsWithoutBrackets.substring(1,
    // exemplarsWithoutBrackets.length() - 1);
    // return exemplarsWithoutBrackets;
    // }

    // private static boolean isNextCharacter(String last, String value) {
    // if (UTF16.hasMoreCodePointsThan(last, 1)) return false;
    // if (UTF16.hasMoreCodePointsThan(value, 1)) return false;
    // int lastChar = UTF16.charAt(last,0);
    // int valueChar = UTF16.charAt(value,0);
    // return lastChar + 1 == valueChar;
    // }

    /** Merges two locale sets into a new sorted set; a null side yields the other side as is. */
    static UnicodeMap.Composer<Set<String>> setComposer =
            new UnicodeMap.Composer<>() {
                @Override
                public Set<String> compose(
                        int codepoint, String string, Set<String> a, Set<String> b) {
                    if (a == null) {
                        return b;
                    } else if (b == null) {
                        return a;
                    } else {
                        TreeSet<String> result = new TreeSet<>(a);
                        result.addAll(b);
                        return result;
                    }
                }
            };

    /** Locale ID to the name of the first script found in its main exemplar set. */
    static Map<String, String> LOCALE_TO_SCRIPT = new HashMap<>();

    /**
     * Reads every available locale and fills {@link #path_value_locales} and {@link
     * #LOCALE_TO_SCRIPT}, then prints the locales grouped by script.
     *
     * @param cldrFactory the source of locale files
     */
    private static void loadInformation(Factory cldrFactory) {
        Set<String> alllocales = cldrFactory.getAvailable();
        String[] postFix = new String[] {""};
        // gather all information
        // TODO tweek for value-laden attributes
        for (String localeID : alllocales) {
            System.out.println("Loading: " + localeID);
            System.out.flush();

            CLDRFile cldrFile;
            try {
                cldrFile = cldrFactory.make(localeID, localeID.equals("root"));
            } catch (IllegalArgumentException e) {
                System.err.println("Couldn't open " + localeID);
                continue;
            }
            if (cldrFile.isNonInheriting()) continue;
            for (String path : cldrFile) {
                if (pathMatcher != null && !pathMatcher.reset(path).matches()) {
                    continue;
                }
                if (altProposedMatcher.reset(path).matches()) {
                    continue;
                }
                if (path.indexOf("/alias") >= 0) continue;
                if (path.indexOf("/identity") >= 0) continue;
                if (path.indexOf("/references") >= 0) continue;
                // fixPath also resets postFix[0] to "" for this path
                PathHeader ph = fixPath(path, postFix);
                if (ph == null || ph.shouldHide()) {
                    continue;
                }
                String fullPath = cldrFile.getFullXPath(path);
                String value = getValue(cldrFile, path, fullPath);
                if (value == null || CldrUtility.INHERITANCE_MARKER.equals(value)) {
                    continue;
                }
                if (fullPath.indexOf("[@draft=\"unconfirmed\"]") >= 0
                        || fullPath.indexOf("[@draft=\"provisional\"]") >= 0) {
                    postFix[0] = "*";
                }
                if (path.equals("//ldml/characters/exemplarCharacters")) {
                    try {
                        UnicodeSet exemplars = new UnicodeSet(value);
                        String script = UScript.getName(getFirstScript(exemplars));
                        LOCALE_TO_SCRIPT.put(localeID, script);
                    } catch (RuntimeException e) {
                        // Best effort: without a script entry the locale is later looked up via
                        // its parents or listed under the unknown script. Report it, though,
                        // rather than hiding the bad data.
                        System.err.println(
                                "Couldn't get script from exemplars for "
                                        + localeID
                                        + ": "
                                        + e.getMessage());
                    }
                }
                path_value_locales
                        .computeIfAbsent(ph, k -> new TreeMap<>(standardCollation))
                        .computeIfAbsent(value, k -> new TreeSet<>())
                        .add(localeID + postFix[0]);
            }
        }
        Relation<String, String> sorted =
                Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
        for (Entry<String, String> s : LOCALE_TO_SCRIPT.entrySet()) {
            sorted.put(s.getValue(), s.getKey());
        }
        for (Entry<String, Set<String>> s : sorted.keyValuesSet()) {
            System.out.println(s);
        }
    }

    static PathHeader.Factory pathHeaderFactory;

    /**
     * Converts a path to its {@link PathHeader} using {@link #pathHeaderFactory}.
     *
     * @param path the path to convert
     * @param localePrefix if non-null, element 0 is reset to the empty string
     * @return the value returned by the factory for the path
     */
    private static PathHeader fixPath(String path, String[] localePrefix) {
        if (localePrefix != null) {
            localePrefix[0] = "";
        }
        return pathHeaderFactory.fromPath(path);
    }

    /**
     * Lists the value attributes of the last element of a path as {@code name=value} pairs, each
     * followed by a space. Attributes in {@link #skipSet} and those whose status is distinguished
     * or metadata are left out.
     *
     * @param parts the parsed full path
     * @return the concatenated pairs; empty when there are none
     */
    private static String getValueAttributes(XPathParts parts) {
        String element = parts.getElement(-1);
        Collection<String> attributes = parts.getAttributeKeys(-1);
        DtdData dtdData = parts.getDtdData();
        StringBuilder sb = new StringBuilder();
        for (String attributeName : attributes) {
            if (skipSet.contains(attributeName)) {
                continue;
            }
            Attribute attribute = dtdData.getAttribute(element, attributeName);
            AttributeStatus status = attribute.getStatus();
            switch (status) {
                case distinguished:
                case metadata: // skip
                    break;
                case value: // keep
                    sb.append(attributeName)
                            .append("=")
                            .append(parts.getAttributeValue(-1, attributeName))
                            .append(" ");
                    break;
            }
        }
        return sb.toString();
    }

    static final Set<String> skipSet = ImmutableSet.of("draft", "alt");

    static Status status = new Status();

    /**
     * Gets the value to show for a path in a file.
     *
     * @param cldrFile the file to read
     * @param path the distinguishing path
     * @param fullPath the full path, used to read attributes when the string value is empty
     * @return the string value; the value attributes when that string is empty; or null when there
     *     is no value or the value was found at a different path
     */
    private static String getValue(CLDRFile cldrFile, String path, String fullPath) {
        String value = cldrFile.getStringValue(path);
        if (value == null) {
            System.out.println("Null value for " + path);
            return value;
        }
        cldrFile.getSourceLocaleID(path, status);
        if (!path.equals(status.pathWhereFound)) {
            // value = "[" + prettyPath.getPrettyPath(status.pathWhereFound, false) + "]";
            value = null;
            return value;
        }
        if (value.length() == 0) {
            XPathParts parts = XPathParts.getFrozenInstance(fullPath);
            value = getValueAttributes(parts);
        }
        return value;
    }

    /**
     * Builds the base file name (without extension) for a page: "section.page", with spaces,
     * slashes and parentheses replaced by underscores, an optional ".suffix", all lowercased.
     *
     * @param header supplies the section and page
     * @param suffix appended after a period when non-null
     * @return the lowercased file name
     */
    private static String getFileName2(PathHeader header, String suffix) {
        String result =
                (header.getSection() + "." + header.getPage())
                        .replace(" ", "_")
                        .replace("/", "_")
                        .replace("(", "_")
                        .replace(")", "_");
        if (suffix != null) {
            result += "." + suffix;
        }
        return result.toLowerCase(Locale.ENGLISH);
    }

    /** Element 0 is printed at the top of each page, element 1 at the bottom. */
    static String[] headerAndFooter = new String[2];

    private static Transliterator toHTML;

    /**
     * Finishes the current page, if any, and opens the next one.
     *
     * @param out the currently open page, or null if there is none
     * @param main the base file name of the new page
     * @param headerString the navigation header printed at the top of the new page
     * @param title the title of the new page
     * @param tsvFile the optional TSV sink, passed through to {@link #writeHeader}
     * @return the writer for the new page
     * @throws IOException if the new page cannot be opened
     */
    private static PrintWriter start(
            PrintWriter out,
            String main,
            String headerString,
            String title,
            OptionalPrinter tsvFile)
            throws IOException {
        finish(out, tsvFile);
        out = writeHeader(main, title, tsvFile);
        out.println(headerString);
        return out;
    }

    /**
     * Builds the HTML navigation table: one row per section, with a link to each page of that
     * section. The Alphabetic_Information page also gets links to the {@link #EXEMPLARS} pages.
     *
     * @param set the path headers, assumed to be ordered so that equal sections and pages are
     *     adjacent
     * @return the HTML for the table
     */
    public static String getHeader(Set<PathHeader> set) {
        StringBuilder out = new StringBuilder("<table class='simple'><tr>");
        String lastMain = "";
        String lastSub = "";
        for (PathHeader pathHeader : set) {
            String mainName = pathHeader.getSection();
            String subName = TransliteratorUtilities.toHTML.transform(pathHeader.getPage());
            if (!mainName.equals(lastMain)) {
                if (lastMain.length() != 0) {
                    out.append("</tr>" + System.lineSeparator() + "<tr>");
                }
                out.append(
                        "<th align='right' nowrap style='vertical-align: top'><b>"
                                + TransliteratorUtilities.toHTML.transform(mainName)
                                + ":&#xa0;</b></th><td>");
                lastMain = mainName;
                lastSub = subName;
            } else if (!subName.equals(lastSub)) {
                out.append(" | ");
                lastSub = subName;
            } else {
                continue; // identical, skip
            }
            out.append("<a href='" + getFileName2(pathHeader, null) + ".html'>" + subName + "</a>");
            if (pathHeader.getPageId() == PageId.Alphabetic_Information) {
                for (String[] pair : EXEMPLARS) {
                    out.append(
                            " | <a href='"
                                    + getFileName2(pathHeader, pair[1])
                                    + ".html'>"
                                    + pair[2]
                                    + "</a>");
                }
            }
        }
        return out.append("</td></tr>" + System.lineSeparator() + "</table>").toString();
    }

    /**
     * Opens the HTML file for a page and prints the top of the chart template to it. The TSV file
     * is also opened on first use, unless {@link #TOO_BIG_FOR_GITHUB} is set.
     *
     * @param main the base file name of the page
     * @param title the page title, shown after "By-Type Chart: "
     * @param tsvFile the optional TSV sink
     * @return the writer for the page
     * @throws IOException if a file cannot be opened
     */
    private static PrintWriter writeHeader(String main, String title, OptionalPrinter tsvFile)
            throws IOException {
        PrintWriter out;
        out = FileUtilities.openUTF8Writer(options[DESTDIR].value, main + ".html");
        if (!TOO_BIG_FOR_GITHUB && tsvFile.printWriter == null) {
            tsvFile.printWriter =
                    FileUtilities.openUTF8Writer(
                            Chart.getTsvDir(options[DESTDIR].value, DIR_NAME), DIR_NAME + ".tsv");
            tsvFile.print("# By-Type Data\n");
            tsvFile.print("# Section\tPage\tHeader\tCode\tValue\tLocales\n");
        }

        // presumably fills headerAndFooter with the page top and bottom; confirm in ShowData
        ShowData.getChartTemplate(
                "By-Type Chart: " + title,
                ToolConstants.CHART_DISPLAY_VERSION,
                "",
                headerAndFooter,
                null,
                false);
        out.println(headerAndFooter[0]);
        return out;
    }

    /**
     * Closes the table, prints the page footer and closes the page. Does nothing when there is no
     * open page.
     *
     * @param out the page to finish; may be null
     * @param tsvFile not used by this method
     */
    private static void finish(PrintWriter out, OptionalPrinter tsvFile) {
        if (out == null) return;
        out.println("</table>");
        out.println(headerAndFooter[1]);
        out.close();
    }

    /**
     * Closes the TSV sink at the end of the run.
     *
     * @param out not used by this method
     * @param tsvFile the optional TSV sink to close
     */
    private static void finishAll(PrintWriter out, OptionalPrinter tsvFile) {
        // TODO Auto-generated method stub
        // tsvFile.println("# EOF");
        tsvFile.close();
    }
}