package org.unicode.cldr.test;

import com.ibm.icu.text.MessageFormat;
import com.ibm.icu.text.UnicodeSet;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.test.CheckConsistentCasing.CasingType;
import org.unicode.cldr.test.CheckConsistentCasing.CasingTypeAndErrFlag;
import org.unicode.cldr.test.CheckConsistentCasing.Category;
import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.WinningChoice;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.SimpleXMLSource;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.XMLFileReader;
import org.unicode.cldr.util.XMLSource;
import org.unicode.cldr.util.XPathParts;

/**
 * Calculates, reads, writes and returns casing information about locales for CheckConsistentCasing.
 * Run main() to generate the casing information files; they are written to the {@code casing}
 * subdirectory of {@link CLDRPaths#GEN_DIRECTORY} and are meant to be stored in common/casing.
 *
 * @author jchye
 */
public class CasingInfo {
    private static final Options options =
            new Options("This program is used to generate casing files for locales.")
                    .add(
                            "locales",
                            ".*",
                            ".*",
                            "A regex of the locales to generate casing information for")
                    .add(
                            "summary",
                            null,
                            "generates a summary of the casing for all locales that had casing generated for this run");

    /** Cache of casing data per locale ID, filled lazily by {@link #getLocaleCasing(String)}. */
    private final Map<String, Map<Category, CasingTypeAndErrFlag>> casing;

    /** Directories that are searched, in order, for {@code <localeID>.xml} casing files. */
    private final List<File> casingDirs;

    /**
     * Creates an instance that looks for casing files in the {@code ../casing} sibling of each of
     * the factory's source directories.
     *
     * @param factory the factory whose source directories determine where casing files are read
     */
    public CasingInfo(Factory factory) {
        casingDirs = new ArrayList<>();
        for (File f : factory.getSourceDirectories()) {
            this.casingDirs.add(new File(f.getAbsolutePath() + "/../casing"));
        }
        casing = CldrUtility.newConcurrentHashMap();
    }

    /** ONLY usable in command line tests. Reads casing files from CLDRPaths.CASING_DIRECTORY. */
    public CasingInfo() {
        casingDirs = new ArrayList<>();
        this.casingDirs.add(new File(CLDRPaths.CASING_DIRECTORY));
        casing = CldrUtility.newConcurrentHashMap();
    }

    /**
     * Returns casing information to be used for a specified locale.
     *
     * <p>The result is cached. If no casing file yields an entry for the locale, the locale's
     * parents are consulted recursively, stopping before root.
     *
     * @param localeID the locale to look up
     * @return the casing data for the locale, or {@code null} if neither the locale nor any of its
     *     non-root parents has casing data
     */
    public Map<Category, CasingTypeAndErrFlag> getLocaleCasing(String localeID) {
        // Check if the localeID contains casing first.
        // If there isn't a casing file available for the locale,
        // recurse over the locale's parents until something is found.
        if (!casing.containsKey(localeID)) {
            // Synchronize writes to casing map in an attempt to avoid NPEs (cldrbug 5051).
            synchronized (casing) {
                CasingHandler handler = loadFromXml(localeID);
                if (handler != null) {
                    // NOTE(review): the handler stores its result under the language code it
                    // parsed from the file's identity element, not under the requested
                    // localeID; confirm this is intended for script/region locales.
                    handler.addParsedResult(casing);
                }
                if (!casing.containsKey(localeID)) {
                    String parentID = LocaleIDParser.getSimpleParent(localeID);
                    // Null-safe comparison: getSimpleParent is not visible here and may
                    // return null (e.g. when asked for the parent of root).
                    if (parentID != null && !"root".equals(parentID)) {
                        Map<Category, CasingTypeAndErrFlag> parentCasing =
                                getLocaleCasing(parentID);
                        // Never store null: a concurrent map rejects null values.
                        if (parentCasing != null) {
                            casing.put(localeID, parentCasing);
                        }
                    }
                }
            }
        }

        return casing.get(localeID);
    }

    /**
     * Loads casing information about a specified locale from the casing XML, if it exists.
     *
     * @param localeID the locale whose {@code <localeID>.xml} file should be read
     * @return the handler holding the parsed data from the first casing directory that contains
     *     the file, or {@code null} if no directory contains it
     */
    private CasingHandler loadFromXml(String localeID) {
        for (File casingDir : casingDirs) {
            File casingFile = new File(casingDir, localeID + ".xml");
            if (casingFile.isFile()) {
                CasingHandler handler = new CasingHandler();
                XMLFileReader xfr = new XMLFileReader().setHandler(handler);
                xfr.read(casingFile.toString(), -1, true);
                return handler;
            }
        } // Fail silently if file not found.
        return null;
    }

    /**
     * Calculates casing information about all languages from the locale data and writes one
     * casing XML file per processed locale.
     *
     * @param localePattern regex selecting the locales to process
     * @return for each processed locale, whether its exemplar set contains any uppercase letter
     */
    private Map<String, Boolean> generateCasingInformation(String localePattern) {
        SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
        Set<String> defaultContentLocales = supplementalDataInfo.getDefaultContentLocales();
        String sourceDirectory = CldrUtility.checkValidDirectory(CLDRPaths.MAIN_DIRECTORY);
        Factory cldrFactory = Factory.make(sourceDirectory, localePattern);
        Set<String> locales = new LinkedHashSet<>(cldrFactory.getAvailable());
        locales.removeAll(defaultContentLocales); // Skip all default content locales
        UnicodeSet allCaps = new UnicodeSet("[:Lu:]");
        Map<String, Boolean> localeUsesCasing = new HashMap<>();
        LocaleIDParser parser = new LocaleIDParser();

        for (String localeID : locales) {
            if (CLDRFile.isSupplementalName(localeID)) {
                continue;
            }

            // We want country/script differences but not region differences
            // (unless it's pt_PT, which we do want).
            // Keep regional locales only if there isn't already a locale for its script,
            // e.g. keep zh_Hans_HK because zh_Hans is a default locale.
            parser.set(localeID);
            if (parser.getRegion().length() > 0 && !localeID.equals("pt_PT")) {
                System.out.println("Skipping regional locale " + localeID);
                continue;
            }

            // Save casing information about the locale.
            CLDRFile file = cldrFactory.make(localeID, true);
            UnicodeSet exemplars = file.getExemplarSet("", WinningChoice.NORMAL);
            localeUsesCasing.put(localeID, exemplars.containsSome(allCaps));
            createCasingXml(localeID, CheckConsistentCasing.getSamples(file));
        }
        return localeUsesCasing;
    }

    /**
     * Creates a CSV summary of casing information over all cached locales for verification.
     *
     * <p>Each data row holds the locale ID, Y/N for whether the locale uses casing, and then, per
     * category, the first character of the casing type name ({@code null} if the locale has no
     * entry for that category). If the file cannot be opened the stack trace is printed and
     * nothing is written.
     *
     * @param outputFile path of the CSV file to write
     * @param localeUsesCasing per-locale flag as returned by generateCasingInformation; locales
     *     missing from this map are reported as "N"
     */
    private void createCasingSummary(String outputFile, Map<String, Boolean> localeUsesCasing) {
        Category[] categories = Category.values();
        // try-with-resources guarantees the writer is closed even if a row fails.
        try (PrintWriter out = new PrintWriter(outputFile, "UTF-8")) {
            // Header
            out.print(",");
            for (Category category : categories) {
                out.print("," + category.toString().replace('_', '-'));
            }
            out.println();
            out.print("Locale ID,Case");
            for (int i = 0; i < categories.length; i++) {
                out.print("," + i);
            }
            out.println();

            for (String localeID : casing.keySet()) {
                // Write casing information about the locale to file.
                out.print(localeID);
                out.print(",");
                // Boolean.TRUE.equals avoids an unboxing NPE for locales absent from the map.
                out.print(Boolean.TRUE.equals(localeUsesCasing.get(localeID)) ? "Y" : "N");
                Map<Category, CasingTypeAndErrFlag> types = casing.get(localeID);
                for (Category category : categories) {
                    CasingTypeAndErrFlag value = types.get(category);
                    // The separator is printed on its own: "," + value == null would compare
                    // the concatenated string with null, dropping the comma and the null check.
                    out.print(",");
                    out.print(
                            value == null
                                    ? "null"
                                    : String.valueOf(value.type().toString().charAt(0)));
                }
                out.println();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes casing information for the specified locale to XML format.
     *
     * <p>Overrides found in an existing casing file for the locale take precedence over the
     * supplied values and are written with {@code override="true"}. Non-override entries are
     * written only when their type is not {@code CasingType.other}. The caller's map is not
     * modified.
     *
     * @param localeID the locale to write the file for
     * @param localeCasing the calculated casing type per category
     */
    private void createCasingXml(String localeID, Map<Category, CasingType> localeCasing) {
        // Load any existing overrides over casing info.
        CasingHandler handler = loadFromXml(localeID);
        Map<Category, CasingType> overrides =
                handler == null ? new EnumMap<>(Category.class) : handler.getOverrides();

        // Merge into a local copy so the argument is left untouched.
        Map<Category, CasingType> merged = new EnumMap<>(Category.class);
        merged.putAll(localeCasing);
        merged.putAll(overrides);

        XMLSource source = new SimpleXMLSource(localeID);
        for (Category category : Category.values()) {
            if (category == Category.NOT_USED) {
                continue;
            }
            CasingType type = merged.get(category);
            if (overrides.containsKey(category)) {
                String path =
                        MessageFormat.format(
                                "//ldml/metadata/casingData/casingItem[@type=\"{0}\"][@override=\"true\"]",
                                category);
                source.putValueAtPath(path, type.toString());
            } else if (type != null && type != CasingType.other) {
                // Categories without a calculated type are skipped rather than dereferenced.
                String path = "//ldml/metadata/casingData/casingItem[@type=\"" + category + "\"]";
                source.putValueAtPath(path, type.toString());
            }
        }
        CLDRFile cldrFile = new CLDRFile(source);
        File casingFile = new File(CLDRPaths.GEN_DIRECTORY + "/casing", localeID + ".xml");

        // try-with-resources closes the writer even if writing the file throws.
        try (PrintWriter out = new PrintWriter(casingFile, "UTF-8")) {
            cldrFile.write(out);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Generates all the casing information and writes it to XML. A CSV summary of casing
     * information is written to file if the summary option is given.
     *
     * @param args command-line arguments, parsed with the options declared on this class
     */
    public static void main(String[] args) {
        CasingInfo casingInfo = new CasingInfo();
        options.parse(args, true);
        Map<String, Boolean> localeUsesCasing =
                casingInfo.generateCasingInformation(options.get("locales").getValue());
        if (options.get("summary").doesOccur()) {
            // NOTE(review): the summary file name is taken from args[0], not from an option
            // value; confirm the intended command-line layout.
            casingInfo.createCasingSummary(args[0], localeUsesCasing);
        }
    }

    /**
     * XML handler for parsing casing files. Collects the casing type (with error flag) per
     * category, the subset of entries marked as overrides, and the language named in the file's
     * identity element.
     */
    private static class CasingHandler extends XMLFileReader.SimpleHandler {
        private static final Pattern LOCALE_PATTERN =
                PatternCache.get("//ldml/identity/language\\[@type=\"(\\w+)\"\\]");

        /** Language code captured from the identity element; null until such a path is seen. */
        private String localeID;

        private final Map<Category, CasingTypeAndErrFlag> caseMap = new EnumMap<>(Category.class);
        private final Map<Category, CasingType> overrideMap = new EnumMap<>(Category.class);

        /**
         * Records a casingItem entry, or captures the language from the identity element.
         *
         * @param path the full XPath of the element being read
         * @param value the element's value; for casingItem paths, the name of a CasingType
         */
        @Override
        public void handlePathValue(String path, String value) {
            // Parse casing info.
            if (path.contains("casingItem")) {
                XPathParts parts = XPathParts.getFrozenInstance(path);
                // Category names use '_' in the enum but may use '-' in the file.
                Category category =
                        Category.valueOf(parts.getAttributeValue(-1, "type").replace('-', '_'));
                CasingType casingType = CasingType.valueOf(value);
                boolean errFlag = Boolean.parseBoolean(parts.getAttributeValue(-1, "forceError"));
                // Find the enum constant combining this casing type and error flag.
                for (CasingTypeAndErrFlag typeAndFlag : CasingTypeAndErrFlag.values()) {
                    if (casingType == typeAndFlag.type() && errFlag == typeAndFlag.flag()) {
                        caseMap.put(category, typeAndFlag);
                        break;
                    }
                }
                if (Boolean.parseBoolean(parts.getAttributeValue(-1, "override"))) {
                    overrideMap.put(category, casingType);
                }
            } else {
                // Parse the locale that the casing is for.
                Matcher matcher = LOCALE_PATTERN.matcher(path);
                if (matcher.matches()) {
                    localeID = matcher.group(1);
                }
            }
        }

        /**
         * Stores the parsed casing data in the given map under the locale parsed from the file.
         *
         * @param map the map to add this handler's result to
         */
        public void addParsedResult(Map<String, Map<Category, CasingTypeAndErrFlag>> map) {
            map.put(localeID, caseMap);
        }

        /**
         * Returns the entries that were marked {@code override="true"} in the parsed file.
         *
         * @return the override casing type per category; empty if the file had no overrides
         */
        public Map<Category, CasingType> getOverrides() {
            return overrideMap;
        }
    }
}