package org.unicode.cldr.tool;

import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.util.ICUUncheckedIOException;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.Collection;
import java.util.Date;
import java.util.EnumMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.unicode.cldr.draft.ScriptMetadata;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CLDRTool;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.LstrField;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.StringRange;
import org.unicode.cldr.util.StringRange.Adder;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
import org.unicode.cldr.util.TempPrintWriter;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;

/**
 * Command-line tool that writes one {@code validity/<type>.xml} file under {@link
 * CLDRPaths#COMMON_DIRECTORY} for every code type it collects.
 *
 * <p>Codes and their {@link Status} are gathered in three passes ({@link #doLstr}, {@link
 * #doSubdivisions}, {@link #doCurrency}) into per-type {@link Info} records, which {@link #main}
 * then serializes.
 */
@CLDRTool(
        alias = "generate-validity-data",
        url = "http://cldr.unicode.org/development/updating-codes/update-validity-xml")
public class GenerateValidityXml {

    private static final Validity VALIDITY = Validity.getInstance();

    /** Validity data loaded from the last release directory; used to detect changes. */
    private static final Validity OLD_VALIDITY =
            Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/");

    private static final Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG =
            StandardCodes.getEnumLstreg();
    private static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance();

    /**
     * {@link Adder} that writes each code (or {@code start~end} range) to {@link #target}, keeping
     * items that share a leading key on one line and starting a new indented line otherwise.
     */
    private static class MyAdder implements Adder {
        /** Destination of the formatted output; must be assigned before {@link #add} is called. */
        Appendable target;

        /** When true, the grouping key is built from the first two positions of the code. */
        boolean twoCodePoints = false;

        /** Grouping key of the previously written item, or -1 if nothing was written yet. */
        long lastCodePoint = -1;

        /**
         * Appends one item, preceded by a space if it has the same grouping key as the previous
         * item and by a newline plus three tabs otherwise.
         *
         * @param start the code, or the first code of a range
         * @param end the last code of the range, or null for a single code
         * @throws ICUUncheckedIOException if writing to {@link #target} fails
         */
        @Override
        public void add(String start, String end) {
            try {
                long firstCodePoint = start.codePointAt(0);
                if (twoCodePoints) {
                    firstCodePoint <<= 22;
                    // A one-char code has nothing at index 1; leave the low bits zero rather than
                    // failing with StringIndexOutOfBoundsException.
                    if (start.length() > 1) {
                        firstCodePoint |= start.codePointAt(1);
                    }
                }
                if (firstCodePoint == lastCodePoint) {
                    target.append(' ');
                } else {
                    target.append("\n\t\t\t");
                }
                target.append(start);
                if (end != null) {
                    target.append('~').append(end);
                }
                lastCodePoint = firstCodePoint;
            } catch (IOException e) {
                throw new ICUUncheckedIOException(e);
            }
        }

        /**
         * Forgets the previous grouping key and selects the grouping mode for the next run.
         *
         * @param b new value for {@link #twoCodePoints}
         */
        public void reset(boolean b) {
            lastCodePoint = -1;
            twoCodePoints = b;
        }
    }

    static Set<String> containment = SDI.getContainers();
    static Map<String, Map<LstrField, String>> codeToData = LSTREG.get(LstrType.region);

    /** The codes collected for one output file, with their status and optional comments. */
    static class Info {
        String mainComment;
        Map<String, Validity.Status> codeToStatus = new TreeMap<>();

        /** Optional XML comment emitted before the {@code <id>} element of a status. */
        Map<Validity.Status, String> statusComment = new EnumMap<>(Status.class);

        Set<String> newCodes = new TreeSet<>();

        /** All Info records by type name, in creation order (this is also the output order). */
        static Map<String, Info> types = new LinkedHashMap<>();

        /**
         * Returns the record for {@code myType}, creating and registering it on first use.
         *
         * @param myType type name, also used as the output file's base name
         */
        static Info getInfo(String myType) {
            return types.computeIfAbsent(myType, unused -> new Info());
        }

        /** Returns an immutable, sorted snapshot of the codes grouped by status. */
        public SetMultimap<Status, String> getStatusMap() {
            TreeMultimap<Status, String> result = TreeMultimap.create();
            Multimaps.invertFrom(Multimaps.forMap(codeToStatus), result);
            return ImmutableSetMultimap.copyOf(result);
        }

        public void put(String key, Status value) {
            codeToStatus.put(key, value);
        }

        /** Removes {@code key} only if it is currently mapped to {@code value}. */
        public void remove(String key, Status value) {
            codeToStatus.remove(key, value);
        }

        public void clear() {
            codeToStatus.clear();
        }

        public Set<Entry<String, Status>> entrySet() {
            return codeToStatus.entrySet();
        }

        public Status get(String key) {
            return codeToStatus.get(key);
        }

        /**
         * Records {@code newStatus} for {@code currency} unless a status that sorts at or before
         * it (in {@link Status} natural order) is already recorded.
         */
        public void putBest(String currency, Status newStatus) {
            Status oldStatus = get(currency);
            if (oldStatus == null || newStatus.compareTo(oldStatus) < 0) {
                put(currency, newStatus);
            }
        }
    }

    static final Map<String, Info> types = Info.types;

    /**
     * Collects all validity data and writes one XML file per type.
     *
     * @param args ignored
     * @throws IOException if an output file cannot be written
     */
    public static void main(String[] args) throws IOException {

        doLstr(types);
        doSubdivisions(types);
        doCurrency(types);
        // write file
        MyAdder adder = new MyAdder();
        for (Entry<String, Info> entry : types.entrySet()) {
            String type = entry.getKey();
            final Info info = entry.getValue();
            Multimap<Status, String> subtypeMap = info.getStatusMap();
            try (TempPrintWriter output =
                    TempPrintWriter.openUTF8Writer(
                                    CLDRPaths.COMMON_DIRECTORY, "validity/" + type + ".xml")
                            .skipCopyright(true)) {
                adder.target = output;
                output.append(
                        DtdType.supplementalData.header(MethodHandles.lookup().lookupClass())
                                + "\t<version number=\"$Revision"
                                + "$\"/>\n"
                                + "\t<idValidity>\n");
                for (Entry<Status, Collection<String>> entry2 : subtypeMap.asMap().entrySet()) {
                    Validity.Status subtype = entry2.getKey();
                    // getStatusMap() returns a SetMultimap, so each value collection is a Set.
                    Set<String> set = (Set<String>) entry2.getValue();
                    String comment = info.statusComment.get(subtype);
                    if (comment != null) {
                        output.append(
                                "\t\t<!-- " + comment.replace("\n", "\n\t\t\t ") + " -->\n");
                    }
                    output.append("\t\t<id type='" + type + "' idStatus='" + subtype + "'>");
                    final int size = set.size();
                    output.append(
                            "\t\t<!-- "
                                    + size
                                    + " item"
                                    + (size > 1 ? "s" : "") // we know it's English ;-)
                                    + " -->");
                    // Large sets are grouped by two leading positions to keep lines short.
                    adder.reset(size > 600); // || type.equals("subdivision")
                    StringRange.compact(set, adder, true);
                    output.append("\n\t\t</id>\n");
                }
                //                if (!info.newCodes.isEmpty()) {
                //                    output.append("\t\t<!-- Codes added this release:\n\t\t\t" +
                //                            showCodes(info.newCodes, "\n\t\t\t") + "\n\t\t-->\n");
                //                }
                output.append("\t</idValidity>\n</supplementalData>\n");
            }
        }
        // TODO: add Unknown subdivisions, add private_use currencies, ...
    }

    /**
     * Joins codes into a string, separating codes with the same prefix (the text before the first
     * '-') by a space and starting each new prefix group with {@code linePrefix}.
     *
     * <p>A code without '-' is treated as its own prefix instead of causing an exception.
     *
     * @param newCodes codes to format, in iteration order
     * @param linePrefix separator inserted between prefix groups
     * @return the formatted codes; empty if {@code newCodes} is empty
     */
    private static String showCodes(Set<String> newCodes, String linePrefix) {
        StringBuilder result = new StringBuilder();
        String last = "";
        for (String s : newCodes) {
            int dash = s.indexOf('-');
            String newPrefix = dash < 0 ? s : s.substring(0, dash);
            if (last.equals(newPrefix)) {
                result.append(" ");
            } else {
                if (!last.isEmpty()) {
                    result.append(linePrefix);
                }
                last = newPrefix;
            }
            result.append(s);
        }
        return result.toString();
    }

    /**
     * Fills the "currency" record: a currency is regular if, in some territory, it is legal tender
     * with an end date after the start of next year; otherwise it is deprecated. Also reports any
     * status that differs from the last release.
     *
     * @param types unused; records are obtained through {@link Info#getInfo}
     */
    private static void doCurrency(Map<String, Info> types) {
        Info info = Info.getInfo("currency");
        final int currentYear = LocalDate.now().getYear();
        // Midnight on 1 January of next year in the default time zone, i.e. the end of this year.
        final Date eoy =
                Date.from(
                        LocalDate.of(currentYear + 1, 1, 1)
                                .atStartOfDay(ZoneId.systemDefault())
                                .toInstant());
        for (String region : SDI.getCurrencyTerritories()) {
            for (CurrencyDateInfo data : SDI.getCurrencyDateInfo(region)) {
                String currency = data.getCurrency();
                Date end = data.getEnd();
                boolean legalTender = data.isLegalTender();
                Status newStatus =
                        end.after(eoy) && legalTender ? Status.regular : Status.deprecated;
                info.putBest(currency, newStatus);
            }
        }
        info.put(LstrType.currency.unknown, Status.unknown);
        // make sure we don't overlap.
        // we want to keep any code that is valid in any territory, so
        info.remove("XXX", Status.deprecated);
        info.remove("XXX", Status.regular);

        // just to make sure info never disappears
        Map<String, Status> oldCodes = OLD_VALIDITY.getCodeToStatus(LstrType.currency);
        for (Entry<String, Status> entry : oldCodes.entrySet()) {
            String key = entry.getKey();
            Status oldStatus = entry.getValue();
            Status newStatus = info.get(key);
            if (!Objects.equal(oldStatus, newStatus)) {
                System.out.println(
                        "Status changed: " + key + ", " + oldStatus + " => " + newStatus);
            }
        }

        info.statusComment.put(
                Status.deprecated,
                "Deprecated values are those that are not legal tender in some country after "
                        + currentYear
                        + ".\n"
                        + "More detailed usage information needed for some implementations is in supplemental data.");
    }

    /**
     * Fills the "subdivision" record: every contained subdivision is regular unless it has an
     * alias, in which case it is deprecated. Codes present in the last release but no longer
     * contained anywhere are kept as deprecated.
     *
     * @param types unused; records are obtained through {@link Info#getInfo}
     */
    private static void doSubdivisions(Map<String, Info> types) {
        Info info = Info.getInfo("subdivision");
        Map<String, R2<List<String>, String>> aliases = SDI.getLocaleAliasInfo().get("subdivision");
        if (aliases == null) {
            // Same handling as in doLstr: report it and treat every code as having no alias.
            System.out.println("No aliases for: " + "subdivision");
        }
        for (String container : SDI.getContainersForSubdivisions()) {
            for (String contained : SDI.getContainedSubdivisions(container)) {
                Status status =
                        aliases != null && aliases.containsKey(contained)
                                ? Validity.Status.deprecated
                                : Validity.Status.regular;
                info.put(contained.toLowerCase(Locale.ROOT).replace("-", ""), status);
            }
        }

        // find out which items were valid, but are no longer in the containment map
        // add them as deprecated
        Map<Status, Set<String>> oldSubdivisionData =
                OLD_VALIDITY.getStatusToCodes(LstrType.subdivision);
        for (Entry<Status, Set<String>> entry : oldSubdivisionData.entrySet()) {
            for (String oldSdId : entry.getValue()) {
                if (info.get(oldSdId) == null) {
                    info.put(oldSdId, Status.deprecated);
                }
            }
        }

        info.statusComment.put(
                Status.deprecated,
                "Deprecated values include those that are not formally deprecated in the country in question, but have their own region codes.\n"
                        + "It also include codes that were previously in CLDR, for compatibility.");
        info.statusComment.put(
                Status.unknown,
                "Unknown/Undetermined subdivision codes (ZZZZ) are defined for all regular region codes.");
    }

    /** Script codes kept as regular even when {@link ScriptMetadata} has no entry for them. */
    static final Set<String> VARIANTS =
            Set.of( // variants
                    "Aran",
                    "Cyrs",
                    "Hans",
                    "Hant",
                    "Latf",
                    "Latg",
                    "Syre",
                    "Syrj",
                    "Syrn",
                    // composites
                    "Hanb",
                    "Jpan",
                    "Hrkt",
                    "Kore",
                    // subsets
                    "Jamo");

    /**
     * Fills one record per registry type that is flagged both {@code isLstr} and {@code
     * isUnicode}, classifying each code as unknown, special, deprecated, private_use, reserved,
     * macroregion or regular. For every regular region it also registers the {@code <region>zzzz}
     * unknown subdivision code.
     *
     * @param types unused; records are obtained through {@link Info#getInfo}
     * @throws IOException declared for callers; not thrown by the code in this method
     */
    private static void doLstr(Map<String, Info> types) throws IOException {
        Set<String> skippedScripts = new TreeSet<>();
        for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : LSTREG.entrySet()) {
            LstrType type = entry.getKey();
            if (!type.isLstr || !type.isUnicode) {
                continue;
            }
            Info info = Info.getInfo(type.toString());
            Map<String, R2<List<String>, String>> aliases =
                    SDI.getLocaleAliasInfo()
                            .get(type == LstrType.region ? "territory" : type.toString());
            if (aliases == null) {
                System.out.println("No aliases for: " + type);
            }
            // gather data
            info.clear();
            for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) {
                String code = entry2.getKey();
                Map<LstrField, String> data = entry2.getValue();
                // May be absent; an entry without a description is simply not private-use.
                String description = data.get(LstrField.Description);
                Validity.Status subtype = Validity.Status.regular;
                if (code.equals(type.unknown)) {
                    subtype = Validity.Status.unknown;
                } else if (type.specials.contains(code)) {
                    subtype = Validity.Status.special;
                } else if (aliases != null && aliases.containsKey(code)
                        || data.containsKey(LstrField.Deprecated)) {
                    subtype = Validity.Status.deprecated;
                } else if (description != null && description.startsWith("Private use")) {
                    subtype = Validity.Status.private_use;
                }
                // Type-specific adjustments; "continue" drops the code from the output entirely.
                switch (type) {
                    case language:
                        if (subtype == Status.private_use && code.compareTo("qfz") < 0) {
                            subtype = Status.reserved;
                        } else if (code.equals("root")) {
                            continue;
                        }
                        break;
                    case region:
                        if (containment.contains(code)) {
                            subtype = Validity.Status.macroregion;
                        } else if (code.equals("XA") || code.equals("XB")) {
                            subtype = Validity.Status.special;
                        }
                        switch (subtype) {
                            case regular:
                                Info subInfo = Info.getInfo("subdivision");
                                subInfo.put(code.toLowerCase(Locale.ROOT) + "zzzz", Status.unknown);
                                break;
                            case private_use:
                                if (code.compareTo("X") < 0) {
                                    subtype = Status.reserved;
                                }
                                break;
                            default:
                                break;
                        }
                        break;
                    case script:
                        switch (code) {
                                // extra specials
                            case "Qaag":
                            case "Zinh":
                            case "Zsye":
                            case "Zyyy":
                                subtype = Status.special;
                                break;
                            default:
                                switch (subtype) {
                                    case private_use:
                                        if (code.compareTo("Qaaq") < 0) {
                                            subtype = Validity.Status.reserved;
                                        }
                                        break;
                                    case regular:
                                        ScriptMetadata.Info scriptInfo =
                                                ScriptMetadata.getInfo(code);
                                        if (scriptInfo == null && !VARIANTS.contains(code)) {
                                            skippedScripts.add(code);
                                            continue;
                                        }
                                        break;
                                    default: // don't care about rest
                                        break;
                                }
                                break;
                        }
                        break;
                    case variant:
                        if (VARIANT_EXTRAS.contains(code)) {
                            continue;
                        }
                        break;
                    default:
                        break;
                }
                info.put(code, subtype);
            }
        }
        System.out.println("Skipping non-Unicode scripts: " + Joiner.on(' ').join(skippedScripts));
    }

    /** Variant codes that are left out of the generated variant data. */
    static final Set<String> VARIANT_EXTRAS = ImmutableSet.of("POSIX", "REVISED", "SAAHO");
}