1 package org.unicode.cldr.util; 2 3 import com.ibm.icu.util.ICUUncheckedIOException; 4 import java.io.BufferedReader; 5 import java.io.IOException; 6 import java.util.ArrayList; 7 import java.util.Arrays; 8 import java.util.Collections; 9 import java.util.Comparator; 10 import java.util.HashSet; 11 import java.util.Iterator; 12 import java.util.List; 13 import java.util.Locale; 14 import java.util.Map; 15 import java.util.Set; 16 import java.util.TreeMap; 17 import java.util.TreeSet; 18 import java.util.regex.Matcher; 19 import java.util.regex.Pattern; 20 21 public class ZoneParser { 22 static final boolean DEBUG = false; 23 24 private String version; 25 26 private Map<String, String> zone_to_country; 27 28 private Map<String, Set<String>> country_to_zoneSet; 29 30 /** 31 * @return mapping from zone id to country. If a zone has no country, then XX is used. 32 */ getZoneToCounty()33 public Map<String, String> getZoneToCounty() { 34 if (zone_to_country == null) make_zone_to_country(); 35 return zone_to_country; 36 } 37 38 /** 39 * @return mapping from country to zoneid. If a zone has no country, then XX is used. 40 */ getCountryToZoneSet()41 public Map<String, Set<String>> getCountryToZoneSet() { 42 if (country_to_zoneSet == null) make_zone_to_country(); 43 return country_to_zoneSet; 44 } 45 46 /** 47 * @return map from tzids to a list: latitude, longitude, country, comment?. + = N or E 48 */ getZoneData()49 public Map<String, List<String>> getZoneData() { 50 if (zoneData == null) makeZoneData(); 51 return zoneData; 52 } 53 getDeprecatedZoneIDs()54 public List<String> getDeprecatedZoneIDs() { 55 return Arrays.asList(FIX_DEPRECATED_ZONE_DATA); 56 } 57 58 /** */ make_zone_to_country()59 private void make_zone_to_country() { 60 zone_to_country = new TreeMap<>(TZIDComparator); 61 country_to_zoneSet = new TreeMap<>(); 62 // Map aliasMap = getAliasMap(); 63 Map<String, List<String>> zoneData = getZoneData(); 64 for (String zone : zoneData.keySet()) { 65 String country = zoneData.get(zone).get(2); 66 zone_to_country.put(zone, country); 67 Set<String> s = country_to_zoneSet.get(country); 68 if (s == null) country_to_zoneSet.put(country, s = new TreeSet<>()); 69 s.add(zone); 70 } 71 /* 72 * Set territories = getAvailableCodes("territory"); for (Iterator it = 73 * territories.iterator(); it.hasNext();) { String code = (String) 74 * it.next(); String[] zones = TimeZone.getAvailableIDs(code); for (int i = 75 * 0; i < zones.length; ++i) { if (aliasMap.get(zones[i]) != null) continue; 76 * zone_to_country.put(zones[i], code); } } String[] zones = 77 * TimeZone.getAvailableIDs(); for (int i = 0; i < zones.length; ++i) { if 78 * (aliasMap.get(zones[i]) != null) continue; if 79 * (zone_to_country.get(zones[i]) == null) { zone_to_country.put(zones[i], 80 * NO_COUNTRY); } } for (Iterator it = zone_to_country.keySet().iterator(); 81 * it.hasNext();) { String tzid = (String) it.next(); String country = 82 * (String) zone_to_country.get(tzid); Set s = (Set) 83 * country_to_zoneSet.get(country); if (s == null) 84 * country_to_zoneSet.put(country, s = new TreeSet()); s.add(tzid); } 85 */ 86 // protect 87 zone_to_country = Collections.unmodifiableMap(zone_to_country); 88 country_to_zoneSet = CldrUtility.protectCollection(country_to_zoneSet); 89 } 90 91 /** 92 * private Map bogusZones = null; 93 * 94 * <p>private Map getAliasMap() { if (bogusZones == null) { try { bogusZones = new TreeMap(); 95 * BufferedReader in = Utility.getUTF8Data"TimeZoneAliases.txt"); while (true) { String line = 96 * in.readLine(); if (line == null) break; line = line.trim(); int pos = line.indexOf('#'); if 97 * (pos >= 0) { skippedAliases.add(line); line = line.substring(0,pos).trim(); } if 98 * (line.length() == 0) continue; List pieces = Utility.splitList(line,';', true); 99 * bogusZones.put(pieces.get(0), pieces.get(1)); } in.close(); } catch (IOException e) { throw 100 * new IllegalArgumentException("Can't find timezone aliases"); } } return bogusZones; } 101 */ 102 Map<String, List<String>> zoneData; 103 104 Set<String> skippedAliases = new TreeSet<>(); 105 106 /* 107 * # This file contains a table with the following columns: # 1. ISO 3166 108 * 2-character country code. See the file `iso3166.tab'. # 2. Latitude and 109 * longitude of the zone's principal location # in ISO 6709 110 * sign-degrees-minutes-seconds format, # either +-DDMM+-DDDMM or 111 * +-DDMMSS+-DDDMMSS, # first latitude (+ is north), then longitude (+ is 112 * east). # 3. Zone name used in value of TZ environment variable. # 4. 113 * Comments; present if and only if the country has multiple rows. # # Columns 114 * are separated by a single tab. 115 */ parseYear(String year, int defaultValue)116 static int parseYear(String year, int defaultValue) { 117 if ("only".startsWith(year)) return defaultValue; 118 if ("minimum".startsWith(year)) return Integer.MIN_VALUE; 119 if ("maximum".startsWith(year)) return Integer.MAX_VALUE; 120 return Integer.parseInt(year); 121 } 122 123 public static class Time { 124 public int seconds; 125 public byte type; 126 static final byte WALL = 0, STANDARD = 1, UNIVERSAL = 2; 127 Time(String in)128 Time(String in) { 129 if (in.equals("-")) return; // zero/WALL is the default 130 char suffix = in.charAt(in.length() - 1); 131 switch (suffix) { 132 case 'w': 133 in = in.substring(0, in.length() - 1); 134 break; 135 case 's': 136 in = in.substring(0, in.length() - 1); 137 type = STANDARD; 138 break; 139 case 'u': 140 case 'g': 141 case 'z': 142 in = in.substring(0, in.length() - 1); 143 type = UNIVERSAL; 144 break; 145 } 146 seconds = parseSeconds(in, false); 147 } 148 parseSeconds(String in, boolean allowNegative)149 public static int parseSeconds(String in, boolean allowNegative) { 150 boolean negative = false; 151 if (in.startsWith("-")) { 152 assert (allowNegative); 153 negative = true; 154 in = in.substring(1); 155 } 156 String[] pieces = in.split(":"); 157 int multiplier = 3600; 158 int result = 0; 159 for (int i = 0; i < pieces.length; ++i) { 160 result += multiplier * Integer.parseInt(pieces[i]); 161 multiplier /= 60; 162 assert (multiplier >= 0); 163 } 164 if (negative) result = -result; 165 return result; 166 } 167 168 @Override toString()169 public String toString() { 170 return BoilerplateUtilities.toStringHelper(this); 171 } 172 } 173 174 static final String[] months = { 175 "january", 176 "february", 177 "march", 178 "april", 179 "may", 180 "june", 181 "july", 182 "august", 183 "september", 184 "october", 185 "november", 186 "december" 187 }; 188 static final String[] weekdays = { 189 "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday" 190 }; 191 findStartsWith(String value, String[] array, boolean exact)192 static int findStartsWith(String value, String[] array, boolean exact) { 193 value = value.toLowerCase(Locale.ENGLISH); 194 for (int i = 0; i < array.length; ++i) { 195 if (array[i].startsWith(value)) return i; 196 } 197 throw new IllegalArgumentException("Can't find " + value + " in " + Arrays.asList(months)); 198 } 199 200 static Pattern dayPattern = 201 PatternCache.get("([0-9]+)|(last)([a-z]+)|([a-z]+)([<=>]+)([0-9]+)"); 202 static final String[] relations = {"<=", ">="}; 203 204 public static class Day implements Comparable<Object> { 205 public int number; 206 public byte relation; 207 public int weekDay; 208 static final byte NONE = 0, LEQ = 2, GEQ = 4; 209 Day(String value)210 Day(String value) { 211 value = value.toLowerCase(); 212 Matcher matcher = dayPattern.matcher(value); 213 if (!matcher.matches()) { 214 throw new IllegalArgumentException(); 215 } 216 if (matcher.group(1) != null) { 217 number = Integer.parseInt(matcher.group(1)); 218 return; 219 } 220 if (matcher.group(2) != null) { 221 weekDay = findStartsWith(matcher.group(3), weekdays, false); 222 number = 31; 223 relation = LEQ; 224 return; 225 } 226 if (matcher.group(4) != null) { 227 weekDay = findStartsWith(matcher.group(4), weekdays, false); 228 relation = (byte) findStartsWith(matcher.group(5), relations, false); 229 number = Integer.parseInt(matcher.group(6)); 230 return; 231 } 232 throw new IllegalArgumentException(); 233 } 234 235 @Override toString()236 public String toString() { 237 return BoilerplateUtilities.toStringHelper(this); 238 } 239 240 @Override compareTo(Object other)241 public int compareTo(Object other) { 242 return toString().compareTo(other.toString()); 243 } 244 } 245 246 /** 247 * A rule line has the form 248 * 249 * <p>Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S 250 * 251 * <p>For example: 252 * 253 * <p>Rule US 1967 1973 - Apr lastSun 2:00 1:00 D 254 * 255 * <p>The fields that make up a rule line are: 256 * 257 * <p>NAME Gives the (arbitrary) name of the set of rules this rule is part of. 258 * 259 * <p>FROM Gives the first year in which the rule applies. Any integer year can be supplied; the 260 * Gregorian calendar is assumed. The word minimum (or an abbreviation) means the minimum year 261 * representable as an integer. The word maximum (or an abbreviation) means the maximum year 262 * representable as an integer. Rules can describe times that are not representable as time 263 * values, with the unrepresentable times ignored; this allows rules to be portable among hosts 264 * with differing time value types. 265 * 266 * <p>TO Gives the final year in which the rule applies. In addition to minimum and maximum (as 267 * above), the word only (or an abbreviation) may be used to repeat the value of the FROM field. 268 * 269 * <p>TYPE Gives the type of year in which the rule applies. If TYPE is - then the rule applies 270 * in all years between FROM and TO inclusive. If TYPE is something else, then zic executes the 271 * command yearistype year type to check the type of a year: an exit status of zero is taken to 272 * mean that the year is of the given type; an exit status of one is taken to mean that the year 273 * is not of the given type. 274 * 275 * <p>IN Names the month in which the rule takes effect. Month names may be abbreviated. 276 * 277 * <p>ON Gives the day on which the rule takes effect. Recognized forms include: 278 * 279 * <p>5 the fifth of the month lastSun the last Sunday in the month lastMon the last Monday in 280 * the month Sun>=8 first Sunday on or after the eighth Sun<=25 last Sunday on or before the 281 * 25th 282 * 283 * <p>Names of days of the week may be abbreviated or spelled out in full. Note that there must 284 * be no spaces within the ON field. 285 * 286 * <p>AT Gives the time of day at which the rule takes effect. Recognized forms include: 287 * 288 * <p>2 time in hours 2:00 time in hours and minutes 15:00 24-hour format time (for times after 289 * noon) 1:28:14 time in hours, minutes, and seconds - equivalent to 0 290 * 291 * <p>where hour 0 is midnight at the start of the day, and hour 24 is midnight at the end of 292 * the day. Any of these forms may be followed by the letter w if the given time is local "wall 293 * clock" time, s if the given time is local "standard" time, or u (or g or z) if the given time 294 * is universal time; in the absence of an indicator, wall clock time is assumed. ** cannot be 295 * negative 296 * 297 * <p>SAVE Gives the amount of time to be added to local standard time when the rule is in 298 * effect. This field has the same format as the AT field (although, of course, the w and s 299 * suffixes are not used). ** can be positive or negative 300 * 301 * <p>LETTER/S Gives the "variable part" (for example, the "S" or "D" in "EST" or "EDT") of time 302 * zone abbreviations to be used when this rule is in effect. If this field is -, the variable 303 * part is null. 304 */ 305 public static class RuleLine { 306 public static Set<String> types = new TreeSet<>(); 307 public static Set<Day> days = new TreeSet<>(); 308 static Set<Integer> saves = new TreeSet<>(); 309 RuleLine(List<String> l)310 RuleLine(List<String> l) { 311 fromYear = parseYear(l.get(0), 0); 312 toYear = parseYear(l.get(1), fromYear); 313 type = l.get(2); 314 if (type.equals("-")) type = null; 315 month = 1 + findStartsWith(l.get(3), months, false); 316 day = new Day(l.get(4)); 317 time = new Time(l.get(5)); 318 save = Time.parseSeconds(l.get(6), true); 319 letter = l.get(7); 320 if (letter.equals("-")) letter = null; 321 if (type != null) types.add(type); 322 days.add(day); 323 } 324 325 @Override toString()326 public String toString() { 327 return BoilerplateUtilities.toStringHelper(this); 328 } 329 330 public int fromYear; 331 332 public int toYear; 333 334 public String type; 335 336 public int month; 337 338 public Day day; 339 340 public Time time; 341 342 public int save; 343 344 public String letter; 345 346 public static final int FIELD_COUNT = 8; // excluding Rule, Name 347 } 348 349 /** 350 * A zone line has the form 351 * 352 * <p>Zone NAME GMTOFF RULES/SAVE FORMAT [UNTIL] 353 * 354 * <p>For example: 355 * 356 * <p>Zone Australia/Adelaide 9:30 Aus CST 1971 Oct 31 2:00 357 * 358 * <p>The fields that make up a zone line are: 359 * 360 * <p>NAME The name of the time zone. This is the name used in creating the time conversion 361 * information file for the zone. 362 * 363 * <p>GMTOFF The amount of time to add to UTC to get standard time in this zone. This field has 364 * the same format as the AT and SAVE fields of rule lines; begin the field with a minus sign if 365 * time must be subtracted from UTC. 366 * 367 * <p>RULES/SAVE The name of the rule(s) that apply in the time zone or, alternately, an amount 368 * of time to add to local standard time. If this field is - then standard time always applies 369 * in the time zone. 370 * 371 * <p>FORMAT The format for time zone abbreviations in this time zone. The pair of characters %s 372 * is used to show where the "variable part" of the time zone abbreviation goes. Alternately, a 373 * slash (/) separates standard and daylight abbreviations. 374 * 375 * <p>UNTIL The time at which the UTC offset or the rule(s) change for a location. It is 376 * specified as a year, a month, a day, and a time of day. If this is specified, the time zone 377 * information is generated from the given UTC offset and rule change until the time specified. 378 * The month, day, and time of day have the same format as the IN, ON, and AT columns of a rule; 379 * trailing columns can be omitted, and default to the earliest possible value for the missing 380 * columns. 381 * 382 * <p>The next line must be a "continuation" line; this has the same form as a zone line except 383 * that the string "Zone" and the name are omitted, as the continuation line will place 384 * information starting at the time specified as the UNTIL field in the previous line in the 385 * file used by the previous line. Continuation lines may contain an UNTIL field, just as zone 386 * lines do, indicating that the next line is a further continuation. 387 */ 388 public static class ZoneLine { 389 public static Set<Day> untilDays = new TreeSet<>(); 390 public static Set<String> rulesSaves = new TreeSet<>(); 391 ZoneLine(List<String> l)392 ZoneLine(List<String> l) { 393 gmtOff = Time.parseSeconds(l.get(0), true); 394 rulesSave = l.get(1); 395 if (rulesSave.equals("-")) rulesSave = "0"; 396 else if (rulesSave.charAt(0) < 'A') 397 rulesSave = "" + Time.parseSeconds(rulesSave, false); 398 399 format = l.get(2); 400 switch (l.size()) { 401 case 7: 402 untilTime = new Time(l.get(6)); // fall through 403 case 6: 404 untilDay = new Day(l.get(5)); // fall through 405 untilDays.add(untilDay); 406 case 5: 407 untilMonth = 1 + findStartsWith(l.get(4), months, false); // fall through 408 case 4: 409 untilYear = parseYear(l.get(3), Integer.MAX_VALUE); // fall through 410 case 3: 411 break; // ok 412 default: 413 throw new IllegalArgumentException("Wrong field count: " + l); 414 } 415 rulesSaves.add(rulesSave); 416 } 417 418 @Override toString()419 public String toString() { 420 return BoilerplateUtilities.toStringHelper(this); 421 } 422 423 public int gmtOff; 424 425 public String rulesSave; 426 427 public String format; 428 429 public int untilYear = Integer.MAX_VALUE; // indicating continuation 430 431 public int untilMonth; 432 433 public Day untilDay; 434 435 public Time untilTime; 436 437 public String comment; 438 439 public static final int FIELD_COUNT = 3; // excluding Zone, Name 440 441 public static final int FIELD_COUNT_UNTIL = 7; // excluding Zone, Name 442 } 443 444 Map<String, List<RuleLine>> ruleID_rules = new TreeMap<>(); 445 446 Map<String, List<ZoneLine>> zone_rules = new TreeMap<>(); 447 448 Map<String, String> linkold_new = new TreeMap<>(); 449 450 Map<String, Set<String>> linkNew_oldSet = new TreeMap<>(); 451 452 public class Transition { 453 public long date; 454 public long offset; 455 public String abbreviation; 456 } 457 458 public class TransitionList { 459 addTransitions(ZoneLine lastZoneLine, ZoneLine zoneLine, int startYear, int endYear)460 void addTransitions(ZoneLine lastZoneLine, ZoneLine zoneLine, int startYear, int endYear) { 461 // add everything between the zonelines 462 if (lastZoneLine == null) { 463 return; 464 } 465 startYear = Math.max(startYear, lastZoneLine.untilYear); 466 endYear = Math.min(endYear, zoneLine.untilYear); 467 int gmtOffset = lastZoneLine.gmtOff; 468 for (int year = startYear; year <= endYear; ++year) { 469 resolveTime( 470 gmtOffset, 471 lastZoneLine.untilYear, 472 lastZoneLine.untilMonth, 473 lastZoneLine.untilDay, 474 lastZoneLine.untilTime); 475 } 476 } 477 resolveTime( int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime)478 private long resolveTime( 479 int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime) { 480 return 0; 481 } 482 } 483 getTransitions(String zoneID, int startYear, int endYear)484 public TransitionList getTransitions(String zoneID, int startYear, int endYear) { 485 TransitionList results = new TransitionList(); 486 List<ZoneLine> rules = zone_rules.get(zoneID); 487 ZoneLine lastZoneLine = null; 488 for (ZoneLine zoneLine : rules) { 489 results.addTransitions(lastZoneLine, zoneLine, startYear, endYear); 490 lastZoneLine = zoneLine; 491 } 492 return results; 493 } 494 getTZIDComparator()495 public Comparator<String> getTZIDComparator() { 496 return TZIDComparator; 497 } 498 499 private static List<String> errorData = 500 Arrays.asList( 501 new String[] { 502 String.valueOf(Double.MIN_VALUE), String.valueOf(Double.MIN_VALUE), "" 503 }); 504 505 private Comparator<String> TZIDComparator = 506 new Comparator<>() { 507 Map<String, List<String>> data = getZoneData(); 508 509 @Override 510 public int compare(String s1, String s2) { 511 List<String> data1 = getData(s1); 512 List<String> data2 = getData(s2); 513 int result; 514 // country 515 String country1 = data1.get(2); 516 String country2 = data2.get(2); 517 518 if ((result = country1.compareTo(country2)) != 0) return result; 519 // longitude 520 Double d1 = Double.valueOf(data1.get(1)); 521 Double d2 = Double.valueOf(data2.get(1)); 522 if ((result = d1.compareTo(d2)) != 0) return result; 523 // latitude 524 d1 = Double.valueOf(data1.get(0)); 525 d2 = Double.valueOf(data2.get(0)); 526 if ((result = d1.compareTo(d2)) != 0) return result; 527 // name 528 return s1.compareTo(s2); 529 } 530 531 /** 532 * Get timezone data for the given location Include work-arounds for missing time 533 * zones 534 * 535 * @param s the string like "Australia/Currie" 536 * @return a list of 4 strings for latitude, longitude, country, city 537 * <p>Reference: https://unicode-org.atlassian.net/browse/CLDR-14428 538 */ 539 private List<String> getData(String s) { 540 List<String> d = data.get(s); 541 if (d == null) { 542 String sNew = linkold_new.get(s); 543 if (sNew != null) { 544 d = data.get(sNew); 545 } 546 if (d == null) { 547 d = errorData; 548 } 549 } 550 return d; 551 } 552 }; 553 554 public static MapComparator<String> regionalCompare = new MapComparator<>(); 555 556 static { 557 regionalCompare.add("America"); 558 regionalCompare.add("Atlantic"); 559 regionalCompare.add("Europe"); 560 regionalCompare.add("Africa"); 561 regionalCompare.add("Asia"); 562 regionalCompare.add("Indian"); 563 regionalCompare.add("Australia"); 564 regionalCompare.add("Pacific"); 565 regionalCompare.add("Arctic"); 566 regionalCompare.add("Antarctica"); 567 regionalCompare.add("Etc"); 568 } 569 570 private static String[] TZFiles = { 571 "africa", 572 "antarctica", 573 "asia", 574 "australasia", 575 "backward", 576 "etcetera", 577 "europe", 578 "northamerica", 579 "southamerica" 580 }; 581 582 private static Map<String, String> FIX_UNSTABLE_TZIDS; 583 584 private static Set<String> SKIP_LINKS = 585 new HashSet<>( 586 Arrays.asList( 587 new String[] { 588 "America/Montreal", "America/Toronto", 589 "America/Santa_Isabel", "America/Tijuana" 590 })); 591 592 private static Set<String> PREFERRED_BASES = 593 new HashSet<>(Arrays.asList(new String[] {"Europe/London"})); 594 595 private static String[][] ADD_ZONE_ALIASES_DATA = { 596 {"Etc/UCT", "Etc/UTC"}, 597 {"EST", "Etc/GMT+5"}, 598 {"MST", "Etc/GMT+7"}, 599 {"HST", "Etc/GMT+10"}, 600 {"SystemV/AST4", "Etc/GMT+4"}, 601 {"SystemV/EST5", "Etc/GMT+5"}, 602 {"SystemV/CST6", "Etc/GMT+6"}, 603 {"SystemV/MST7", "Etc/GMT+7"}, 604 {"SystemV/PST8", "Etc/GMT+8"}, 605 {"SystemV/YST9", "Etc/GMT+9"}, 606 {"SystemV/HST10", "Etc/GMT+10"}, 607 }; 608 609 static String[] FIX_DEPRECATED_ZONE_DATA = { 610 "Africa/Timbuktu", 611 "America/Argentina/ComodRivadavia", 612 "America/Santa_Isabel", 613 "Europe/Belfast", 614 "Pacific/Yap", 615 "Antarctica/South_Pole", 616 "America/Shiprock", 617 "America/Montreal", 618 "Asia/Chongqing", 619 "Asia/Harbin", 620 "Asia/Kashgar" 621 }; 622 623 static { 624 // The format is <new name>, <old name> 625 String[][] FIX_UNSTABLE_TZID_DATA = 626 new String[][] { 627 {"America/Atikokan", "America/Coral_Harbour"}, 628 {"America/Argentina/Buenos_Aires", "America/Buenos_Aires"}, 629 {"America/Argentina/Catamarca", "America/Catamarca"}, 630 {"America/Argentina/Cordoba", "America/Cordoba"}, 631 {"America/Argentina/Jujuy", "America/Jujuy"}, 632 {"America/Argentina/Mendoza", "America/Mendoza"}, 633 {"America/Nuuk", "America/Godthab"}, 634 {"America/Kentucky/Louisville", "America/Louisville"}, 635 {"America/Indiana/Indianapolis", "America/Indianapolis"}, 636 {"Africa/Asmara", "Africa/Asmera"}, 637 {"Atlantic/Faroe", "Atlantic/Faeroe"}, 638 {"Asia/Kolkata", "Asia/Calcutta"}, 639 {"Asia/Ho_Chi_Minh", "Asia/Saigon"}, 640 {"Asia/Yangon", "Asia/Rangoon"}, 641 {"Asia/Kathmandu", "Asia/Katmandu"}, 642 {"Europe/Kyiv", "Europe/Kiev"}, 643 {"Pacific/Pohnpei", "Pacific/Ponape"}, 644 {"Pacific/Chuuk", "Pacific/Truk"}, 645 {"Pacific/Honolulu", "Pacific/Johnston"} 646 }; 647 FIX_UNSTABLE_TZIDS = CldrUtility.asMap(FIX_UNSTABLE_TZID_DATA); 648 } 649 650 // CLDR canonical zone IDs removed from zone.tab are defined here. 651 // When these zones are deprecated in CLDR, remove them from this array. 652 // See CLDR-16049 653 static final String[][] SUPPLEMENTAL_ZONE_ID_DATA = { 654 {"Europe/Uzhgorod", "UA", "+4837+02218"}, // 2022d 655 {"Europe/Zaporozhye", "UA", "+4750+03510"}, // 2022d 656 {"America/Nipigon", "CA", "+4901-08816"}, // 2022f 657 {"America/Rainy_River", "CA", "+4843-09434"}, // 2022f 658 {"America/Thunder_Bay", "CA", "+4823-08915"}, // 2022f 659 {"America/Pangnirtung", "CA", "+6608-06544"}, // 2022g 660 }; 661 662 /** */ makeZoneData()663 private void makeZoneData() { 664 try { 665 // get version 666 BufferedReader versionIn = CldrUtility.getUTF8Data("tzdb-version.txt"); 667 version = versionIn.readLine(); 668 if (!version.matches("[0-9]{4}[a-z]")) { 669 throw new IllegalArgumentException( 670 String.format( 671 "Bad Version number: %s, should be of the form 2007x", version)); 672 } 673 versionIn.close(); 674 675 // String deg = "([+-][0-9]+)";// 676 String deg = "([+-])([0-9][0-9][0-9]?)([0-9][0-9])([0-9][0-9])?"; // 677 Matcher m = PatternCache.get(deg + deg).matcher(""); 678 zoneData = new TreeMap<>(); 679 BufferedReader in = CldrUtility.getUTF8Data("zone.tab"); 680 while (true) { 681 String line = in.readLine(); 682 if (line == null) break; 683 line = line.trim(); 684 int pos = line.indexOf('#'); 685 if (pos >= 0) { 686 skippedAliases.add(line); 687 line = line.substring(0, pos).trim(); 688 } 689 if (line.length() == 0) continue; 690 List<String> pieces = CldrUtility.splitList(line, '\t', true); 691 String country = pieces.get(0); 692 String latLong = pieces.get(1); 693 String tzid = pieces.get(2); 694 String ntzid = FIX_UNSTABLE_TZIDS.get(tzid); 695 if (ntzid != null) tzid = ntzid; 696 String comment = pieces.size() < 4 ? null : (String) pieces.get(3); 697 pieces.clear(); 698 if (!m.reset(latLong).matches()) 699 throw new IllegalArgumentException("Bad zone.tab, lat/long format: " + line); 700 701 pieces.add(getDegrees(m, true).toString()); 702 pieces.add(getDegrees(m, false).toString()); 703 pieces.add(country); 704 if (comment != null) pieces.add(comment); 705 if (zoneData.containsKey(tzid)) 706 throw new IllegalArgumentException("Bad zone.tab, duplicate entry: " + line); 707 zoneData.put(tzid, pieces); 708 } 709 in.close(); 710 // add Etcs 711 for (int i = -14; i <= 12; ++i) { 712 List<String> pieces = new ArrayList<>(); 713 int latitude = 0; 714 int longitude = i * 15; 715 if (longitude <= -180) { 716 longitude += 360; 717 } 718 pieces.add(Double.toString(latitude)); // lat 719 // remember that the sign of the TZIDs is wrong 720 pieces.add(Double.toString(-longitude)); // long 721 pieces.add(StandardCodes.NO_COUNTRY); // country 722 723 zoneData.put("Etc/GMT" + (i == 0 ? "" : i < 0 ? "" + i : "+" + i), pieces); 724 } 725 // add Unknown / UTC 726 List<String> pieces = new ArrayList<>(); 727 pieces.add(Double.toString(0)); // lat 728 pieces.add(Double.toString(0)); // long 729 pieces.add(StandardCodes.NO_COUNTRY); // country 730 zoneData.put("Etc/Unknown", pieces); 731 zoneData.put("Etc/UTC", pieces); 732 733 // add extra zones 734 for (String[] zoneEntry : SUPPLEMENTAL_ZONE_ID_DATA) { 735 List<String> zarray = new ArrayList<>(); 736 if (!m.reset(zoneEntry[2]).matches()) { 737 throw new IllegalArgumentException( 738 "Bad zone.tab, lat/long format: " + zoneEntry[2]); 739 } 740 zarray.add(getDegrees(m, true).toString()); 741 zarray.add(getDegrees(m, false).toString()); 742 zarray.add(zoneEntry[1]); 743 zoneData.put(zoneEntry[0], zarray); 744 } 745 746 zoneData = CldrUtility.protectCollection(zoneData); // protect for later 747 748 // now get links 749 Pattern whitespace = PatternCache.get("\\s+"); 750 XEquivalenceClass<String, String> linkedItems = new XEquivalenceClass<>("None"); 751 for (int i = 0; i < TZFiles.length; ++i) { 752 in = CldrUtility.getUTF8Data(TZFiles[i]); 753 String zoneID = null; 754 while (true) { 755 String line = in.readLine(); 756 if (line == null) break; 757 String originalLine = line; 758 int commentPos = line.indexOf("#"); 759 String comment = null; 760 if (commentPos >= 0) { 761 comment = line.substring(commentPos + 1).trim(); 762 line = line.substring(0, commentPos); 763 } 764 line = line.trim(); 765 if (line.length() == 0) continue; 766 String[] items = whitespace.split(line); 767 if (zoneID != null || items[0].equals("Zone")) { 768 List<String> l = new ArrayList<>(); 769 l.addAll(Arrays.asList(items)); 770 771 // Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01 772 // 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time 773 if (zoneID == null) { 774 l.remove(0); // "Zone" 775 zoneID = l.get(0); 776 String ntzid = FIX_UNSTABLE_TZIDS.get(zoneID); 777 if (ntzid != null) zoneID = ntzid; 778 l.remove(0); 779 } 780 List<ZoneLine> zoneRules = zone_rules.get(zoneID); 781 if (zoneRules == null) { 782 zoneRules = new ArrayList<>(); 783 zone_rules.put(zoneID, zoneRules); 784 } 785 786 if (l.size() < ZoneLine.FIELD_COUNT 787 || l.size() > ZoneLine.FIELD_COUNT_UNTIL) { 788 System.out.println("***Zone incorrect field count:"); 789 System.out.println(l); 790 System.out.println(originalLine); 791 } 792 793 ZoneLine zoneLine = new ZoneLine(l); 794 zoneLine.comment = comment; 795 zoneRules.add(zoneLine); 796 if (l.size() == ZoneLine.FIELD_COUNT) { 797 zoneID = null; // no continuation line 798 } 799 } else if (items[0].equals("Rule")) { 800 // # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S 801 // Rule Algeria 1916 only - Jun 14 23:00s 1:00 S 802 803 String ruleID = items[1]; 804 List<RuleLine> ruleList = ruleID_rules.get(ruleID); 805 if (ruleList == null) { 806 ruleList = new ArrayList<>(); 807 ruleID_rules.put(ruleID, ruleList); 808 } 809 List<String> l = new ArrayList<>(); 810 l.addAll(Arrays.asList(items)); 811 l.remove(0); 812 l.remove(0); 813 if (l.size() != RuleLine.FIELD_COUNT) { 814 System.out.println("***Rule incorrect field count:"); 815 System.out.println(l); 816 } 817 if (comment != null) l.add(comment); 818 RuleLine ruleLine = new RuleLine(l); 819 ruleList.add(ruleLine); 820 821 } else if (items[0].equals("Link")) { 822 String old = items[2]; 823 String newOne = items[1]; 824 if (!(SKIP_LINKS.contains(old) && SKIP_LINKS.contains(newOne))) { 825 // System.out.println("Original " + old + "\t=>\t" + newOne); 826 linkedItems.add(old, newOne); 827 } 828 /* 829 * String conflict = (String) linkold_new.get(old); if (conflict != 830 * null) { System.out.println("Conflict with old: " + old + " => " + 831 * conflict + ", " + newOne); } System.out.println(old + "\t=>\t" + 832 * newOne); linkold_new.put(old, newOne); 833 */ 834 } else { 835 if (DEBUG) System.out.println("Unknown zone line: " + line); 836 } 837 } 838 in.close(); 839 } 840 // add in stuff that should be links 841 for (int i = 0; i < ADD_ZONE_ALIASES_DATA.length; ++i) { 842 linkedItems.add(ADD_ZONE_ALIASES_DATA[i][0], ADD_ZONE_ALIASES_DATA[i][1]); 843 } 844 845 Set<String> isCanonical = zoneData.keySet(); 846 847 // walk through the sets, and 848 // if any set contains two canonical items, split it. 849 // if any contains one, make it the primary 850 // if any contains zero, problem! 851 for (Set<String> equivalents : linkedItems.getEquivalenceSets()) { 852 Set<String> canonicals = new TreeSet<>(equivalents); 853 canonicals.retainAll(isCanonical); 854 if (canonicals.size() == 0) 855 throw new IllegalArgumentException("No canonicals in: " + equivalents); 856 if (canonicals.size() > 1) { 857 if (DEBUG) { 858 System.out.println("Too many canonicals in: " + equivalents); 859 System.out.println( 860 "\t*Don't* put these into the same equivalence class: " 861 + canonicals); 862 } 863 Set<String> remainder = new TreeSet<>(equivalents); 864 remainder.removeAll(isCanonical); 865 if (remainder.size() != 0) { 866 if (DEBUG) { 867 System.out.println( 868 "\tThe following should be equivalent to others: " + remainder); 869 } 870 } 871 } 872 { 873 String newOne; 874 // get the item that we want to hang all the aliases off of. 875 // normally this is the first (alphabetically) one, but 876 // it may be overridden with PREFERRED_BASES 877 Set<String> preferredItems = new HashSet<>(PREFERRED_BASES); 878 preferredItems.retainAll(canonicals); 879 if (preferredItems.size() > 0) { 880 newOne = preferredItems.iterator().next(); 881 } else { 882 newOne = canonicals.iterator().next(); 883 } 884 for (String oldOne : equivalents) { 885 if (canonicals.contains(oldOne)) continue; 886 // System.out.println("Mapping " + oldOne + "\t=>\t" + newOne); 887 linkold_new.put(oldOne, newOne); 888 } 889 } 890 } 891 892 /* 893 * // fix the links from old to new, to remove chains for (Iterator it = 894 * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem = 895 * it.next(); Object newItem = linkold_new.get(oldItem); while (true) { 896 * Object linkItem = linkold_new.get(newItem); if (linkItem == null) 897 * break; if (true) System.out.println("Connecting link chain: " + oldItem + 898 * "\t=> " + newItem + "\t=> " + linkItem); newItem = linkItem; 899 * linkold_new.put(oldItem, newItem); } } 900 * // reverse the links *from* canonical names for (Iterator it = 901 * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem = 902 * it.next(); if (!isCanonical.contains(oldItem)) continue; Object newItem = 903 * linkold_new.get(oldItem); } 904 * 905 * // fix unstable TZIDs Set itemsToRemove = new HashSet(); Map 906 * itemsToAdd = new HashMap(); for (Iterator it = 907 * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem = 908 * it.next(); Object newItem = linkold_new.get(oldItem); Object modOldItem = 909 * RESTORE_UNSTABLE_TZIDS.get(oldItem); Object modNewItem = 910 * FIX_UNSTABLE_TZIDS.get(newItem); if (modOldItem == null && modNewItem == 911 * null) continue; if (modOldItem == null) { // just fix old entry 912 * itemsToAdd.put(oldItem, modNewItem); continue; } // otherwise have to 913 * nuke and redo itemsToRemove.add(oldItem); if (modNewItem == null) 914 * modNewItem = newItem; itemsToAdd.put(modOldItem, modNewItem); } // now 915 * make fixes (we couldn't earlier because we were iterating 916 * Utility.removeAll(linkold_new, itemsToRemove); 917 * linkold_new.putAll(itemsToAdd); 918 * // now remove all links that are from canonical zones 919 * Utility.removeAll(linkold_new, zoneData.keySet()); 920 */ 921 922 // generate list of new to old 923 for (Iterator<String> it = linkold_new.keySet().iterator(); it.hasNext(); ) { 924 String oldZone = it.next(); 925 String newZone = linkold_new.get(oldZone); 926 Set<String> s = linkNew_oldSet.get(newZone); 927 if (s == null) linkNew_oldSet.put(newZone, s = new HashSet<>()); 928 s.add(oldZone); 929 } 930 931 // PROTECT EVERYTHING 932 linkNew_oldSet = CldrUtility.protectCollection(linkNew_oldSet); 933 linkold_new = CldrUtility.protectCollection(linkold_new); 934 ruleID_rules = CldrUtility.protectCollection(ruleID_rules); 935 zone_rules = CldrUtility.protectCollection(zone_rules); 936 // TODO protect zone info later 937 } catch (IOException e) { 938 throw new ICUUncheckedIOException("Can't find timezone aliases: " + e.toString(), e); 939 } 940 } 941 942 /** 943 * @param m 944 */ 945 private Double getDegrees(Matcher m, boolean lat) { 946 int startIndex = lat ? 1 : 5; 947 double amount = 948 Integer.parseInt(m.group(startIndex + 1)) 949 + Integer.parseInt(m.group(startIndex + 2)) / 60.0; 950 if (m.group(startIndex + 3) != null) 951 amount += Integer.parseInt(m.group(startIndex + 3)) / 3600.0; 952 if (m.group(startIndex).equals("-")) amount = -amount; 953 return amount; 954 } 955 956 /** 957 * @return Returns the linkold_new. 958 */ 959 public Map<String, String> getZoneLinkold_new() { 960 getZoneData(); 961 return linkold_new; 962 } 963 964 /** 965 * @return Returns the linkold_new. 966 */ 967 public Map<String, Set<String>> getZoneLinkNew_OldSet() { 968 getZoneData(); 969 return linkNew_oldSet; 970 } 971 972 /** 973 * @return Returns the ruleID_rules. 974 */ 975 public Map<String, List<RuleLine>> getZoneRuleID_rules() { 976 getZoneData(); 977 return ruleID_rules; 978 } 979 980 /** 981 * @return Returns the zone_rules. 982 */ 983 public Map<String, List<ZoneLine>> getZone_rules() { 984 getZoneData(); 985 return zone_rules; 986 } 987 988 public String getVersion() { 989 return version; 990 } 991 } 992