package org.unicode.cldr.util;

import com.ibm.icu.impl.PatternTokenizer;
import com.ibm.icu.text.DateTimePatternGenerator.FormatParser;
import com.ibm.icu.text.UnicodeSet;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Set;

/**
 * Rewrites date/time pattern strings into a canonical form by round-tripping them through ICU's
 * {@link FormatParser}, optionally collapsing year fields to a single {@code y}.
 *
 * <p>NOTE(review): each instance reuses one {@link FormatParser} across calls, so it is presumably
 * not safe for concurrent use; confirm before sharing an instance between threads.
 */
public class DateTimeCanonicalizer {

    /** The kind of date/time pattern a path refers to, as decided by {@link #fromPath(String)}. */
    public enum DateTimePatternType {
        NA,
        STOCK,
        AVAILABLE,
        INTERVAL,
        GMT;

        /** Unmodifiable set holding exactly {@link #STOCK}, {@link #AVAILABLE}, {@link #INTERVAL}. */
        public static final Set<DateTimePatternType> STOCK_AVAILABLE_INTERVAL_PATTERNS =
                Collections.unmodifiableSet(EnumSet.of(STOCK, AVAILABLE, INTERVAL));

        /**
         * Classifies a path purely by substring checks, tested in this order:
         *
         * <ol>
         *   <li>no {@code /dates}: {@link #NA}
         *   <li>{@code /pattern} plus one of {@code /dateFormats}, {@code /timeFormats},
         *       {@code /dateTimeFormatLength}: {@link #STOCK}
         *   <li>{@code /dateFormatItem}: {@link #AVAILABLE}
         *   <li>{@code /intervalFormatItem}: {@link #INTERVAL}
         *   <li>{@code /timeZoneNames/hourFormat}: {@link #GMT}
         *   <li>anything else: {@link #NA}
         * </ol>
         *
         * @param path the path to classify; must not be null
         * @return the pattern type matching the first rule that applies
         */
        public static DateTimePatternType fromPath(String path) {
            if (!path.contains("/dates")) {
                return NA;
            }
            if (path.contains("/pattern")
                    && (path.contains("/dateFormats")
                            || path.contains("/timeFormats")
                            || path.contains("/dateTimeFormatLength"))) {
                return STOCK;
            }
            if (path.contains("/dateFormatItem")) {
                return AVAILABLE;
            }
            if (path.contains("/intervalFormatItem")) {
                return INTERVAL;
            }
            if (path.contains("/timeZoneNames/hourFormat")) {
                return GMT;
            }
            return NA;
        }
    }

    /** When true, year fields are normalized to a single {@code y} during canonicalization. */
    private final boolean fixYears;

    /** Parser reused for every call; it is re-seeded with the incoming value each time. */
    private final FormatParser formatDateParser = new FormatParser();

    // TODO make ICU's FormatParser.PatternTokenizer public (and clean up API)

    /**
     * Tokenizer used only to re-quote literal text. ASCII letters are syntax characters, and
     * Latin/Cyrillic letters and marks are additionally quoted. setEscapeCharacters is
     * deliberately not configured: the original note warns that DateFormat doesn't accept
     * backslash-u escapes.
     */
    private final transient PatternTokenizer tokenizer =
            new PatternTokenizer()
                    .setSyntaxCharacters(new UnicodeSet("[a-zA-Z]"))
                    .setExtraQuotingCharacters(
                            new UnicodeSet("[[[:script=Latn:][:script=Cyrl:]]&[[:L:][:M:]]]"))
                    .setUsingQuote(true);

    /**
     * Creates a canonicalizer.
     *
     * @param fixYears true to rewrite year fields to a single {@code y}
     */
    public DateTimeCanonicalizer(boolean fixYears) {
        this.fixYears = fixYears;
    }

    /**
     * Produces the canonical form of a date/time pattern.
     *
     * <p>The value is parsed with the shared {@link FormatParser}. Without year fixing the result
     * is the parser's own string form; with year fixing the pattern is rebuilt item by item so
     * that every field starting with {@code y} becomes a single {@code y}, except that a stock
     * "short" pattern may keep {@code yy}.
     *
     * @param path the path the value belongs to; only consulted (for the substring "short") when
     *     year fixing meets a year field in a {@link DateTimePatternType#STOCK} pattern
     * @param value the pattern to canonicalize
     * @param datetimePatternType the pattern type of {@code path}
     * @return {@code value} itself when it is already canonical, otherwise the canonical pattern
     */
    public String getCanonicalDatePattern(
            String path, String value, DateTimePatternType datetimePatternType) {
        formatDateParser.set(value);

        final String canonical =
                fixYears
                        ? rebuildWithNormalizedYears(path, datetimePatternType)
                        : formatDateParser.toString();

        // Hand back the caller's own string when nothing changed.
        return value.equals(canonical) ? value : canonical;
    }

    /** Rebuilds the currently parsed pattern, collapsing year fields to a single {@code y}. */
    private String rebuildWithNormalizedYears(String path, DateTimePatternType patternType) {
        final StringBuilder rebuilt = new StringBuilder();
        for (Object piece : formatDateParser.getItems()) {
            final String text = piece.toString();
            if (piece instanceof String) {
                // Plain strings are literal text: re-quote them with the tokenizer.
                rebuilt.append(tokenizer.quoteLiteral(text));
                continue;
            }
            // Collapse a year field unless it is the "yy" allowed in a stock short pattern.
            final boolean collapseYear =
                    text.startsWith("y")
                            && !(patternType == DateTimePatternType.STOCK
                                    && path.contains("short")
                                    && text.equals("yy"));
            if (collapseYear) {
                rebuilt.append('y');
            } else {
                rebuilt.append(text);
            }
        }
        return rebuilt.toString();
    }
}