// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static org.unicode.cldr.api.CldrDataType.LDML;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrDraftStatus;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Table;

/**
 * A factory for data suppliers which filter CLDR values by substituting the value found at one
 * path (the "source") for the value found at another path (the "target"). A substituted value
 * keeps the target path but carries the value and value attributes of the source. Substitution
 * only happens when both the source and the target path have an associated value, and the
 * replacement keeps the target's original position in the value ordering.
 *
 * <p>This class DOES NOT transform supplemental or BCP-47 data, because the use of "alt" values
 * is completely different for that data (it would require merging specific attributes together).
 *
 * <p>This is not a general purpose transformation of CLDR data, since it is generally not
 * possible to "move" values between arbitrary paths. Target and source paths must be in the same
 * "namespace" (i.e. share the same element names), although their attributes can differ.
 *
 * <p>The mapping is not recursive, so mapping {@code A -> B} and {@code B -> C} will NOT cause
 * {@code A} to be mapped to {@code C}.
 *
 * <p>Typically this class is expected to be used for selecting alternate values of locale data
 * based on the {@code "alt"} path attribute (e.g. selecting the short form of a region name).
 */
public final class AlternateLocaleData {
    /**
     * Wraps the given data supplier so that {@link CldrValue}s are transformed according to the
     * supplied {@link CldrPath} mappings. In each mapping the key is the "target" path whose
     * value is to be modified and the mapped value is the "source" path from which the
     * replacement is obtained. Every target/source pair must be in the same namespace (i.e.
     * have the same path element names).
     *
     * @param src the supplier whose locale data is to be transformed.
     * @param globalAltPaths target-to-source mappings applied to every locale.
     * @param localeAltPaths target-to-source mappings keyed by locale ID (row); for that locale
     *     these take precedence over any global mapping with the same target.
     * @return a supplier producing the transformed locale data.
     */
    public static CldrDataSupplier transform(
        CldrDataSupplier src,
        Map<CldrPath, CldrPath> globalAltPaths,
        Table<String, CldrPath, CldrPath> localeAltPaths) {

        return new CldrDataFilter(src, globalAltPaths, localeAltPaths);
    }

    /** Supplier wrapper which applies the alternate path mappings to locale data only. */
    private static final class CldrDataFilter extends CldrDataSupplier {
        private final CldrDataSupplier src;
        // Both mappings are keyed by target (destination) path and yield the source path. The
        // direction matters because several targets may take their value from the same source.
        private final ImmutableMap<CldrPath, CldrPath> globalAltPaths;
        private final ImmutableTable<String, CldrPath, CldrPath> localeAltPaths;

        CldrDataFilter(
            CldrDataSupplier src,
            Map<CldrPath, CldrPath> globalAltPaths,
            Table<String, CldrPath, CldrPath> localeAltPaths) {

            this.src = checkNotNull(src);
            this.globalAltPaths = ImmutableMap.copyOf(globalAltPaths);
            this.localeAltPaths = ImmutableTable.copyOf(localeAltPaths);

            // Validate the immutable snapshots: every path must be LDML and each target/source
            // pair must share the same element names.
            for (Map.Entry<CldrPath, CldrPath> mapping : this.globalAltPaths.entrySet()) {
                CldrPath target = checkLdml(mapping.getKey());
                CldrPath source = checkLdml(mapping.getValue());
                checkArgument(
                    hasSameNamespace(target, source),
                    "alternate paths must have the same namespace: target=%s, source=%s",
                    target,
                    source);
            }
            for (Table.Cell<String, CldrPath, CldrPath> cell : this.localeAltPaths.cellSet()) {
                CldrPath target = checkLdml(cell.getColumnKey());
                CldrPath source = checkLdml(cell.getValue());
                checkArgument(
                    hasSameNamespace(target, source),
                    "alternate paths must have the same namespace: locale=%s, target=%s, source=%s",
                    cell.getRowKey(),
                    target,
                    source);
            }
        }

        @Override
        public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) {
            // Re-wrap the draft-restricted underlying supplier with the same mappings.
            CldrDataSupplier restricted = src.withDraftStatusAtLeast(draftStatus);
            return new CldrDataFilter(restricted, globalAltPaths, localeAltPaths);
        }

        @Override
        public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
            CldrData unfiltered = src.getDataForLocale(localeId, resolution);
            return new AltData(unfiltered, localeId);
        }

        @Override
        public Set<String> getAvailableLocaleIds() {
            return src.getAvailableLocaleIds();
        }

        @Override
        public CldrData getDataForType(CldrDataType type) {
            // Non-locale data is passed through from the underlying supplier unmodified.
            return src.getDataForType(type);
        }

        /** Per-locale view of the data in which target values are replaced by source values. */
        private final class AltData extends FilteredData {
            // Effective target-to-source mapping for this locale, resolved once at construction
            // so that each per-value lookup is a single map access.
            private final ImmutableMap<CldrPath, CldrPath> altPaths;
            // Source paths which are dropped from the output. Removing a source path that is not
            // itself a target is legacy behaviour inherited from the original build tools, the
            // reason for which is not known. If it becomes desirable to retain source values in
            // their original locations, this set (and its use) can simply be removed.
            private final ImmutableSet<CldrPath> toRemove;

            AltData(CldrData srcData, String localeId) {
                super(srcData);

                Map<CldrPath, CldrPath> localeOverrides = localeAltPaths.row(localeId);
                final ImmutableMap<CldrPath, CldrPath> effectivePaths;
                if (localeOverrides.isEmpty()) {
                    effectivePaths = globalAltPaths;
                } else {
                    // Start from the global mappings and let locale specific ones overwrite them.
                    Map<CldrPath, CldrPath> merged = new HashMap<>(globalAltPaths);
                    merged.putAll(localeOverrides);
                    effectivePaths = ImmutableMap.copyOf(merged);
                }
                this.altPaths = effectivePaths;

                // A source path is removed only when it is not also a target path and when the
                // target it feeds actually has a value in the underlying data. Entries are
                // target->source, so the entry's value is the path to remove.
                this.toRemove = effectivePaths.entrySet().stream()
                    .filter(e ->
                        !effectivePaths.containsKey(e.getValue())
                            && getSourceData().get(e.getKey()) != null)
                    .map(Map.Entry::getValue)
                    .collect(toImmutableSet());
            }

            @Override
            protected CldrValue filter(CldrValue value) {
                CldrPath targetPath = value.getPath();
                CldrPath sourcePath = altPaths.get(targetPath);
                if (sourcePath != null) {
                    CldrValue replacement = getSourceData().get(sourcePath);
                    if (replacement != null) {
                        // Keep the target's path, but take everything else from the source.
                        return replacement.replacePath(targetPath);
                    }
                }
                if (toRemove.contains(targetPath)) {
                    return null;
                }
                return value;
            }
        }
    }

    /**
     * Returns whether two paths have the same length and the same element name at every level
     * (attributes are not compared).
     */
    private static boolean hasSameNamespace(CldrPath x, CldrPath y) {
        if (x.getLength() != y.getLength()) {
            return false;
        }
        // The lengths are equal, so only one of the two parent chains is tested for its end.
        for (CldrPath a = x, b = y; a != null; a = a.getParent(), b = b.getParent()) {
            if (!a.getName().equals(b.getName())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the given path unchanged after verifying that it is a locale data (LDML) path.
     *
     * @throws IllegalArgumentException if the path's data type is not LDML.
     */
    private static CldrPath checkLdml(CldrPath path) {
        boolean isLocaleData = path.getDataType() == LDML;
        checkArgument(isLocaleData, "only locale data (LDML) is supported: %s", path);
        return path;
    }

    // Static factory holder only; never instantiated.
    private AlternateLocaleData() {}
}