package org.unicode.cldr.util;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.ibm.icu.util.Output;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.unicode.cldr.util.SupplementalDataInfo.UnitIdComponentType;

/**
 * Groups the dash-separated components of a unit identifier into segments.
 *
 * <p>A segment is either a run of the form {@code prefix* base suffix*} (joined back together
 * with {@code '-'}), or a single {@code and}, {@code per}, or {@code power} component. Component
 * types are looked up through {@link SupplementalDataInfo#getUnitIdComponentType(String)}.
 *
 * <p>An instance holds mutable parsing state: call one of the {@code set} methods to supply the
 * components, then call {@link #nextParse(Output)} repeatedly (or {@link #getRemaining()} once).
 */
public class UnitParser {
    private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
    private static final SupplementalDataInfo SDI = CLDR_CONFIG.getSupplementalDataInfo();
    public static final Splitter DASH_SPLITTER = Splitter.on('-');
    public static final Joiner DASH_JOIN = Joiner.on('-');

    /** Component read from {@link #source} but not yet consumed into a segment; may be null. */
    private String bufferedItem = null;

    /** Type of {@link #bufferedItem}; only meaningful while {@code bufferedItem} is non-null. */
    private UnitIdComponentType bufferedType = null;

    /** Remaining components to parse; assigned by the {@code set} methods. */
    private Iterator<String> source;

    /**
     * Starts parsing a new sequence of components, discarding any look-ahead left over from a
     * previous source.
     *
     * @param source the unit identifier components, in order
     * @return this parser, for chaining
     */
    public UnitParser set(Iterator<String> source) {
        bufferedItem = null;
        bufferedType = null; // keep the look-ahead pair consistent
        this.source = source;
        return this;
    }

    /**
     * Starts parsing a new sequence of components.
     *
     * @param source the unit identifier components, in order
     * @return this parser, for chaining
     */
    public UnitParser set(Iterable<String> source) {
        return set(source.iterator());
    }

    /**
     * Starts parsing a unit identifier, splitting it into components on {@code '-'}.
     *
     * @param source the dash-separated unit identifier
     * @return this parser, for chaining
     */
    public UnitParser set(String source) {
        return set(UnitParser.DASH_SPLITTER.split(source).iterator());
    }

    /** Progress within the segment currently being assembled by {@link #nextParse(Output)}. */
    private enum State {
        /** Nothing has been consumed for the current segment. */
        start,
        /** One or more prefixes have been consumed; a base is still required. */
        havePrefix,
        /** A base (and possibly suffixes) has been consumed; the segment is returnable. */
        haveBaseOrSuffix
    }

    /**
     * Parses every segment remaining in the current source.
     *
     * @return the remaining segments, in order, each paired with its type; empty if none remain
     * @throws IllegalArgumentException if the components are not well-formed (see {@link
     *     #nextParse(Output)})
     */
    public List<Pair<UnitIdComponentType, String>> getRemaining() {
        List<Pair<UnitIdComponentType, String>> result = new ArrayList<>();
        Output<UnitIdComponentType> type = new Output<>();
        while (true) {
            String item = nextParse(type);
            if (item == null) {
                return result;
            }
            result.add(Pair.of(type.value, item));
        }
    }

    /**
     * Parses the next segment in the source from set.
     *
     * @param type receives the type of the returned segment in {@code type.value}; left
     *     untouched when null is returned
     * @return a unit segment of the form: prefix* base suffix*, and, per, or power; or null if no
     *     more remaining
     * @throws IllegalArgumentException if a suffix does not follow a base, or if a prefix is not
     *     followed by a base
     */
    public String nextParse(Output<UnitIdComponentType> type) {
        String output = null;
        State state = State.start;
        UnitIdComponentType outputType = null;

        while (true) {
            // Refill the one-item look-ahead unless a previous call left an item unconsumed.
            if (bufferedItem == null) {
                if (!source.hasNext()) {
                    break;
                }
                bufferedItem = source.next();
                bufferedType = SDI.getUnitIdComponentType(bufferedItem);
            }
            switch (bufferedType) {
                case prefix:
                    if (state == State.haveBaseOrSuffix) {
                        // The prefix begins the next segment; leave it buffered.
                        type.value = segmentType(outputType);
                        return output;
                    }
                    state = State.havePrefix;
                    break;
                case base:
                    if (state == State.haveBaseOrSuffix) {
                        // A second base begins the next segment; leave it buffered.
                        type.value = segmentType(outputType);
                        return output;
                    }
                    state = State.haveBaseOrSuffix;
                    break;
                case suffix:
                    if (state != State.haveBaseOrSuffix) {
                        throw new IllegalArgumentException(
                                "Unit suffix must follow base: "
                                        + output
                                        + " ❌ "
                                        + bufferedItem);
                    }
                    break;
                case and:
                case per:
                case power:
                    if (state == State.start) {
                        // The connector is a segment by itself: consume and return it.
                        output = bufferedItem;
                        bufferedItem = null;
                        type.value = bufferedType;
                        return output;
                    }
                    if (state == State.havePrefix) {
                        throw new IllegalArgumentException(
                                "Unit prefix must be followed with base: "
                                        + output
                                        + " ❌ "
                                        + bufferedItem);
                    }
                    // A complete segment precedes the connector; leave the connector buffered.
                    type.value = segmentType(outputType);
                    return output;
            }
            // The buffered component belongs to the current segment: append and consume it.
            output = output == null ? bufferedItem : output + "-" + bufferedItem;
            bufferedItem = null;
            outputType = bufferedType;
        }

        // The source is exhausted; bufferedItem is necessarily null at this point.
        switch (state) {
            case havePrefix:
                throw new IllegalArgumentException(
                        "Unit prefix must be followed with base: " + output + " ❌ " + bufferedItem);
            case haveBaseOrSuffix: // have stuff to return
                type.value = segmentType(outputType);
                return output;
            case start:
            default:
                return null;
        }
    }

    /**
     * Maps the type of the last component consumed into a segment to the type reported for the
     * whole segment: a segment ending in a suffix is reported as a base.
     */
    private static UnitIdComponentType segmentType(UnitIdComponentType lastComponentType) {
        return lastComponentType == UnitIdComponentType.suffix
                ? UnitIdComponentType.base
                : lastComponentType;
    }

    // TODO create from custom map
    /**
     * Returns the component type of a single unit identifier component, as reported by the
     * supplemental data.
     *
     * @param part one dash-delimited component of a unit identifier
     * @return the type reported by {@link SupplementalDataInfo#getUnitIdComponentType(String)}
     */
    public UnitIdComponentType getUnitIdComponentType(String part) {
        return SDI.getUnitIdComponentType(part);
    }
}