xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitParser.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Splitter;
5 import com.ibm.icu.util.Output;
6 import java.util.ArrayList;
7 import java.util.Iterator;
8 import java.util.List;
9 import org.unicode.cldr.util.SupplementalDataInfo.UnitIdComponentType;
10 
11 public class UnitParser {
12     private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
13     private static final CLDRConfig info = CLDR_CONFIG;
14     private static final SupplementalDataInfo SDI = info.getSupplementalDataInfo();
15     public static final Splitter DASH_SPLITTER = Splitter.on('-');
16     public static final Joiner DASH_JOIN = Joiner.on('-');
17 
18     private String bufferedItem = null;
19     private UnitIdComponentType bufferedType = null;
20     private Iterator<String> source;
21 
set(Iterator<String> source)22     public UnitParser set(Iterator<String> source) {
23         bufferedItem = null;
24         this.source = source;
25         return this;
26     }
27 
set(Iterable<String> source)28     public UnitParser set(Iterable<String> source) {
29         return set(source.iterator());
30     }
31 
set(String source)32     public UnitParser set(String source) {
33         return set(UnitParser.DASH_SPLITTER.split(source).iterator());
34     }
35 
36     private enum State {
37         start,
38         havePrefix,
39         haveBaseOrSuffix
40     }
41 
getRemaining()42     public List<Pair<UnitIdComponentType, String>> getRemaining() {
43         List<Pair<UnitIdComponentType, String>> result = new ArrayList<>();
44         Output<UnitIdComponentType> type = new Output<>();
45         while (true) {
46             String item = nextParse(type);
47             if (item == null) {
48                 return result;
49             }
50             result.add(Pair.of(type.value, item));
51         }
52     }
53 
54     /**
55      * Parses the next segment in the source from set.
56      *
57      * @param output returns type type of the item
58      * @return a unit segment of the form: prefix* base suffix*, and, per, or power; or null if no
59      *     more remaining
60      */
nextParse(Output<UnitIdComponentType> type)61     public String nextParse(Output<UnitIdComponentType> type) {
62         String output = null;
63         State state = State.start;
64         UnitIdComponentType outputType = null;
65 
66         while (true) {
67             if (bufferedItem == null) {
68                 if (!source.hasNext()) {
69                     break;
70                 }
71                 bufferedItem = source.next();
72                 bufferedType = SDI.getUnitIdComponentType(bufferedItem);
73             }
74             switch (bufferedType) {
75                 case prefix:
76                     switch (state) {
77                         case start:
78                             state = State.havePrefix;
79                             break;
80                         case havePrefix: // ok, continue
81                             break;
82                         case haveBaseOrSuffix:
83                             type.value =
84                                     outputType == UnitIdComponentType.suffix
85                                             ? UnitIdComponentType.base
86                                             : outputType;
87                             return output;
88                     }
89                     break;
90                 case base:
91                     switch (state) {
92                         case start:
93                         case havePrefix:
94                             state = State.haveBaseOrSuffix;
95                             break;
96                         case haveBaseOrSuffix: // have stuff to return
97                             type.value =
98                                     outputType == UnitIdComponentType.suffix
99                                             ? UnitIdComponentType.base
100                                             : outputType;
101                             return output;
102                     }
103                     break;
104                 case suffix:
105                     switch (state) {
106                         case start:
107                         case havePrefix:
108                             throw new IllegalArgumentException(
109                                     "Unit suffix must follow base: "
110                                             + output
111                                             + " ❌ "
112                                             + bufferedItem);
113                         case haveBaseOrSuffix: // ok, continue
114                             break;
115                     }
116                     break;
117                 case and:
118                 case per:
119                 case power:
120                     switch (state) {
121                         case start: // return this item
122                             output = bufferedItem;
123                             bufferedItem = null;
124                             type.value = bufferedType;
125                             return output;
126                         case havePrefix:
127                             throw new IllegalArgumentException(
128                                     "Unit prefix must be followed with base: "
129                                             + output
130                                             + " ❌ "
131                                             + bufferedItem);
132                         case haveBaseOrSuffix: // have stuff to return
133                             type.value =
134                                     outputType == UnitIdComponentType.suffix
135                                             ? UnitIdComponentType.base
136                                             : outputType;
137                             return output;
138                     }
139                     break;
140             }
141             output = output == null ? bufferedItem : output + "-" + bufferedItem;
142             bufferedItem = null;
143             outputType = bufferedType;
144         }
145         switch (state) {
146             default:
147             case start:
148                 return null;
149             case havePrefix:
150                 throw new IllegalArgumentException(
151                         "Unit prefix must be followed with base: " + output + " ❌ " + bufferedItem);
152             case haveBaseOrSuffix: // have stuff to return
153                 type.value =
154                         outputType == UnitIdComponentType.suffix
155                                 ? UnitIdComponentType.base
156                                 : outputType;
157                 return output;
158         }
159     }
160 
161     // TODO create from custom map
getUnitIdComponentType(String part)162     public UnitIdComponentType getUnitIdComponentType(String part) {
163         return SDI.getUnitIdComponentType(part);
164     }
165 }
166