xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/UnLiteracyParser.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.ibm.icu.number.LocalizedNumberFormatter;
4 import com.ibm.icu.number.NumberFormatter;
5 import java.util.HashMap;
6 import java.util.Locale;
7 import java.util.Map;
8 import java.util.Map.Entry;
9 import java.util.TreeMap;
10 import org.unicode.cldr.util.XMLFileReader;
11 import org.unicode.cldr.util.XPathParts;
12 
13 public class UnLiteracyParser extends XMLFileReader.SimpleHandler {
14 
15     private static final String VALUE = "Value";
16     private static final String RELIABILITY = "Reliability";
17     private static final String LITERACY = "Literacy";
18     private static final String YEAR = "Year";
19     private static final String COUNTRY_OR_AREA = "Country or Area";
20     private static final String AGE = "Age";
21     static final String LITERATE = "Literate";
22     static final String ILLITERATE = "Illiterate";
23     private static final String UNKNOWN = "Unknown";
24     private static final String TOTAL = "Total";
25     // Debug stuff
main(String args[])26     public static void main(String args[]) {
27         final UnLiteracyParser ulp = new UnLiteracyParser().read();
28         for (final Entry<String, PerCountry> e : ulp.perCountry.entrySet()) {
29             final String country = e.getKey();
30             final String latest = e.getValue().latest();
31             final PerYear py = e.getValue().perYear.get(latest);
32 
33             Long literate = py.total(LITERATE);
34             Long illiterate = py.total(ILLITERATE);
35             Long unknown = py.total(UNKNOWN);
36             Long total = py.total(TOTAL);
37 
38             System.out.println(
39                     country
40                             + "\t"
41                             + latest
42                             + "\t"
43                             + literate
44                             + "/"
45                             + illiterate
46                             + ", "
47                             + unknown
48                             + " = "
49                             + total);
50             if ((literate + illiterate + unknown) != total) {
51                 System.out.println(
52                         "- doesn't add up for "
53                                 + country
54                                 + " - total is "
55                                 + (literate + illiterate + unknown));
56             }
57         }
58     }
59 
60     int recCount = 0;
61 
62     // Reading stuff
63     public static final String UN_LITERACY = "external/un_literacy.xml";
64 
read()65     UnLiteracyParser read() {
66         System.out.println("* Reading " + UN_LITERACY);
67         new XMLFileReader()
68                 .setHandler(this)
69                 .readCLDRResource(UN_LITERACY, XMLFileReader.CONTENT_HANDLER, false);
70         // get the final record
71         handleNewRecord();
72         LocalizedNumberFormatter nf = NumberFormatter.with().locale(Locale.ENGLISH);
73         System.out.println(
74                 "* Read "
75                         + nf.format(recCount)
76                         + " record(s) with "
77                         + nf.format(perCountry.size())
78                         + " region(s) from "
79                         + UN_LITERACY);
80         return this;
81     }
82 
83     // Parsing stuff
84     @Override
handlePathValue(String path, String value)85     public void handlePathValue(String path, String value) {
86         if (!path.startsWith("//ROOT/data/record")) {
87             return;
88         }
89         final String field = XPathParts.getFrozenInstance(path).getAttributeValue(-1, "name");
90         handleField(field, value);
91     }
92 
93     @Override
handleElement(CharSequence path)94     public void handleElement(CharSequence path) {
95         if ("//ROOT/data/record".equals(path.toString())) {
96             handleNewRecord();
97         }
98     }
99 
100     // Data ingestion
101     final Map<String, String> thisRecord = new HashMap<String, String>();
102 
handleField(String field, String value)103     private void handleField(String field, String value) {
104         final String old = thisRecord.put(field, value);
105         if (old != null) {
106             throw new IllegalArgumentException(
107                     "Duplicate field " + field + ", context: " + thisRecord);
108         }
109     }
110 
handleNewRecord()111     private void handleNewRecord() {
112         if (!thisRecord.isEmpty() && validate()) {
113             recCount++;
114             handleRecord();
115         }
116 
117         thisRecord.clear();
118     }
119 
validate()120     boolean validate() {
121         try {
122             assertEqual("Area", "Total");
123             assertEqual("Sex", "Both Sexes");
124 
125             assertPresent(AGE);
126             assertPresent(COUNTRY_OR_AREA);
127             assertPresent(LITERACY);
128             assertPresent(VALUE);
129             assertPresent(YEAR);
130             assertPresent(RELIABILITY);
131 
132             return true;
133         } catch (Throwable t) {
134             final String context = thisRecord.toString();
135             throw new IllegalArgumentException("While parsing " + context, t);
136         }
137     }
138 
assertPresent(String field)139     void assertPresent(String field) {
140         String value = get(field);
141         if (value == null) {
142             throw new NullPointerException("Missing field: " + field);
143         } else if (value.isEmpty()) {
144             throw new NullPointerException("Empty field: " + field);
145         }
146     }
147 
assertEqual(String field, String expected)148     void assertEqual(String field, String expected) {
149         assertPresent(field);
150         String value = get(field);
151         if (!value.equals(expected)) {
152             throw new NullPointerException(
153                     "Expected " + field + "=" + expected + " but got " + value);
154         }
155     }
156 
get(String field)157     private final String get(String field) {
158         final String value = thisRecord.get(field);
159         if (value == null) return value;
160         return value.trim();
161     }
162 
handleRecord()163     private void handleRecord() {
164         final String country = get(COUNTRY_OR_AREA);
165         final String year = get(YEAR);
166         final String age = get(AGE);
167         final String literacy = get(LITERACY);
168         final String reliability = get(RELIABILITY);
169         final PerAge pa =
170                 perCountry
171                         .computeIfAbsent(country, (String c) -> new PerCountry())
172                         .perYear
173                         .computeIfAbsent(year, (String y) -> new PerYear())
174                         .perAge
175                         .computeIfAbsent(age, (String a) -> new PerAge());
176 
177         if (pa.reliability == null) {
178             pa.reliability = reliability;
179         } else if (!pa.reliability.equals(reliability)) {
180             throw new IllegalArgumentException(
181                     "Inconsistent reliability " + reliability + " for " + thisRecord);
182         }
183         final Long old = pa.perLiteracy.put(literacy, getLongValue());
184         if (old != null) {
185             System.err.println("Duplicate record " + country + " " + year + " " + age);
186         }
187     }
188 
getLongValue()189     private long getLongValue() {
190         final String value = get(VALUE);
191         if (value.contains(
192                 ".")) { // yes. some of the data has decimal points. Ignoring the fractional part.
193             return Long.parseLong(value.split("\\.")[0]);
194         } else {
195             return Long.parseLong(value);
196         }
197     }
198 
199     final Map<String, PerCountry> perCountry = new TreeMap<String, PerCountry>();
200 
201     final class PerCountry {
202         final Map<String, PerYear> perYear = new TreeMap<String, PerYear>();
203 
latest()204         public String latest() {
205             final String y[] = perYear.keySet().toArray(new String[0]);
206             return y[y.length - 1];
207         }
208     }
209 
210     final class PerYear {
211         final Map<String, PerAge> perAge = new TreeMap<String, PerAge>();
212 
total(String literacy)213         Long total(String literacy) {
214             return perAge.values().stream()
215                     .map((pa) -> pa.perLiteracy.getOrDefault(literacy, 0L))
216                     .reduce(0L, (Long a, Long b) -> a + b);
217         }
218     }
219 
220     final class PerAge {
221         final Map<String, Long> perLiteracy = new TreeMap<String, Long>();
222         String reliability = null;
223     }
224 }
225