1 package org.unicode.cldr.tool; 2 3 import com.ibm.icu.number.LocalizedNumberFormatter; 4 import com.ibm.icu.number.NumberFormatter; 5 import java.util.HashMap; 6 import java.util.Locale; 7 import java.util.Map; 8 import java.util.Map.Entry; 9 import java.util.TreeMap; 10 import org.unicode.cldr.util.XMLFileReader; 11 import org.unicode.cldr.util.XPathParts; 12 13 public class UnLiteracyParser extends XMLFileReader.SimpleHandler { 14 15 private static final String VALUE = "Value"; 16 private static final String RELIABILITY = "Reliability"; 17 private static final String LITERACY = "Literacy"; 18 private static final String YEAR = "Year"; 19 private static final String COUNTRY_OR_AREA = "Country or Area"; 20 private static final String AGE = "Age"; 21 static final String LITERATE = "Literate"; 22 static final String ILLITERATE = "Illiterate"; 23 private static final String UNKNOWN = "Unknown"; 24 private static final String TOTAL = "Total"; 25 // Debug stuff main(String args[])26 public static void main(String args[]) { 27 final UnLiteracyParser ulp = new UnLiteracyParser().read(); 28 for (final Entry<String, PerCountry> e : ulp.perCountry.entrySet()) { 29 final String country = e.getKey(); 30 final String latest = e.getValue().latest(); 31 final PerYear py = e.getValue().perYear.get(latest); 32 33 Long literate = py.total(LITERATE); 34 Long illiterate = py.total(ILLITERATE); 35 Long unknown = py.total(UNKNOWN); 36 Long total = py.total(TOTAL); 37 38 System.out.println( 39 country 40 + "\t" 41 + latest 42 + "\t" 43 + literate 44 + "/" 45 + illiterate 46 + ", " 47 + unknown 48 + " = " 49 + total); 50 if ((literate + illiterate + unknown) != total) { 51 System.out.println( 52 "- doesn't add up for " 53 + country 54 + " - total is " 55 + (literate + illiterate + unknown)); 56 } 57 } 58 } 59 60 int recCount = 0; 61 62 // Reading stuff 63 public static final String UN_LITERACY = "external/un_literacy.xml"; 64 read()65 UnLiteracyParser read() { 66 System.out.println("* Reading " + UN_LITERACY); 67 new XMLFileReader() 68 .setHandler(this) 69 .readCLDRResource(UN_LITERACY, XMLFileReader.CONTENT_HANDLER, false); 70 // get the final record 71 handleNewRecord(); 72 LocalizedNumberFormatter nf = NumberFormatter.with().locale(Locale.ENGLISH); 73 System.out.println( 74 "* Read " 75 + nf.format(recCount) 76 + " record(s) with " 77 + nf.format(perCountry.size()) 78 + " region(s) from " 79 + UN_LITERACY); 80 return this; 81 } 82 83 // Parsing stuff 84 @Override handlePathValue(String path, String value)85 public void handlePathValue(String path, String value) { 86 if (!path.startsWith("//ROOT/data/record")) { 87 return; 88 } 89 final String field = XPathParts.getFrozenInstance(path).getAttributeValue(-1, "name"); 90 handleField(field, value); 91 } 92 93 @Override handleElement(CharSequence path)94 public void handleElement(CharSequence path) { 95 if ("//ROOT/data/record".equals(path.toString())) { 96 handleNewRecord(); 97 } 98 } 99 100 // Data ingestion 101 final Map<String, String> thisRecord = new HashMap<String, String>(); 102 handleField(String field, String value)103 private void handleField(String field, String value) { 104 final String old = thisRecord.put(field, value); 105 if (old != null) { 106 throw new IllegalArgumentException( 107 "Duplicate field " + field + ", context: " + thisRecord); 108 } 109 } 110 handleNewRecord()111 private void handleNewRecord() { 112 if (!thisRecord.isEmpty() && validate()) { 113 recCount++; 114 handleRecord(); 115 } 116 117 thisRecord.clear(); 118 } 119 validate()120 boolean validate() { 121 try { 122 assertEqual("Area", "Total"); 123 assertEqual("Sex", "Both Sexes"); 124 125 assertPresent(AGE); 126 assertPresent(COUNTRY_OR_AREA); 127 assertPresent(LITERACY); 128 assertPresent(VALUE); 129 assertPresent(YEAR); 130 assertPresent(RELIABILITY); 131 132 return true; 133 } catch (Throwable t) { 134 final String context = thisRecord.toString(); 135 throw new IllegalArgumentException("While parsing " + context, t); 136 } 137 } 138 assertPresent(String field)139 void assertPresent(String field) { 140 String value = get(field); 141 if (value == null) { 142 throw new NullPointerException("Missing field: " + field); 143 } else if (value.isEmpty()) { 144 throw new NullPointerException("Empty field: " + field); 145 } 146 } 147 assertEqual(String field, String expected)148 void assertEqual(String field, String expected) { 149 assertPresent(field); 150 String value = get(field); 151 if (!value.equals(expected)) { 152 throw new NullPointerException( 153 "Expected " + field + "=" + expected + " but got " + value); 154 } 155 } 156 get(String field)157 private final String get(String field) { 158 final String value = thisRecord.get(field); 159 if (value == null) return value; 160 return value.trim(); 161 } 162 handleRecord()163 private void handleRecord() { 164 final String country = get(COUNTRY_OR_AREA); 165 final String year = get(YEAR); 166 final String age = get(AGE); 167 final String literacy = get(LITERACY); 168 final String reliability = get(RELIABILITY); 169 final PerAge pa = 170 perCountry 171 .computeIfAbsent(country, (String c) -> new PerCountry()) 172 .perYear 173 .computeIfAbsent(year, (String y) -> new PerYear()) 174 .perAge 175 .computeIfAbsent(age, (String a) -> new PerAge()); 176 177 if (pa.reliability == null) { 178 pa.reliability = reliability; 179 } else if (!pa.reliability.equals(reliability)) { 180 throw new IllegalArgumentException( 181 "Inconsistent reliability " + reliability + " for " + thisRecord); 182 } 183 final Long old = pa.perLiteracy.put(literacy, getLongValue()); 184 if (old != null) { 185 System.err.println("Duplicate record " + country + " " + year + " " + age); 186 } 187 } 188 getLongValue()189 private long getLongValue() { 190 final String value = get(VALUE); 191 if (value.contains( 192 ".")) { // yes. some of the data has decimal points. Ignoring the fractional part. 193 return Long.parseLong(value.split("\\.")[0]); 194 } else { 195 return Long.parseLong(value); 196 } 197 } 198 199 final Map<String, PerCountry> perCountry = new TreeMap<String, PerCountry>(); 200 201 final class PerCountry { 202 final Map<String, PerYear> perYear = new TreeMap<String, PerYear>(); 203 latest()204 public String latest() { 205 final String y[] = perYear.keySet().toArray(new String[0]); 206 return y[y.length - 1]; 207 } 208 } 209 210 final class PerYear { 211 final Map<String, PerAge> perAge = new TreeMap<String, PerAge>(); 212 total(String literacy)213 Long total(String literacy) { 214 return perAge.values().stream() 215 .map((pa) -> pa.perLiteracy.getOrDefault(literacy, 0L)) 216 .reduce(0L, (Long a, Long b) -> a + b); 217 } 218 } 219 220 final class PerAge { 221 final Map<String, Long> perLiteracy = new TreeMap<String, Long>(); 222 String reliability = null; 223 } 224 } 225