xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/DoctypeXmlStreamWrapper.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.io.PushbackInputStream;
6 import java.io.PushbackReader;
7 import java.io.Reader;
8 import java.io.UnsupportedEncodingException;
9 import java.nio.charset.StandardCharsets;
10 import java.util.Arrays;
11 import org.unicode.cldr.icu.LDMLConstants;
12 import org.xml.sax.InputSource;
13 
14 public class DoctypeXmlStreamWrapper {
15     private static final String DOCTYPE = "<!DOCTYPE";
16     private static final char DOCTYPE_CHARS[] = DOCTYPE.toCharArray();
17     private static final byte DOCTYPE_BYTES[] = DOCTYPE.getBytes(StandardCharsets.UTF_8);
18     // the string to look for:  xmlns="
19     private static final String XMLNS_EQUALS = LDMLConstants.XMLNS + "=\"";
20     /**
21      * Size of the input buffer, needs to be able to handle any expansion when the header is updated
22      */
23     public static int BUFFER_MAX_SIZE = 1024;
24     /** Size of the first read, needs to contain xmlns="..." and be less than BUFFER_MAX_SIZE */
25     public static int BUFFER_READ_SIZE = 512;
26 
27     /**
28      * Wrap an InputSource in something that will automatically insert a DTD reference in place of
29      * an xmlns directive.
30      *
31      * @throws IOException
32      */
wrap(InputSource src)33     public static InputSource wrap(InputSource src) throws IOException {
34         Reader r = src.getCharacterStream();
35         InputStream is = src.getByteStream();
36         if (r != null) {
37             src.setCharacterStream(wrap(r));
38         } else if (is != null) {
39             src.setByteStream(wrap(is, src.getEncoding()));
40         } else {
41             throw new NullPointerException(
42                     "Internal error: Character and Byte stream are both null");
43         }
44         return src;
45     }
46 
47     /** wrap a byte oriented stream */
wrap(InputStream src, String encoding)48     public static InputStream wrap(InputStream src, String encoding) throws IOException {
49         if (encoding == null) {
50             encoding = "UTF-8";
51         }
52         PushbackInputStream pr = new PushbackInputStream(src, BUFFER_MAX_SIZE);
53         byte inbuf[] = pr.readNBytes(BUFFER_READ_SIZE);
54         if (!hasDocType(inbuf, encoding)) {
55             inbuf = fixup(inbuf, encoding).getBytes(encoding);
56         }
57         pr.unread(inbuf);
58         return pr;
59     }
60 
61     /** wrap a char oriented stream */
wrap(Reader src)62     public static Reader wrap(Reader src) throws IOException {
63         PushbackReader pr = new PushbackReader(src, BUFFER_MAX_SIZE);
64         char inbuf[] = new char[BUFFER_READ_SIZE];
65         int readlen = pr.read(inbuf);
66         if (!hasDocType(inbuf, readlen)) {
67             char buf2[] = Arrays.copyOf(inbuf, readlen);
68             inbuf = fixup(new String(buf2)).toCharArray();
69             readlen = inbuf.length;
70         }
71         pr.unread(inbuf, 0, readlen);
72         return pr;
73     }
74 
75     /** Fix an input byte array, including the DOCTYPE */
fixup(byte[] inbuf, String encoding)76     private static String fixup(byte[] inbuf, String encoding) {
77         try {
78             final String s = new String(inbuf, encoding);
79             return fixup(s);
80         } catch (UnsupportedEncodingException e) {
81             throw new RuntimeException("While parsing " + encoding, e);
82         }
83     }
84 
85     /** Fix an input String, including DOCTYPE */
fixup(final String s)86     private static String fixup(final String s) {
87         // exit if nothing matches
88         for (final DtdType d : DtdType.values()) {
89             if (s.contains(XMLNS_EQUALS + d.getNsUrl())) {
90                 return fixup(s, d);
91             }
92         }
93         // couldn't fix it, just pass through
94         return s;
95     }
96 
97     /** Fix an input String given a specific DtdType. */
fixup(String s, DtdType d)98     private static String fixup(String s, DtdType d) {
99         int n = s.indexOf("?>");
100         if (n == -1) {
101             throw new IllegalArgumentException("Invalid XML prefix: ?> not found.");
102         }
103         n += 2; // move the cut-point to the end of the "?>" sequence
104 
105         final String doctype = "\n" + d.getDoctype() + "\n";
106         final String s2 = s.substring(0, n) + doctype + s.substring(n);
107         return s2;
108     }
109 
hasDocType(byte[] inbuf, String encoding)110     private static final boolean hasDocType(byte[] inbuf, String encoding) {
111         if (inbuf == null || inbuf.length == 0) return false;
112 
113         // Try as utf-8/ASCII bytes - this will be the common case
114         if (arrayContains(inbuf, inbuf.length, DOCTYPE_BYTES)) return true;
115 
116         // break out here
117         if (encoding == null || encoding.equals("UTF-8")) return false;
118 
119         // Try 2, with encoding
120         try {
121             final String s = new String(inbuf, encoding);
122             return s.contains(DOCTYPE);
123         } catch (UnsupportedEncodingException e) {
124             throw new RuntimeException("While parsing " + encoding, e);
125         }
126     }
127 
hasDocType(char[] inbuf, int readlen)128     private static final boolean hasDocType(char[] inbuf, int readlen) {
129         if (inbuf == null || readlen <= 0) {
130             return false;
131         }
132         return arrayContains(inbuf, readlen, DOCTYPE_CHARS);
133     }
134 
arrayContains(char[] inbuf, int inlen, char[] testbuf)135     private static boolean arrayContains(char[] inbuf, int inlen, char[] testbuf) {
136         final int testlen = testbuf.length;
137         int t = 0;
138         for (int i = 0; i < inlen; i++) {
139             if (inbuf[i] == testbuf[t]) {
140                 t++;
141                 if (t == testlen) return true;
142             } else {
143                 t = 0;
144             }
145         }
146         return false;
147     }
148 
arrayContains(byte[] inbuf, int inlen, byte[] testbuf)149     private static boolean arrayContains(byte[] inbuf, int inlen, byte[] testbuf) {
150         final int testlen = testbuf.length;
151         int t = 0;
152         for (int i = 0; i < inlen; i++) {
153             if (inbuf[i] == testbuf[t]) {
154                 t++;
155                 if (t == testlen) return true;
156             } else {
157                 t = 0;
158             }
159         }
160         return false;
161     }
162 }
163