package org.unicode.cldr.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.io.PushbackReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.unicode.cldr.icu.LDMLConstants;
import org.xml.sax.InputSource;

/**
 * Wraps XML input so that a document which carries an {@code xmlns="..."} attribute matching one
 * of the {@link DtdType} namespace URLs, but no {@code <!DOCTYPE}, gets the text returned by
 * {@link DtdType#getDoctype()} inserted right after the first {@code ?>}.
 *
 * <p>Only the first {@link #BUFFER_READ_SIZE} bytes/chars of the input are inspected; the rest of
 * the stream is passed through untouched.
 */
public class DoctypeXmlStreamWrapper {
    private static final String DOCTYPE = "<!DOCTYPE";
    private static final char[] DOCTYPE_CHARS = DOCTYPE.toCharArray();
    private static final byte[] DOCTYPE_BYTES = DOCTYPE.getBytes(StandardCharsets.UTF_8);
    // the string to look for: xmlns="
    private static final String XMLNS_EQUALS = LDMLConstants.XMLNS + "=\"";
    /**
     * Size of the input buffer, needs to be able to handle any expansion when the header is updated
     */
    public static int BUFFER_MAX_SIZE = 1024;
    /** Size of the first read, needs to contain xmlns="..." and be less than BUFFER_MAX_SIZE */
    public static int BUFFER_READ_SIZE = 512;

    /**
     * Wrap an InputSource in something that will automatically insert a DTD reference in place of
     * an xmlns directive. The character stream is preferred; the byte stream is used only when no
     * character stream is set.
     *
     * @param src the source to update in place
     * @return {@code src}, with its character or byte stream replaced by a wrapped one
     * @throws IOException if reading the head of the stream fails
     * @throws NullPointerException if {@code src} has neither a character nor a byte stream
     */
    public static InputSource wrap(InputSource src) throws IOException {
        final Reader r = src.getCharacterStream();
        final InputStream is = src.getByteStream();
        if (r != null) {
            src.setCharacterStream(wrap(r));
        } else if (is != null) {
            src.setByteStream(wrap(is, src.getEncoding()));
        } else {
            throw new NullPointerException(
                    "Internal error: Character and Byte stream are both null");
        }
        return src;
    }

    /**
     * Wrap a byte oriented stream.
     *
     * @param src the stream to wrap
     * @param encoding charset name used to decode the head of the stream; {@code null} means
     *     UTF-8
     * @return a stream yielding the (possibly amended) head followed by the rest of {@code src}
     * @throws IOException if reading or pushing back the head fails
     */
    public static InputStream wrap(InputStream src, String encoding) throws IOException {
        if (encoding == null) {
            encoding = "UTF-8";
        }
        final PushbackInputStream pr = new PushbackInputStream(src, BUFFER_MAX_SIZE);
        byte[] inbuf = pr.readNBytes(BUFFER_READ_SIZE);
        if (!hasDocType(inbuf, encoding)) {
            inbuf = fixup(inbuf, encoding);
        }
        pr.unread(inbuf);
        return pr;
    }

    /**
     * Wrap a char oriented stream.
     *
     * @param src the reader to wrap
     * @return a reader yielding the (possibly amended) head followed by the rest of {@code src}
     * @throws IOException if reading or pushing back the head fails
     */
    public static Reader wrap(Reader src) throws IOException {
        final PushbackReader pr = new PushbackReader(src, BUFFER_MAX_SIZE);
        char[] inbuf = new char[BUFFER_READ_SIZE];
        // A single read() may legally return fewer chars than requested, so keep reading until
        // the buffer is full or the input ends (mirrors readNBytes() on the byte path).
        int readlen = fill(pr, inbuf);
        if (readlen == 0) {
            // Empty input: nothing to inspect and nothing to push back.
            return pr;
        }
        if (!hasDocType(inbuf, readlen)) {
            inbuf = fixup(new String(inbuf, 0, readlen)).toCharArray();
            readlen = inbuf.length;
        }
        pr.unread(inbuf, 0, readlen);
        return pr;
    }

    /**
     * Read from {@code in} until {@code buf} is full or no more input is delivered.
     *
     * @return the number of chars stored at the start of {@code buf}, never negative
     */
    private static int fill(Reader in, char[] buf) throws IOException {
        int total = 0;
        while (total < buf.length) {
            final int n = in.read(buf, total, buf.length - total);
            if (n <= 0) {
                break;
            }
            total += n;
        }
        return total;
    }

    /**
     * Fix an input byte array, including the DOCTYPE.
     *
     * <p>If no known namespace is present the original array is returned as-is, so bytes that do
     * not decode cleanly (for example a multi-byte character cut off at the end of the buffer)
     * are never rewritten. When a DOCTYPE is inserted, it is spliced in at the byte level
     * whenever the decoded head re-encodes to exactly the bytes it came from, which leaves
     * everything after the insertion point untouched as well.
     *
     * @throws IllegalArgumentException if a namespace matched but {@code ?>} was not found
     * @throws RuntimeException if {@code encoding} is not supported
     */
    private static byte[] fixup(byte[] inbuf, String encoding) {
        try {
            final String s = new String(inbuf, encoding);
            final DtdType d = findDtdType(s);
            if (d == null) {
                // couldn't fix it, just pass the raw bytes through
                return inbuf;
            }
            final String head = s.substring(0, insertionPoint(s));
            final byte[] oldHead = head.getBytes(encoding);
            final int tailStart = oldHead.length;
            if (tailStart <= inbuf.length
                    && Arrays.equals(inbuf, 0, tailStart, oldHead, 0, tailStart)) {
                final byte[] newHead = (head + doctypeLine(d)).getBytes(encoding);
                final int tailLen = inbuf.length - tailStart;
                final byte[] out = Arrays.copyOf(newHead, newHead.length + tailLen);
                System.arraycopy(inbuf, tailStart, out, newHead.length, tailLen);
                return out;
            }
            // The head does not round-trip byte-for-byte (e.g. a byte-order mark was consumed
            // or rewritten), so fall back to re-encoding the whole decoded buffer.
            return fixup(s, d).getBytes(encoding);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("While parsing " + encoding, e);
        }
    }

    /** Fix an input String, including DOCTYPE; returns {@code s} itself if nothing matches. */
    private static String fixup(final String s) {
        final DtdType d = findDtdType(s);
        // couldn't fix it, just pass through
        return (d == null) ? s : fixup(s, d);
    }

    /**
     * Fix an input String given a specific DtdType.
     *
     * @throws IllegalArgumentException if {@code ?>} does not occur in {@code s}
     */
    private static String fixup(String s, DtdType d) {
        final int n = insertionPoint(s);
        return s.substring(0, n) + doctypeLine(d) + s.substring(n);
    }

    /**
     * @return the first DtdType whose namespace URL follows {@code xmlns="} somewhere in {@code
     *     s}, or {@code null} if there is none
     */
    private static DtdType findDtdType(String s) {
        for (final DtdType d : DtdType.values()) {
            if (s.contains(XMLNS_EQUALS + d.getNsUrl())) {
                return d;
            }
        }
        return null;
    }

    /**
     * @return the index just past the first {@code ?>} in {@code s}
     * @throws IllegalArgumentException if {@code ?>} does not occur in {@code s}
     */
    private static int insertionPoint(String s) {
        final int n = s.indexOf("?>");
        if (n == -1) {
            throw new IllegalArgumentException("Invalid XML prefix: ?> not found.");
        }
        return n + 2; // move the cut-point to the end of the "?>" sequence
    }

    /** @return the text to insert for {@code d}: its doctype on a line of its own */
    private static String doctypeLine(DtdType d) {
        return "\n" + d.getDoctype() + "\n";
    }

    /**
     * @return true if {@code inbuf} contains {@code <!DOCTYPE}, either as raw ASCII/UTF-8 bytes
     *     or after decoding with {@code encoding}
     * @throws RuntimeException if {@code encoding} is not supported
     */
    private static final boolean hasDocType(byte[] inbuf, String encoding) {
        if (inbuf == null || inbuf.length == 0) {
            return false;
        }

        // Try as utf-8/ASCII bytes - this will be the common case
        if (arrayContains(inbuf, inbuf.length, DOCTYPE_BYTES)) {
            return true;
        }

        // The byte scan above already covers UTF-8; decoding again would find nothing new.
        if (encoding == null || "UTF-8".equalsIgnoreCase(encoding)) {
            return false;
        }

        // Try 2, with encoding
        try {
            return new String(inbuf, encoding).contains(DOCTYPE);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("While parsing " + encoding, e);
        }
    }

    /** @return true if the first {@code readlen} chars of {@code inbuf} contain {@code <!DOCTYPE} */
    private static final boolean hasDocType(char[] inbuf, int readlen) {
        if (inbuf == null || readlen <= 0) {
            return false;
        }
        return arrayContains(inbuf, readlen, DOCTYPE_CHARS);
    }

    /**
     * @return true if {@code testbuf} occurs as a contiguous run within the first {@code inlen}
     *     elements of {@code inbuf}
     */
    private static boolean arrayContains(char[] inbuf, int inlen, char[] testbuf) {
        final int testlen = testbuf.length;
        final int lastStart = Math.min(inlen, inbuf.length) - testlen;
        // Try every start position: a failed partial match must not skip a match that begins
        // inside it (e.g. "<<!DOCTYPE").
        for (int start = 0; start <= lastStart; start++) {
            int t = 0;
            while (t < testlen && inbuf[start + t] == testbuf[t]) {
                t++;
            }
            if (t == testlen) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return true if {@code testbuf} occurs as a contiguous run within the first {@code inlen}
     *     elements of {@code inbuf}
     */
    private static boolean arrayContains(byte[] inbuf, int inlen, byte[] testbuf) {
        final int testlen = testbuf.length;
        final int lastStart = Math.min(inlen, inbuf.length) - testlen;
        // Try every start position: a failed partial match must not skip a match that begins
        // inside it (e.g. "<<!DOCTYPE").
        for (int start = 0; start <= lastStart; start++) {
            int t = 0;
            while (t < testlen && inbuf[start + t] == testbuf[t]) {
                t++;
            }
            if (t == testlen) {
                return true;
            }
        }
        return false;
    }
}