1
2 /* Copyright 1998, 2011 by the Massachusetts Institute of Technology.
3 *
4 * Permission to use, copy, modify, and distribute this
5 * software and its documentation for any purpose and without
6 * fee is hereby granted, provided that the above copyright
7 * notice appear in all copies and that both that copyright
8 * notice and this permission notice appear in supporting
9 * documentation, and that the name of M.I.T. not be used in
10 * advertising or publicity pertaining to distribution of the
11 * software without specific, written prior permission.
12 * M.I.T. makes no representations about the suitability of
13 * this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
15 */
16
17 #include "ares_setup.h"
18
19 #ifdef HAVE_NETINET_IN_H
20 # include <netinet/in.h>
21 #endif
22
23 #include "ares_nameser.h"
24
25 #include "ares.h"
26 #include "ares_nowarn.h"
27 #include "ares_private.h" /* for the memdebug */
28
29 /* Maximum number of indirections allowed for a name */
30 #define MAX_INDIRS 50
31
32 static int name_length(const unsigned char *encoded, const unsigned char *abuf,
33 int alen, int is_hostname);
34
/* Report whether ch is one of the characters that has to be
 * backslash-escaped when it appears inside an expanded name:
 *   "  .  ;  \  (  )  @  $
 * Returns 1 for any of those characters and 0 for everything else.
 */
static int is_reservedch(int ch)
{
  return (ch == '"'  || ch == '.' || ch == ';' || ch == '\\' ||
          ch == '('  || ch == ')' || ch == '@' || ch == '$') ? 1 : 0;
}
54
/* Printable-character test done with a plain range comparison: returns 1
 * when ch lies between 0x20 (space) and 0x7E ('~') inclusive, 0 otherwise.
 */
static int ares__isprint(int ch)
{
  if (ch < 0x20 || ch > 0x7E)
    return 0;

  return 1;
}
61
/* Whitelist of characters accepted in hostnames: [A-Za-z0-9-*._/]
 *
 * On top of the usual domain name characters this admits:
 *  - underscores, which are used in SRV records;
 *  - forward slashes, such as those used for classless in-addr.arpa
 *    delegation (CNAMEs);
 *  - asterisks, which show up in real-world CNAMEs for wildcard domains.
 *
 * RFC 2181 section 11 does say not to validate, but that applies to
 * servers rather than clients.  Vulnerabilities have been reported when
 * this validation is skipped, and security matters more than edge-case
 * compatibility (which is probably invalid anyhow).
 *
 * Returns 1 when ch is acceptable, 0 otherwise.
 */
static int is_hostnamech(int ch)
{
  /* Punctuation permitted in addition to letters and digits. */
  switch (ch) {
    case '-':
    case '.':
    case '_':
    case '/':
    case '*':
      return 1;
    default:
      break;
  }

  /* Explicit ranges instead of isalnum(), which is locale-specific. */
  return ((ch >= 'A' && ch <= 'Z') ||
          (ch >= 'a' && ch <= 'z') ||
          (ch >= '0' && ch <= '9')) ? 1 : 0;
}
90
91 /* Expand an RFC1035-encoded domain name given by encoded. The
92 * containing message is given by abuf and alen. The result given by
93 * *s, which is set to a NUL-terminated allocated buffer. *enclen is
94 * set to the length of the encoded name (not the length of the
95 * expanded name; the goal is to tell the caller how many bytes to
96 * move forward to get past the encoded name).
97 *
98 * In the simple case, an encoded name is a series of labels, each
99 * composed of a one-byte length (limited to values between 0 and 63
100 * inclusive) followed by the label contents. The name is terminated
101 * by a zero-length label.
102 *
103 * In the more complicated case, a label may be terminated by an
104 * indirection pointer, specified by two bytes with the high bits of
105 * the first byte (corresponding to INDIR_MASK) set to 11. With the
106 * two high bits of the first byte stripped off, the indirection
107 * pointer gives an offset from the beginning of the containing
108 * message with more labels to decode. Indirection can happen an
109 * arbitrary number of times, so we have to detect loops.
110 *
111 * Since the expanded name uses '.' as a label separator, we use
112 * backslashes to escape periods or backslashes in the expanded name.
113 *
114 * If the result is expected to be a hostname, then no escaped data is allowed
115 * and will return error.
116 */
117
ares__expand_name_validated(const unsigned char * encoded,const unsigned char * abuf,int alen,char ** s,long * enclen,int is_hostname)118 int ares__expand_name_validated(const unsigned char *encoded,
119 const unsigned char *abuf,
120 int alen, char **s, long *enclen,
121 int is_hostname)
122 {
123 int len, indir = 0;
124 char *q;
125 const unsigned char *p;
126 union {
127 ares_ssize_t sig;
128 size_t uns;
129 } nlen;
130
131 nlen.sig = name_length(encoded, abuf, alen, is_hostname);
132 if (nlen.sig < 0)
133 return ARES_EBADNAME;
134
135 *s = ares_malloc(nlen.uns + 1);
136 if (!*s)
137 return ARES_ENOMEM;
138 q = *s;
139
140 if (nlen.uns == 0) {
141 /* RFC2181 says this should be ".": the root of the DNS tree.
142 * Since this function strips trailing dots though, it becomes ""
143 */
144 q[0] = '\0';
145
146 /* indirect root label (like 0xc0 0x0c) is 2 bytes long (stupid, but
147 valid) */
148 if ((*encoded & INDIR_MASK) == INDIR_MASK)
149 *enclen = 2L;
150 else
151 *enclen = 1L; /* the caller should move one byte to get past this */
152
153 return ARES_SUCCESS;
154 }
155
156 /* No error-checking necessary; it was all done by name_length(). */
157 p = encoded;
158 while (*p)
159 {
160 if ((*p & INDIR_MASK) == INDIR_MASK)
161 {
162 if (!indir)
163 {
164 *enclen = aresx_uztosl(p + 2U - encoded);
165 indir = 1;
166 }
167 p = abuf + ((*p & ~INDIR_MASK) << 8 | *(p + 1));
168 }
169 else
170 {
171 int name_len = *p;
172 len = name_len;
173 p++;
174
175 while (len--)
176 {
177 /* Output as \DDD for consistency with RFC1035 5.1, except
178 * for the special case of a root name response */
179 if (!ares__isprint(*p) && !(name_len == 1 && *p == 0))
180 {
181 *q++ = '\\';
182 *q++ = (char)('0' + *p / 100);
183 *q++ = (char)('0' + (*p % 100) / 10);
184 *q++ = (char)('0' + (*p % 10));
185 }
186 else if (is_reservedch(*p))
187 {
188 *q++ = '\\';
189 *q++ = *p;
190 }
191 else
192 {
193 *q++ = *p;
194 }
195 p++;
196 }
197 *q++ = '.';
198 }
199 }
200
201 if (!indir)
202 *enclen = aresx_uztosl(p + 1U - encoded);
203
204 /* Nuke the trailing period if we wrote one. */
205 if (q > *s)
206 *(q - 1) = 0;
207 else
208 *q = 0; /* zero terminate; LCOV_EXCL_LINE: empty names exit above */
209
210 return ARES_SUCCESS;
211 }
212
213
/* Expand an encoded name without hostname character validation: forwards
 * every argument to ares__expand_name_validated() with is_hostname set to 0
 * and returns its status unchanged.
 */
int ares_expand_name(const unsigned char *encoded, const unsigned char *abuf,
                     int alen, char **s, long *enclen)
{
  const int is_hostname = 0;

  return ares__expand_name_validated(encoded, abuf, alen, s, enclen,
                                     is_hostname);
}
219
220 /* Return the length of the expansion of an encoded domain name, or
221 * -1 if the encoding is invalid.
222 */
name_length(const unsigned char * encoded,const unsigned char * abuf,int alen,int is_hostname)223 static int name_length(const unsigned char *encoded, const unsigned char *abuf,
224 int alen, int is_hostname)
225 {
226 int n = 0, offset, indir = 0, top;
227
228 /* Allow the caller to pass us abuf + alen and have us check for it. */
229 if (encoded >= abuf + alen)
230 return -1;
231
232 while (*encoded)
233 {
234 top = (*encoded & INDIR_MASK);
235 if (top == INDIR_MASK)
236 {
237 /* Check the offset and go there. */
238 if (encoded + 1 >= abuf + alen)
239 return -1;
240 offset = (*encoded & ~INDIR_MASK) << 8 | *(encoded + 1);
241 if (offset >= alen)
242 return -1;
243 encoded = abuf + offset;
244
245 /* If we've seen more indirects than the message length,
246 * then there's a loop.
247 */
248 ++indir;
249 if (indir > alen || indir > MAX_INDIRS)
250 return -1;
251 }
252 else if (top == 0x00)
253 {
254 int name_len = *encoded;
255 offset = name_len;
256 if (encoded + offset + 1 >= abuf + alen)
257 return -1;
258 encoded++;
259
260 while (offset--)
261 {
262 if (!ares__isprint(*encoded) && !(name_len == 1 && *encoded == 0))
263 {
264 if (is_hostname)
265 return -1;
266 n += 4;
267 }
268 else if (is_reservedch(*encoded))
269 {
270 if (is_hostname)
271 return -1;
272 n += 2;
273 }
274 else
275 {
276 if (is_hostname && !is_hostnamech(*encoded))
277 return -1;
278 n += 1;
279 }
280 encoded++;
281 }
282
283 n++;
284 }
285 else
286 {
287 /* RFC 1035 4.1.4 says other options (01, 10) for top 2
288 * bits are reserved.
289 */
290 return -1;
291 }
292 }
293
294 /* If there were any labels at all, then the number of dots is one
295 * less than the number of labels, so subtract one.
296 */
297 return (n) ? n - 1 : n;
298 }
299
300 /* Like ares_expand_name_validated but returns EBADRESP in case of invalid
301 * input. */
ares__expand_name_for_response(const unsigned char * encoded,const unsigned char * abuf,int alen,char ** s,long * enclen,int is_hostname)302 int ares__expand_name_for_response(const unsigned char *encoded,
303 const unsigned char *abuf, int alen,
304 char **s, long *enclen, int is_hostname)
305 {
306 int status = ares__expand_name_validated(encoded, abuf, alen, s, enclen,
307 is_hostname);
308 if (status == ARES_EBADNAME)
309 status = ARES_EBADRESP;
310 return status;
311 }
312