1 
2 /* Copyright 1998, 2011 by the Massachusetts Institute of Technology.
3  *
4  * Permission to use, copy, modify, and distribute this
5  * software and its documentation for any purpose and without
6  * fee is hereby granted, provided that the above copyright
7  * notice appear in all copies and that both that copyright
8  * notice and this permission notice appear in supporting
9  * documentation, and that the name of M.I.T. not be used in
10  * advertising or publicity pertaining to distribution of the
11  * software without specific, written prior permission.
12  * M.I.T. makes no representations about the suitability of
13  * this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  */
16 
17 #include "ares_setup.h"
18 
19 #ifdef HAVE_NETINET_IN_H
20 #  include <netinet/in.h>
21 #endif
22 
23 #include "ares_nameser.h"
24 
25 #include "ares.h"
26 #include "ares_nowarn.h"
27 #include "ares_private.h" /* for the memdebug */
28 
29 /* Maximum number of indirections allowed for a name */
30 #define MAX_INDIRS 50
31 
32 static int name_length(const unsigned char *encoded, const unsigned char *abuf,
33                        int alen, int is_hostname);
34 
/* Tell whether ch is one of the characters that must be backslash-escaped
 * when it appears inside a label of an expanded name.
 * Returns 1 for a reserved character, 0 otherwise. */
static int is_reservedch(int ch)
{
  static const char reserved[] = { '"', '.', ';', '\\', '(', ')', '@', '$' };
  size_t i;

  for (i = 0; i < sizeof(reserved) / sizeof(reserved[0]); i++)
    {
      if (ch == reserved[i])
        return 1;
    }

  return 0;
}
54 
/* Locale-independent printable test: returns 1 when ch lies in the range
 * 0x20 (space) through 0x7E ('~') inclusive, 0 otherwise. */
static int ares__isprint(int ch)
{
  return (ch < 0x20 || ch > 0x7E) ? 0 : 1;
}
61 
/* Characters accepted in a hostname: the normal domain name character set
 * [A-Za-z0-9-.] extended with
 *  - '_', which is used in SRV records;
 *  - '/', which is used for classless in-addr.arpa delegation (CNAMEs);
 *  - '*', which is seen in the real world for wildcard domains in CNAMEs.
 *
 * RFC 2181 section 11 states not to do validation, but that applies to
 * servers, not clients.  Vulnerabilities have been reported when this
 * validation is not performed, so security is preferred over edge-case
 * compatibility (which is probably invalid anyhow).
 *
 * Returns 1 when ch is allowed, 0 otherwise. */
static int is_hostnamech(int ch)
{
  /* Explicit range tests: isalnum() is locale-specific, so it is avoided. */
  const int is_letter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
  const int is_digit  = (ch >= '0' && ch <= '9');

  if (is_letter || is_digit)
    return 1;

  switch (ch) {
    case '-':
    case '.':
    case '_':
    case '/':
    case '*':
      return 1;
    default:
      return 0;
  }
}
90 
91 /* Expand an RFC1035-encoded domain name given by encoded.  The
 * containing message is given by abuf and alen.  The result is returned in
 * *s, which is set to a NUL-terminated allocated buffer.  *enclen is
94  * set to the length of the encoded name (not the length of the
95  * expanded name; the goal is to tell the caller how many bytes to
96  * move forward to get past the encoded name).
97  *
98  * In the simple case, an encoded name is a series of labels, each
99  * composed of a one-byte length (limited to values between 0 and 63
100  * inclusive) followed by the label contents.  The name is terminated
101  * by a zero-length label.
102  *
103  * In the more complicated case, a label may be terminated by an
104  * indirection pointer, specified by two bytes with the high bits of
105  * the first byte (corresponding to INDIR_MASK) set to 11.  With the
106  * two high bits of the first byte stripped off, the indirection
107  * pointer gives an offset from the beginning of the containing
108  * message with more labels to decode.  Indirection can happen an
109  * arbitrary number of times, so we have to detect loops.
110  *
 * Since the expanded name uses '.' as a label separator, reserved
 * characters inside a label (such as periods and backslashes) are escaped
 * with a backslash, and non-printable bytes are written as \DDD (three
 * decimal digits).
 *
 * If the result is expected to be a hostname, then no escaped data is
 * allowed: a name that would need escaping is rejected with an error.
116  */
117 
ares__expand_name_validated(const unsigned char * encoded,const unsigned char * abuf,int alen,char ** s,long * enclen,int is_hostname)118 int ares__expand_name_validated(const unsigned char *encoded,
119                                 const unsigned char *abuf,
120                                 int alen, char **s, long *enclen,
121                                 int is_hostname)
122 {
123   int len, indir = 0;
124   char *q;
125   const unsigned char *p;
126   union {
127     ares_ssize_t sig;
128      size_t uns;
129   } nlen;
130 
131   nlen.sig = name_length(encoded, abuf, alen, is_hostname);
132   if (nlen.sig < 0)
133     return ARES_EBADNAME;
134 
135   *s = ares_malloc(nlen.uns + 1);
136   if (!*s)
137     return ARES_ENOMEM;
138   q = *s;
139 
140   if (nlen.uns == 0) {
141     /* RFC2181 says this should be ".": the root of the DNS tree.
142      * Since this function strips trailing dots though, it becomes ""
143      */
144     q[0] = '\0';
145 
146     /* indirect root label (like 0xc0 0x0c) is 2 bytes long (stupid, but
147        valid) */
148     if ((*encoded & INDIR_MASK) == INDIR_MASK)
149       *enclen = 2L;
150     else
151       *enclen = 1L;  /* the caller should move one byte to get past this */
152 
153     return ARES_SUCCESS;
154   }
155 
156   /* No error-checking necessary; it was all done by name_length(). */
157   p = encoded;
158   while (*p)
159     {
160       if ((*p & INDIR_MASK) == INDIR_MASK)
161         {
162           if (!indir)
163             {
164               *enclen = aresx_uztosl(p + 2U - encoded);
165               indir = 1;
166             }
167           p = abuf + ((*p & ~INDIR_MASK) << 8 | *(p + 1));
168         }
169       else
170         {
171           int name_len = *p;
172           len = name_len;
173           p++;
174 
175           while (len--)
176             {
177               /* Output as \DDD for consistency with RFC1035 5.1, except
178                * for the special case of a root name response  */
179               if (!ares__isprint(*p) && !(name_len == 1 && *p == 0))
180                 {
181                   *q++ = '\\';
182                   *q++ = (char)('0' + *p / 100);
183                   *q++ = (char)('0' + (*p % 100) / 10);
184                   *q++ = (char)('0' + (*p % 10));
185                 }
186               else if (is_reservedch(*p))
187                 {
188                   *q++ = '\\';
189                   *q++ = *p;
190                 }
191               else
192                 {
193                   *q++ = *p;
194                 }
195               p++;
196             }
197           *q++ = '.';
198         }
199      }
200 
201   if (!indir)
202     *enclen = aresx_uztosl(p + 1U - encoded);
203 
204   /* Nuke the trailing period if we wrote one. */
205   if (q > *s)
206     *(q - 1) = 0;
207   else
208     *q = 0; /* zero terminate; LCOV_EXCL_LINE: empty names exit above */
209 
210   return ARES_SUCCESS;
211 }
212 
213 
/* Expand an encoded name with hostname validation disabled: label bytes
 * that are reserved or non-printable are escaped rather than rejected.
 * Arguments and return value are those of ares__expand_name_validated().
 */
int ares_expand_name(const unsigned char *encoded, const unsigned char *abuf,
                     int alen, char **s, long *enclen)
{
  const int is_hostname = 0;

  return ares__expand_name_validated(encoded, abuf, alen, s, enclen,
                                     is_hostname);
}
219 
220 /* Return the length of the expansion of an encoded domain name, or
221  * -1 if the encoding is invalid.
222  */
name_length(const unsigned char * encoded,const unsigned char * abuf,int alen,int is_hostname)223 static int name_length(const unsigned char *encoded, const unsigned char *abuf,
224                        int alen, int is_hostname)
225 {
226   int n = 0, offset, indir = 0, top;
227 
228   /* Allow the caller to pass us abuf + alen and have us check for it. */
229   if (encoded >= abuf + alen)
230     return -1;
231 
232   while (*encoded)
233     {
234       top = (*encoded & INDIR_MASK);
235       if (top == INDIR_MASK)
236         {
237           /* Check the offset and go there. */
238           if (encoded + 1 >= abuf + alen)
239             return -1;
240           offset = (*encoded & ~INDIR_MASK) << 8 | *(encoded + 1);
241           if (offset >= alen)
242             return -1;
243           encoded = abuf + offset;
244 
245           /* If we've seen more indirects than the message length,
246            * then there's a loop.
247            */
248           ++indir;
249           if (indir > alen || indir > MAX_INDIRS)
250             return -1;
251         }
252       else if (top == 0x00)
253         {
254           int name_len = *encoded;
255           offset = name_len;
256           if (encoded + offset + 1 >= abuf + alen)
257             return -1;
258           encoded++;
259 
260           while (offset--)
261             {
262               if (!ares__isprint(*encoded) && !(name_len == 1 && *encoded == 0))
263                 {
264                   if (is_hostname)
265                     return -1;
266                   n += 4;
267                 }
268               else if (is_reservedch(*encoded))
269                 {
270                   if (is_hostname)
271                     return -1;
272                   n += 2;
273                 }
274               else
275                 {
276                   if (is_hostname && !is_hostnamech(*encoded))
277                     return -1;
278                   n += 1;
279                 }
280               encoded++;
281             }
282 
283           n++;
284         }
285       else
286         {
287           /* RFC 1035 4.1.4 says other options (01, 10) for top 2
288            * bits are reserved.
289            */
290           return -1;
291         }
292     }
293 
294   /* If there were any labels at all, then the number of dots is one
295    * less than the number of labels, so subtract one.
296    */
297   return (n) ? n - 1 : n;
298 }
299 
300 /* Like ares_expand_name_validated  but returns EBADRESP in case of invalid
301  * input. */
ares__expand_name_for_response(const unsigned char * encoded,const unsigned char * abuf,int alen,char ** s,long * enclen,int is_hostname)302 int ares__expand_name_for_response(const unsigned char *encoded,
303                                    const unsigned char *abuf, int alen,
304                                    char **s, long *enclen, int is_hostname)
305 {
306   int status = ares__expand_name_validated(encoded, abuf, alen, s, enclen,
307     is_hostname);
308   if (status == ARES_EBADNAME)
309     status = ARES_EBADRESP;
310   return status;
311 }
312