xref: /aosp_15_r20/external/libxml2/nanohttp.c (revision 7c5688314b92172186c154356a6374bf7684c3ca)
1 /*
2  * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
3  *             focuses on size, streamability, reentrancy and portability
4  *
5  * This is clearly not a general purpose HTTP implementation
6  * If you look for one, check:
7  *         http://www.w3.org/Library/
8  *
9  * See Copyright for the status of this software.
10  *
11  * daniel@veillard.com
12  */
13 
14 #define IN_LIBXML
15 #include "libxml.h"
16 
17 #ifdef LIBXML_HTTP_ENABLED
18 #include <string.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <errno.h>
22 
23 #include <fcntl.h>
24 
25 #ifdef _WIN32
26 
27 #include <io.h>
28 #include <wsockcompat.h>
29 #define XML_SOCKLEN_T int
30 
31 #else /* _WIN32 */
32 
33 #include <netdb.h>
34 #include <netinet/in.h>
35 #include <sys/socket.h>
36 #include <sys/time.h>
37 #include <unistd.h>
38 
39 #ifdef HAVE_POLL_H
40   #include <poll.h>
41 #else
42   #include <sys/select.h>
43 #endif
44 
45 /* This can be disabled if you don't have getaddrinfo */
46 #define SUPPORT_IP6
47 #define XML_SOCKLEN_T socklen_t
48 
49 #endif /* _WIN32 */
50 
51 #ifdef LIBXML_ZLIB_ENABLED
52 #include <zlib.h>
53 #endif
54 
55 #include <libxml/xmlerror.h>
56 #include <libxml/xmlmemory.h>
57 #include <libxml/parser.h> /* for xmlStr(n)casecmp() */
58 #include <libxml/nanohttp.h>
59 #include <libxml/uri.h>
60 
61 #include "private/error.h"
62 #include "private/io.h"
63 
/*
 * Portability shims: when Winsock is not in use, sockets are plain int
 * file descriptors, so the Winsock spellings are mapped onto close(),
 * int and -1.
 */
#if !defined(_WINSOCKAPI_)
#define SOCKET int
#define INVALID_SOCKET (-1)
#define closesocket(s) close(s)
#endif

/* Casts applied to the arguments of gethostbyname() and send(). */
#define GETHOSTBYNAME_ARG_CAST (char *)
#define SEND_ARG2_CAST (char *)

/* NOTE(review): presumably the redirect limit; its use is outside this section. */
#define XML_NANO_HTTP_MAX_REDIR	10

/* Bytes requested per recv() call; also the headroom kept in the input buffer. */
#define XML_NANO_HTTP_CHUNK	4096

/* Values tested against / stored in the context's state field. */
#define XML_NANO_HTTP_CLOSED	0
#define XML_NANO_HTTP_WRITE	1
#define XML_NANO_HTTP_READ	2
#define XML_NANO_HTTP_NONE	4

/* Error reporting through __xmlIOErr() is compiled out in this file. */
#define __xmlIOErr(domain, code, extra) ((void) 0)
86 
87 typedef struct xmlNanoHTTPCtxt {
88     char *protocol;	/* the protocol name */
89     char *hostname;	/* the host name */
90     int port;		/* the port */
91     char *path;		/* the path within the URL */
92     char *query;	/* the query string */
93     SOCKET fd;		/* the file descriptor for the socket */
94     int state;		/* WRITE / READ / CLOSED */
95     char *out;		/* buffer sent (zero terminated) */
96     char *outptr;	/* index within the buffer sent */
97     char *in;		/* the receiving buffer */
98     char *content;	/* the start of the content */
99     char *inptr;	/* the next byte to read from network */
100     char *inrptr;	/* the next byte to give back to the client */
101     int inlen;		/* len of the input buffer */
102     int last;		/* return code for last operation */
103     int returnValue;	/* the protocol return value */
104     int version;        /* the protocol version */
105     int ContentLength;  /* specified content length from HTTP header */
106     char *contentType;	/* the MIME type for the input */
107     char *location;	/* the new URL in case of redirect */
108     char *authHeader;	/* contents of {WWW,Proxy}-Authenticate header */
109     char *encoding;	/* encoding extracted from the contentType */
110     char *mimeType;	/* Mime-Type extracted from the contentType */
111 #ifdef LIBXML_ZLIB_ENABLED
112     z_stream *strm;	/* Zlib stream object */
113     int usesGzip;	/* "Content-Encoding: gzip" was detected */
114 #endif
115 } xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
116 
/* Module-wide state shared by every HTTP context. */
static int initialized = 0;        /* non-zero once xmlNanoHTTPInit() completed */
static char *proxy = NULL;         /* proxy host name, NULL when no proxy is set */
static int proxyPort;              /* proxy port, if any */
static unsigned int timeout = 60;  /* seconds to wait in select()/poll() */

/* Forward declaration; the definition is not part of this section. */
static int xmlNanoHTTPFetchContent(void *ctx, char **ptr, int *len);
123 
124 /**
125  * xmlHTTPErrMemory:
126  * @extra:  extra information
127  *
128  * Handle an out of memory condition
129  */
130 static void
xmlHTTPErrMemory(void)131 xmlHTTPErrMemory(void)
132 {
133     xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_HTTP, NULL);
134 }
135 
/**
 * socket_errno:
 *
 * Portable access to the error code of the last socket operation.
 * With Winsock the WSA error is fetched and the codes this file cares
 * about are translated to their errno counterparts; any other WSA code
 * is returned unchanged.  Everywhere else this is simply errno.
 *
 * Returns the error code.
 */
static int socket_errno(void) {
#ifdef _WINSOCKAPI_
    const int wsaError = WSAGetLastError();

    if (wsaError == WSAECONNRESET)
        return(ECONNRESET);
    if (wsaError == WSAEINPROGRESS)
        return(EINPROGRESS);
    if (wsaError == WSAEINTR)
        return(EINTR);
    if (wsaError == WSAESHUTDOWN)
        return(ESHUTDOWN);
    if (wsaError == WSAEWOULDBLOCK)
        return(EWOULDBLOCK);
    return(wsaError);
#else
    return(errno);
#endif
}
160 
161 /**
162  * xmlNanoHTTPInit:
163  *
164  * Initialize the HTTP protocol layer.
165  * Currently it just checks for proxy information
166  */
167 
168 void
xmlNanoHTTPInit(void)169 xmlNanoHTTPInit(void) {
170     const char *env;
171 #ifdef _WINSOCKAPI_
172     WSADATA wsaData;
173 #endif
174 
175     if (initialized)
176 	return;
177 
178 #ifdef _WINSOCKAPI_
179     if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
180 	return;
181 #endif
182 
183     if (proxy == NULL) {
184 	proxyPort = 80;
185 	env = getenv("no_proxy");
186 	if (env && ((env[0] == '*') && (env[1] == 0)))
187 	    goto done;
188 	env = getenv("http_proxy");
189 	if (env != NULL) {
190 	    xmlNanoHTTPScanProxy(env);
191 	    goto done;
192 	}
193 	env = getenv("HTTP_PROXY");
194 	if (env != NULL) {
195 	    xmlNanoHTTPScanProxy(env);
196 	    goto done;
197 	}
198     }
199 done:
200     initialized = 1;
201 }
202 
203 /**
204  * xmlNanoHTTPCleanup:
205  *
206  * Cleanup the HTTP protocol layer.
207  */
208 
209 void
xmlNanoHTTPCleanup(void)210 xmlNanoHTTPCleanup(void) {
211     if (proxy != NULL) {
212 	xmlFree(proxy);
213 	proxy = NULL;
214     }
215 #ifdef _WINSOCKAPI_
216     if (initialized)
217 	WSACleanup();
218 #endif
219     initialized = 0;
220     return;
221 }
222 
223 /**
224  * xmlNanoHTTPScanURL:
225  * @ctxt:  an HTTP context
226  * @URL:  The URL used to initialize the context
227  *
228  * (Re)Initialize an HTTP context by parsing the URL and finding
229  * the protocol host port and path it indicates.
230  */
231 
232 static void
xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt,const char * URL)233 xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
234     xmlURIPtr uri;
235     int len;
236 
237     /*
238      * Clear any existing data from the context
239      */
240     if (ctxt->protocol != NULL) {
241         xmlFree(ctxt->protocol);
242 	ctxt->protocol = NULL;
243     }
244     if (ctxt->hostname != NULL) {
245         xmlFree(ctxt->hostname);
246 	ctxt->hostname = NULL;
247     }
248     if (ctxt->path != NULL) {
249         xmlFree(ctxt->path);
250 	ctxt->path = NULL;
251     }
252     if (ctxt->query != NULL) {
253         xmlFree(ctxt->query);
254 	ctxt->query = NULL;
255     }
256     if (URL == NULL) return;
257 
258     uri = xmlParseURIRaw(URL, 1);
259     if (uri == NULL)
260 	return;
261 
262     if ((uri->scheme == NULL) || (uri->server == NULL)) {
263 	xmlFreeURI(uri);
264 	return;
265     }
266 
267     ctxt->protocol = xmlMemStrdup(uri->scheme);
268     /* special case of IPv6 addresses, the [] need to be removed */
269     if ((uri->server != NULL) && (*uri->server == '[')) {
270         len = strlen(uri->server);
271 	if ((len > 2) && (uri->server[len - 1] == ']')) {
272 	    ctxt->hostname = (char *) xmlCharStrndup(uri->server + 1, len -2);
273 	} else
274 	    ctxt->hostname = xmlMemStrdup(uri->server);
275     } else
276 	ctxt->hostname = xmlMemStrdup(uri->server);
277     if (uri->path != NULL)
278 	ctxt->path = xmlMemStrdup(uri->path);
279     else
280 	ctxt->path = xmlMemStrdup("/");
281     if (uri->query != NULL)
282 	ctxt->query = xmlMemStrdup(uri->query);
283     if (uri->port != 0)
284 	ctxt->port = uri->port;
285 
286     xmlFreeURI(uri);
287 }
288 
289 /**
290  * xmlNanoHTTPScanProxy:
291  * @URL:  The proxy URL used to initialize the proxy context
292  *
293  * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
294  * the protocol host port it indicates.
295  * Should be like http://myproxy/ or http://myproxy:3128/
296  * A NULL URL cleans up proxy information.
297  */
298 
299 void
xmlNanoHTTPScanProxy(const char * URL)300 xmlNanoHTTPScanProxy(const char *URL) {
301     xmlURIPtr uri;
302 
303     if (proxy != NULL) {
304         xmlFree(proxy);
305 	proxy = NULL;
306     }
307     proxyPort = 0;
308 
309     if (URL == NULL) return;
310 
311     uri = xmlParseURIRaw(URL, 1);
312     if ((uri == NULL) || (uri->scheme == NULL) ||
313 	(strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
314 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
315 	if (uri != NULL)
316 	    xmlFreeURI(uri);
317 	return;
318     }
319 
320     proxy = xmlMemStrdup(uri->server);
321     if (uri->port != 0)
322 	proxyPort = uri->port;
323 
324     xmlFreeURI(uri);
325 }
326 
327 /**
328  * xmlNanoHTTPNewCtxt:
329  * @URL:  The URL used to initialize the context
330  *
331  * Allocate and initialize a new HTTP context.
332  *
333  * Returns an HTTP context or NULL in case of error.
334  */
335 
336 static xmlNanoHTTPCtxtPtr
xmlNanoHTTPNewCtxt(const char * URL)337 xmlNanoHTTPNewCtxt(const char *URL) {
338     xmlNanoHTTPCtxtPtr ret;
339 
340     ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
341     if (ret == NULL) {
342         xmlHTTPErrMemory();
343         return(NULL);
344     }
345 
346     memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
347     ret->port = 80;
348     ret->returnValue = 0;
349     ret->fd = INVALID_SOCKET;
350     ret->ContentLength = -1;
351 
352     xmlNanoHTTPScanURL(ret, URL);
353 
354     return(ret);
355 }
356 
357 /**
358  * xmlNanoHTTPFreeCtxt:
359  * @ctxt:  an HTTP context
360  *
361  * Frees the context after closing the connection.
362  */
363 
364 static void
xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt)365 xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
366     if (ctxt == NULL) return;
367     if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
368     if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
369     if (ctxt->path != NULL) xmlFree(ctxt->path);
370     if (ctxt->query != NULL) xmlFree(ctxt->query);
371     if (ctxt->out != NULL) xmlFree(ctxt->out);
372     if (ctxt->in != NULL) xmlFree(ctxt->in);
373     if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
374     if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
375     if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
376     if (ctxt->location != NULL) xmlFree(ctxt->location);
377     if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
378 #ifdef LIBXML_ZLIB_ENABLED
379     if (ctxt->strm != NULL) {
380 	inflateEnd(ctxt->strm);
381 	xmlFree(ctxt->strm);
382     }
383 #endif
384 
385     ctxt->state = XML_NANO_HTTP_NONE;
386     if (ctxt->fd != INVALID_SOCKET) closesocket(ctxt->fd);
387     ctxt->fd = INVALID_SOCKET;
388     xmlFree(ctxt);
389 }
390 
391 /**
392  * xmlNanoHTTPSend:
393  * @ctxt:  an HTTP context
394  *
395  * Send the input needed to initiate the processing on the server side
396  * Returns number of bytes sent or -1 on error.
397  */
398 
399 static int
xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt,const char * xmt_ptr,int outlen)400 xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char *xmt_ptr, int outlen)
401 {
402     int total_sent = 0;
403 #ifdef HAVE_POLL_H
404     struct pollfd p;
405 #else
406     struct timeval tv;
407     fd_set wfd;
408 #endif
409 
410     if ((ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL)) {
411         while (total_sent < outlen) {
412             int nsent = send(ctxt->fd, SEND_ARG2_CAST (xmt_ptr + total_sent),
413                              outlen - total_sent, 0);
414 
415             if (nsent > 0)
416                 total_sent += nsent;
417             else if ((nsent == -1) &&
418 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
419                      (socket_errno() != EAGAIN) &&
420 #endif
421                      (socket_errno() != EWOULDBLOCK)) {
422                 __xmlIOErr(XML_FROM_HTTP, 0, "send failed\n");
423                 if (total_sent == 0)
424                     total_sent = -1;
425                 break;
426             } else {
427                 /*
428                  * No data sent
429                  * Since non-blocking sockets are used, wait for
430                  * socket to be writable or default timeout prior
431                  * to retrying.
432                  */
433 #ifndef HAVE_POLL_H
434 #ifndef _WINSOCKAPI_
435                 if (ctxt->fd > FD_SETSIZE)
436                     return -1;
437 #endif
438 
439                 tv.tv_sec = timeout;
440                 tv.tv_usec = 0;
441                 FD_ZERO(&wfd);
442 #ifdef _MSC_VER
443 #pragma warning(push)
444 #pragma warning(disable: 4018)
445 #endif
446                 FD_SET(ctxt->fd, &wfd);
447 #ifdef _MSC_VER
448 #pragma warning(pop)
449 #endif
450                 (void) select(ctxt->fd + 1, NULL, &wfd, NULL, &tv);
451 #else
452                 p.fd = ctxt->fd;
453                 p.events = POLLOUT;
454                 (void) poll(&p, 1, timeout * 1000);
455 #endif /* !HAVE_POLL_H */
456             }
457         }
458     }
459 
460     return total_sent;
461 }
462 
463 /**
464  * xmlNanoHTTPRecv:
465  * @ctxt:  an HTTP context
466  *
467  * Read information coming from the HTTP connection.
468  * This is a blocking call (but it blocks in select(), not read()).
469  *
470  * Returns the number of byte read or -1 in case of error.
471  */
472 
473 static int
xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)474 xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)
475 {
476 #ifdef HAVE_POLL_H
477     struct pollfd p;
478 #else
479     fd_set rfd;
480     struct timeval tv;
481 #endif
482 
483 
484     while (ctxt->state & XML_NANO_HTTP_READ) {
485         if (ctxt->in == NULL) {
486             ctxt->in = xmlMalloc(65000);
487             if (ctxt->in == NULL) {
488                 xmlHTTPErrMemory();
489                 ctxt->last = -1;
490                 return (-1);
491             }
492             ctxt->inlen = 65000;
493             ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
494         }
495         if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
496             int delta = ctxt->inrptr - ctxt->in;
497             int len = ctxt->inptr - ctxt->inrptr;
498 
499             memmove(ctxt->in, ctxt->inrptr, len);
500             ctxt->inrptr -= delta;
501             ctxt->content -= delta;
502             ctxt->inptr -= delta;
503         }
504         if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
505             int d_inptr = ctxt->inptr - ctxt->in;
506             int d_content = ctxt->content - ctxt->in;
507             int d_inrptr = ctxt->inrptr - ctxt->in;
508             char *tmp_ptr = ctxt->in;
509 
510             ctxt->inlen *= 2;
511             ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen);
512             if (ctxt->in == NULL) {
513                 xmlHTTPErrMemory();
514                 xmlFree(tmp_ptr);
515                 ctxt->last = -1;
516                 return (-1);
517             }
518             ctxt->inptr = ctxt->in + d_inptr;
519             ctxt->content = ctxt->in + d_content;
520             ctxt->inrptr = ctxt->in + d_inrptr;
521         }
522         ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
523         if (ctxt->last > 0) {
524             ctxt->inptr += ctxt->last;
525             return (ctxt->last);
526         }
527         if (ctxt->last == 0) {
528             return (0);
529         }
530         if (ctxt->last == -1) {
531             switch (socket_errno()) {
532                 case EINPROGRESS:
533                 case EWOULDBLOCK:
534 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
535                 case EAGAIN:
536 #endif
537                     break;
538 
539                 case ECONNRESET:
540                 case ESHUTDOWN:
541                     return (0);
542 
543                 default:
544                     __xmlIOErr(XML_FROM_HTTP, 0, "recv failed\n");
545                     return (-1);
546             }
547         }
548 #ifdef HAVE_POLL_H
549         p.fd = ctxt->fd;
550         p.events = POLLIN;
551         if ((poll(&p, 1, timeout * 1000) < 1)
552 #if defined(EINTR)
553             && (errno != EINTR)
554 #endif
555             )
556             return (0);
557 #else /* !HAVE_POLL_H */
558 #ifndef _WINSOCKAPI_
559         if (ctxt->fd > FD_SETSIZE)
560             return 0;
561 #endif
562 
563         tv.tv_sec = timeout;
564         tv.tv_usec = 0;
565         FD_ZERO(&rfd);
566 
567 #ifdef _MSC_VER
568 #pragma warning(push)
569 #pragma warning(disable: 4018)
570 #endif
571 
572         FD_SET(ctxt->fd, &rfd);
573 
574 #ifdef _MSC_VER
575 #pragma warning(pop)
576 #endif
577 
578         if ((select(ctxt->fd + 1, &rfd, NULL, NULL, &tv) < 1)
579 #if defined(EINTR)
580             && (socket_errno() != EINTR)
581 #endif
582             )
583             return (0);
584 #endif /* !HAVE_POLL_H */
585     }
586     return (0);
587 }
588 
589 /**
590  * xmlNanoHTTPReadLine:
591  * @ctxt:  an HTTP context
592  *
593  * Read one line in the HTTP server output, usually for extracting
594  * the HTTP protocol information from the answer header.
595  *
596  * Returns a newly allocated string with a copy of the line, or NULL
597  *         which indicate the end of the input.
598  */
599 
600 static char *
xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt)601 xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
602     char buf[4096];
603     char *bp = buf;
604     int	rc;
605 
606     while (bp - buf < 4095) {
607 	if (ctxt->inrptr == ctxt->inptr) {
608 	    if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) {
609 		if (bp == buf)
610 		    return(NULL);
611 		else
612 		    *bp = 0;
613 		return(xmlMemStrdup(buf));
614 	    }
615 	    else if ( rc == -1 ) {
616 	        return ( NULL );
617 	    }
618 	}
619 	*bp = *ctxt->inrptr++;
620 	if (*bp == '\n') {
621 	    *bp = 0;
622 	    return(xmlMemStrdup(buf));
623 	}
624 	if (*bp != '\r')
625 	    bp++;
626     }
627     buf[4095] = 0;
628     return(xmlMemStrdup(buf));
629 }
630 
631 
632 /**
633  * xmlNanoHTTPScanAnswer:
634  * @ctxt:  an HTTP context
635  * @line:  an HTTP header line
636  *
637  * Try to extract useful information from the server answer.
638  * We currently parse and process:
639  *  - The HTTP revision/ return code
640  *  - The Content-Type, Mime-Type and charset used
641  *  - The Location for redirect processing.
642  *
643  * Returns -1 in case of failure, the file descriptor number otherwise
644  */
645 
646 static void
xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt,const char * line)647 xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
648     const char *cur = line;
649 
650     if (line == NULL) return;
651 
652     if (!strncmp(line, "HTTP/", 5)) {
653         int version = 0;
654 	int ret = 0;
655 
656 	cur += 5;
657 	while ((*cur >= '0') && (*cur <= '9')) {
658 	    version *= 10;
659 	    version += *cur - '0';
660 	    cur++;
661 	}
662 	if (*cur == '.') {
663 	    cur++;
664 	    if ((*cur >= '0') && (*cur <= '9')) {
665 		version *= 10;
666 		version += *cur - '0';
667 		cur++;
668 	    }
669 	    while ((*cur >= '0') && (*cur <= '9'))
670 		cur++;
671 	} else
672 	    version *= 10;
673 	if ((*cur != ' ') && (*cur != '\t')) return;
674 	while ((*cur == ' ') || (*cur == '\t')) cur++;
675 	if ((*cur < '0') || (*cur > '9')) return;
676 	while ((*cur >= '0') && (*cur <= '9')) {
677 	    ret *= 10;
678 	    ret += *cur - '0';
679 	    cur++;
680 	}
681 	if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
682 	ctxt->returnValue = ret;
683         ctxt->version = version;
684     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
685         const xmlChar *charset, *last, *mime;
686         cur += 13;
687 	while ((*cur == ' ') || (*cur == '\t')) cur++;
688 	if (ctxt->contentType != NULL)
689 	    xmlFree(ctxt->contentType);
690 	ctxt->contentType = xmlMemStrdup(cur);
691 	mime = (const xmlChar *) cur;
692 	last = mime;
693 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
694 	       (*last != ';') && (*last != ','))
695 	    last++;
696 	if (ctxt->mimeType != NULL)
697 	    xmlFree(ctxt->mimeType);
698 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
699 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
700 	if (charset != NULL) {
701 	    charset += 8;
702 	    last = charset;
703 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
704 	           (*last != ';') && (*last != ','))
705 		last++;
706 	    if (ctxt->encoding != NULL)
707 	        xmlFree(ctxt->encoding);
708 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
709 	}
710     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
711         const xmlChar *charset, *last, *mime;
712         cur += 12;
713 	if (ctxt->contentType != NULL) return;
714 	while ((*cur == ' ') || (*cur == '\t')) cur++;
715 	ctxt->contentType = xmlMemStrdup(cur);
716 	mime = (const xmlChar *) cur;
717 	last = mime;
718 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
719 	       (*last != ';') && (*last != ','))
720 	    last++;
721 	if (ctxt->mimeType != NULL)
722 	    xmlFree(ctxt->mimeType);
723 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
724 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
725 	if (charset != NULL) {
726 	    charset += 8;
727 	    last = charset;
728 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
729 	           (*last != ';') && (*last != ','))
730 		last++;
731 	    if (ctxt->encoding != NULL)
732 	        xmlFree(ctxt->encoding);
733 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
734 	}
735     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
736         cur += 9;
737 	while ((*cur == ' ') || (*cur == '\t')) cur++;
738 	if (ctxt->location != NULL)
739 	    xmlFree(ctxt->location);
740 	if (*cur == '/') {
741 	    xmlChar *tmp_http = xmlStrdup(BAD_CAST "http://");
742 	    xmlChar *tmp_loc =
743 	        xmlStrcat(tmp_http, (const xmlChar *) ctxt->hostname);
744 	    ctxt->location =
745 	        (char *) xmlStrcat (tmp_loc, (const xmlChar *) cur);
746 	} else {
747 	    ctxt->location = xmlMemStrdup(cur);
748 	}
749     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
750         cur += 17;
751 	while ((*cur == ' ') || (*cur == '\t')) cur++;
752 	if (ctxt->authHeader != NULL)
753 	    xmlFree(ctxt->authHeader);
754 	ctxt->authHeader = xmlMemStrdup(cur);
755     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
756         cur += 19;
757 	while ((*cur == ' ') || (*cur == '\t')) cur++;
758 	if (ctxt->authHeader != NULL)
759 	    xmlFree(ctxt->authHeader);
760 	ctxt->authHeader = xmlMemStrdup(cur);
761 #ifdef LIBXML_ZLIB_ENABLED
762     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
763 	cur += 17;
764 	while ((*cur == ' ') || (*cur == '\t')) cur++;
765 	if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
766 	    ctxt->usesGzip = 1;
767 
768 	    ctxt->strm = xmlMalloc(sizeof(z_stream));
769 
770 	    if (ctxt->strm != NULL) {
771 		ctxt->strm->zalloc = Z_NULL;
772 		ctxt->strm->zfree = Z_NULL;
773 		ctxt->strm->opaque = Z_NULL;
774 		ctxt->strm->avail_in = 0;
775 		ctxt->strm->next_in = Z_NULL;
776 
777 		inflateInit2( ctxt->strm, 31 );
778 	    }
779 	}
780 #endif
781     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
782 	cur += 15;
783 	ctxt->ContentLength = strtol( cur, NULL, 10 );
784     }
785 }
786 
787 /**
788  * xmlNanoHTTPConnectAttempt:
789  * @addr:  a socket address structure
790  *
791  * Attempt a connection to the given IP:port endpoint. It forces
792  * non-blocking semantic on the socket, and allow 60 seconds for
793  * the host to answer.
794  *
795  * Returns -1 in case of failure, the file descriptor number otherwise
796  */
797 
798 static SOCKET
xmlNanoHTTPConnectAttempt(struct sockaddr * addr)799 xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
800 {
801 #ifndef HAVE_POLL_H
802     fd_set wfd;
803 #ifdef _WINSOCKAPI_
804     fd_set xfd;
805 #endif
806     struct timeval tv;
807 #else /* !HAVE_POLL_H */
808     struct pollfd p;
809 #endif /* !HAVE_POLL_H */
810     int status;
811 
812     int addrlen;
813 
814     SOCKET s;
815 
816 #ifdef SUPPORT_IP6
817     if (addr->sa_family == AF_INET6) {
818         s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
819         addrlen = sizeof(struct sockaddr_in6);
820     } else
821 #endif
822     {
823         s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
824         addrlen = sizeof(struct sockaddr_in);
825     }
826     if (s == INVALID_SOCKET) {
827         __xmlIOErr(XML_FROM_HTTP, 0, "socket failed\n");
828         return INVALID_SOCKET;
829     }
830 #ifdef _WINSOCKAPI_
831     {
832         u_long one = 1;
833 
834         status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
835     }
836 #else /* _WINSOCKAPI_ */
837 #if defined(VMS)
838     {
839         int enable = 1;
840 
841         status = ioctl(s, FIONBIO, &enable);
842     }
843 #else /* VMS */
844     if ((status = fcntl(s, F_GETFL, 0)) != -1) {
845 #ifdef O_NONBLOCK
846         status |= O_NONBLOCK;
847 #else /* O_NONBLOCK */
848 #ifdef F_NDELAY
849         status |= F_NDELAY;
850 #endif /* F_NDELAY */
851 #endif /* !O_NONBLOCK */
852         status = fcntl(s, F_SETFL, status);
853     }
854     if (status < 0) {
855         __xmlIOErr(XML_FROM_HTTP, 0, "error setting non-blocking IO\n");
856         closesocket(s);
857         return INVALID_SOCKET;
858     }
859 #endif /* !VMS */
860 #endif /* !_WINSOCKAPI_ */
861 
862     if (connect(s, addr, addrlen) == -1) {
863         switch (socket_errno()) {
864             case EINPROGRESS:
865             case EWOULDBLOCK:
866                 break;
867             default:
868                 __xmlIOErr(XML_FROM_HTTP, 0,
869                            "error connecting to HTTP server");
870                 closesocket(s);
871                 return INVALID_SOCKET;
872         }
873     }
874 #ifndef HAVE_POLL_H
875     tv.tv_sec = timeout;
876     tv.tv_usec = 0;
877 
878 #ifdef _MSC_VER
879 #pragma warning(push)
880 #pragma warning(disable: 4018)
881 #endif
882 #ifndef _WINSOCKAPI_
883     if (s > FD_SETSIZE)
884         return INVALID_SOCKET;
885 #endif
886     FD_ZERO(&wfd);
887     FD_SET(s, &wfd);
888 
889 #ifdef _WINSOCKAPI_
890     FD_ZERO(&xfd);
891     FD_SET(s, &xfd);
892 
893     switch (select(s + 1, NULL, &wfd, &xfd, &tv))
894 #else
895     switch (select(s + 1, NULL, &wfd, NULL, &tv))
896 #endif
897 #ifdef _MSC_VER
898 #pragma warning(pop)
899 #endif
900 
901 #else /* !HAVE_POLL_H */
902     p.fd = s;
903     p.events = POLLOUT;
904     switch (poll(&p, 1, timeout * 1000))
905 #endif /* !HAVE_POLL_H */
906 
907     {
908         case 0:
909             /* Time out */
910             __xmlIOErr(XML_FROM_HTTP, 0, "Connect attempt timed out");
911             closesocket(s);
912             return INVALID_SOCKET;
913         case -1:
914             /* Ermm.. ?? */
915             __xmlIOErr(XML_FROM_HTTP, 0, "Connect failed");
916             closesocket(s);
917             return INVALID_SOCKET;
918     }
919 
920 #ifndef HAVE_POLL_H
921     if (FD_ISSET(s, &wfd)
922 #ifdef _WINSOCKAPI_
923         || FD_ISSET(s, &xfd)
924 #endif
925         )
926 #else /* !HAVE_POLL_H */
927     if (p.revents == POLLOUT)
928 #endif /* !HAVE_POLL_H */
929     {
930         XML_SOCKLEN_T len;
931 
932         len = sizeof(status);
933 #ifdef SO_ERROR
934         if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &status, &len) <
935             0) {
936             /* Solaris error code */
937             __xmlIOErr(XML_FROM_HTTP, 0, "getsockopt failed\n");
938             closesocket(s);
939             return INVALID_SOCKET;
940         }
941 #endif
942         if (status) {
943             __xmlIOErr(XML_FROM_HTTP, 0,
944                        "Error connecting to remote host");
945             closesocket(s);
946             errno = status;
947             return INVALID_SOCKET;
948         }
949     } else {
950         /* pbm */
951         __xmlIOErr(XML_FROM_HTTP, 0, "select failed\n");
952         closesocket(s);
953         return INVALID_SOCKET;
954     }
955 
956     return (s);
957 }
958 
959 /**
960  * xmlNanoHTTPConnectHost:
961  * @host:  the host name
962  * @port:  the port number
963  *
964  * Attempt a connection to the given host:port endpoint. It tries
965  * the multiple IP provided by the DNS if available.
966  *
967  * Returns -1 in case of failure, the file descriptor number otherwise
968  */
969 
970 static SOCKET
xmlNanoHTTPConnectHost(const char * host,int port)971 xmlNanoHTTPConnectHost(const char *host, int port)
972 {
973     struct sockaddr *addr = NULL;
974     struct sockaddr_in sockin;
975 
976 #ifdef SUPPORT_IP6
977     struct sockaddr_in6 sockin6;
978 #endif
979     SOCKET s;
980 
981     memset (&sockin, 0, sizeof(sockin));
982 
983 #if defined(SUPPORT_IP6)
984     {
985 	int status;
986 	struct addrinfo hints, *res, *result;
987 
988         memset (&sockin6, 0, sizeof(sockin6));
989 
990 	result = NULL;
991 	memset (&hints, 0,sizeof(hints));
992 	hints.ai_socktype = SOCK_STREAM;
993 
994 	status = getaddrinfo (host, NULL, &hints, &result);
995 	if (status) {
996 	    __xmlIOErr(XML_FROM_HTTP, 0, "getaddrinfo failed\n");
997 	    return INVALID_SOCKET;
998 	}
999 
1000 	for (res = result; res; res = res->ai_next) {
1001 	    if (res->ai_family == AF_INET) {
1002 		if ((size_t)res->ai_addrlen > sizeof(sockin)) {
1003 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1004 		    freeaddrinfo (result);
1005 		    return INVALID_SOCKET;
1006 		}
1007 		memcpy (&sockin, res->ai_addr, res->ai_addrlen);
1008 		sockin.sin_port = htons (port);
1009 		addr = (struct sockaddr *)&sockin;
1010 	    } else if (res->ai_family == AF_INET6) {
1011 		if ((size_t)res->ai_addrlen > sizeof(sockin6)) {
1012 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1013 		    freeaddrinfo (result);
1014 		    return INVALID_SOCKET;
1015 		}
1016 		memcpy (&sockin6, res->ai_addr, res->ai_addrlen);
1017 		sockin6.sin6_port = htons (port);
1018 		addr = (struct sockaddr *)&sockin6;
1019 	    } else
1020 		continue;              /* for */
1021 
1022 	    s = xmlNanoHTTPConnectAttempt (addr);
1023 	    if (s != INVALID_SOCKET) {
1024 		freeaddrinfo (result);
1025 		return (s);
1026 	    }
1027 	}
1028 
1029 	if (result)
1030 	    freeaddrinfo (result);
1031     }
1032 #else
1033     {
1034         struct hostent *h;
1035         struct in_addr ia;
1036         int i;
1037 
1038 	h = gethostbyname (GETHOSTBYNAME_ARG_CAST host);
1039 	if (h == NULL) {
1040 
1041 /*
1042  * Okay, I got fed up by the non-portability of this error message
1043  * extraction code. it work on Linux, if it work on your platform
1044  * and one want to enable it, send me the defined(foobar) needed
1045  */
1046 #if defined(HOST_NOT_FOUND) && defined(__linux__)
1047 	    const char *h_err_txt = "";
1048 
1049 	    switch (h_errno) {
1050 		case HOST_NOT_FOUND:
1051 		    h_err_txt = "Authoritative host not found";
1052 		    break;
1053 
1054 		case TRY_AGAIN:
1055 		    h_err_txt =
1056 			"Non-authoritative host not found or server failure.";
1057 		    break;
1058 
1059 		case NO_RECOVERY:
1060 		    h_err_txt =
1061 			"Non-recoverable errors:  FORMERR, REFUSED, or NOTIMP.";
1062 		    break;
1063 
1064 #ifdef NO_ADDRESS
1065 		case NO_ADDRESS:
1066 		    h_err_txt =
1067 			"Valid name, no data record of requested type.";
1068 		    break;
1069 #endif
1070 
1071 		default:
1072 		    h_err_txt = "No error text defined.";
1073 		    break;
1074 	    }
1075 	    __xmlIOErr(XML_FROM_HTTP, 0, h_err_txt);
1076 #else
1077 	    __xmlIOErr(XML_FROM_HTTP, 0, "Failed to resolve host");
1078 #endif
1079 	    return INVALID_SOCKET;
1080 	}
1081 
1082 	for (i = 0; h->h_addr_list[i]; i++) {
1083 	    if (h->h_addrtype == AF_INET) {
1084 		/* A records (IPv4) */
1085 		if ((unsigned int) h->h_length > sizeof(ia)) {
1086 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1087 		    return INVALID_SOCKET;
1088 		}
1089 		memcpy (&ia, h->h_addr_list[i], h->h_length);
1090 		sockin.sin_family = h->h_addrtype;
1091 		sockin.sin_addr = ia;
1092 		sockin.sin_port = (unsigned short)htons ((unsigned short)port);
1093 		addr = (struct sockaddr *) &sockin;
1094 	    } else
1095 		break;              /* for */
1096 
1097 	    s = xmlNanoHTTPConnectAttempt (addr);
1098 	    if (s != INVALID_SOCKET)
1099 		return (s);
1100 	}
1101     }
1102 #endif
1103 
1104     return INVALID_SOCKET;
1105 }
1106 
1107 
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if non-NULL, location receiving the Content-Type
 *                information when available; it is reset to NULL before
 *                the request is issued
 *
 * Opens a connection to the indicated resource via HTTP GET. This is a
 * convenience wrapper forwarding to xmlNanoHTTPMethod() without an explicit
 * method, request body or extra headers.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    void *handle;

    if (contentType != NULL) {
        *contentType = NULL;
    }

    handle = xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0);
    return(handle);
}
1126 
/**
 * xmlNanoHTTPOpenRedir:
 * @URL:  The URL to load
 * @contentType:  if non-NULL, location receiving the Content-Type
 *                information when available; reset to NULL up front
 * @redir:  if non-NULL, location receiving the redirected URL when
 *          available; reset to NULL up front
 *
 * Opens a connection to the indicated resource via HTTP GET. This is a
 * convenience wrapper forwarding to xmlNanoHTTPMethodRedir() without an
 * explicit method, request body or extra headers.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir) {
    void *handle;

    if (contentType != NULL) {
        *contentType = NULL;
    }
    if (redir != NULL) {
        *redir = NULL;
    }

    handle = xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir,
                                    NULL, 0);
    return(handle);
}
1147 
1148 /**
1149  * xmlNanoHTTPRead:
1150  * @ctx:  the HTTP context
1151  * @dest:  a buffer
1152  * @len:  the buffer length
1153  *
1154  * This function tries to read @len bytes from the existing HTTP connection
1155  * and saves them in @dest. This is a blocking call.
1156  *
1157  * Returns the number of byte read. 0 is an indication of an end of connection.
1158  *         -1 indicates a parameter error.
1159  */
1160 int
xmlNanoHTTPRead(void * ctx,void * dest,int len)1161 xmlNanoHTTPRead(void *ctx, void *dest, int len) {
1162     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1163 #ifdef LIBXML_ZLIB_ENABLED
1164     int bytes_read = 0;
1165     int orig_avail_in;
1166     int z_ret;
1167 #endif
1168 
1169     if (ctx == NULL) return(-1);
1170     if (dest == NULL) return(-1);
1171     if (len <= 0) return(0);
1172 
1173 #ifdef LIBXML_ZLIB_ENABLED
1174     if (ctxt->usesGzip == 1) {
1175         if (ctxt->strm == NULL) return(0);
1176 
1177         ctxt->strm->next_out = dest;
1178         ctxt->strm->avail_out = len;
1179 	ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr;
1180 
1181         while (ctxt->strm->avail_out > 0 &&
1182 	       (ctxt->strm->avail_in > 0 || xmlNanoHTTPRecv(ctxt) > 0)) {
1183             orig_avail_in = ctxt->strm->avail_in =
1184 			    ctxt->inptr - ctxt->inrptr - bytes_read;
1185             ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
1186 
1187             z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
1188             bytes_read += orig_avail_in - ctxt->strm->avail_in;
1189 
1190             if (z_ret != Z_OK) break;
1191 	}
1192 
1193         ctxt->inrptr += bytes_read;
1194         return(len - ctxt->strm->avail_out);
1195     }
1196 #endif
1197 
1198     while (ctxt->inptr - ctxt->inrptr < len) {
1199         if (xmlNanoHTTPRecv(ctxt) <= 0) break;
1200     }
1201     if (ctxt->inptr - ctxt->inrptr < len)
1202         len = ctxt->inptr - ctxt->inrptr;
1203     if (len > 0) {
1204         memcpy(dest, ctxt->inrptr, len);
1205         ctxt->inrptr += len;
1206     }
1207     return(len);
1208 }
1209 
1210 /**
1211  * xmlNanoHTTPClose:
1212  * @ctx:  the HTTP context
1213  *
1214  * This function closes an HTTP context, it ends up the connection and
1215  * free all data related to it.
1216  */
1217 void
xmlNanoHTTPClose(void * ctx)1218 xmlNanoHTTPClose(void *ctx) {
1219     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1220 
1221     if (ctx == NULL) return;
1222 
1223     xmlNanoHTTPFreeCtxt(ctxt);
1224 }
1225 
1226 
/**
 * xmlNanoHTTPHostnameMatch:
 * @pattern: The pattern as it appears in no_proxy environment variable
 * @hostname: The hostname to test as it appears in the URL
 *
 * Tests whether a given hostname matches a pattern, which usually is a
 * token from the no_proxy environment variable. The comparison is a
 * case-insensitive suffix match: the pattern matches when it equals the
 * hostname, or when it equals the end of the hostname and the character
 * preceding that suffix in the hostname is a '.'. A single leading '.' in
 * the pattern is skipped before comparing. Wildcards in the pattern are not
 * supported.
 *
 * Returns true, iff hostname matches the pattern. A NULL or empty pattern
 * and a NULL hostname never match.
 */

static int
xmlNanoHTTPHostnameMatch(const char *pattern, const char *hostname) {
    const char *pat;
    int pi, hi;

    if ((pattern == NULL) || (pattern[0] == '\0') || (hostname == NULL))
        return 0;

    /* Skip a leading '.' so ".example.com" compares as "example.com" */
    pat = (pattern[0] == '.') ? pattern + 1 : pattern;

    /* Both indices start on the terminating NUL of their string. */
    pi = strlen(pat);
    hi = strlen(hostname);

    /* Compare backwards until a mismatch or until one string runs out. */
    while ((pi >= 0) && (hi >= 0)) {
        if (tolower((unsigned char) pat[pi]) !=
            tolower((unsigned char) hostname[hi]))
            break;
        pi--;
        hi--;
    }

    /* The whole pattern must have been consumed ... */
    if (pi != -1)
        return 0;

    /* ... and must end on a label boundary of the hostname. */
    return (hi == -1) || (hostname[hi] == '.');
}
1267 
1268 
/**
 * xmlNanoHTTPBypassProxy:
 * @hostname: The hostname as it appears in the URL
 *
 * Evaluates the no_proxy environment variable and returns whether the proxy
 * server should be bypassed for a given host. The variable is treated as a
 * comma-separated list; whitespace at the start of each entry is skipped and
 * every entry is tested with xmlNanoHTTPHostnameMatch().
 *
 * Returns true, iff a proxy server should be bypassed for the given hostname.
 * Returns false when no_proxy is unset or empty, and also when the working
 * copy of the variable cannot be allocated (a memory error is raised in
 * that case).
 */

static int
xmlNanoHTTPBypassProxy(const char *hostname) {
    size_t envlen;
    char *env = getenv("no_proxy"), *cpy = NULL, *p = NULL;

    if (!env)
        return 0;

    /*
     * Tokenizing writes NUL bytes into the list, so work on a private copy
     * rather than on the process environment.
     * (Avoid strdup because it's not portable.)
     */
    envlen = strlen(env) + 1;
    cpy = xmlMalloc(envlen);
    if (cpy == NULL) {
        /* Previously the NULL result went straight into memcpy(). */
        xmlHTTPErrMemory();
        return 0;
    }
    memcpy(cpy, env, envlen);
    env = cpy;

    /* The remainder of the function is basically a tokenizing: */
    while (isspace((unsigned char)*env))
        ++env;
    if (*env == '\0') {
        xmlFree(cpy);
        return 0;
    }

    p = env;    /* start of the entry currently being scanned */
    while (*env) {

        if (*env != ',') {
            ++env;
            continue;
        }

        /* Terminate the current entry at the comma and test it. */
        *(env++) = '\0';
        if (xmlNanoHTTPHostnameMatch(p, hostname)) {
            xmlFree(cpy);
            return 1;
        }

        while (isspace((unsigned char)*env))
            ++env;
        p = env;
    }

    /* Last entry (not followed by a comma). */
    if (xmlNanoHTTPHostnameMatch(p, hostname)) {
        xmlFree(cpy);
        return 1;
    }

    xmlFree(cpy);
    return 0;
}
1326 
1327 
1328 /**
1329  * xmlNanoHTTPMethodRedir:
1330  * @URL:  The URL to load
1331  * @method:  the HTTP method to use
1332  * @input:  the input string if any
1333  * @contentType:  the Content-Type information IN and OUT
1334  * @redir:  the redirected URL OUT
1335  * @headers:  the extra headers
1336  * @ilen:  input length
1337  *
1338  * This function try to open a connection to the indicated resource
1339  * via HTTP using the given @method, adding the given extra headers
1340  * and the input buffer for the request content.
1341  *
1342  * Returns NULL in case of failure, otherwise a request handler.
1343  *     The contentType, or redir, if provided must be freed by the caller
1344  */
1345 
1346 void*
xmlNanoHTTPMethodRedir(const char * URL,const char * method,const char * input,char ** contentType,char ** redir,const char * headers,int ilen)1347 xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input,
1348                   char **contentType, char **redir,
1349 		  const char *headers, int ilen ) {
1350     xmlNanoHTTPCtxtPtr ctxt;
1351     char *bp, *p;
1352     int blen;
1353     SOCKET ret;
1354     int nbRedirects = 0;
1355     int use_proxy;
1356     char *redirURL = NULL;
1357 
1358     if (URL == NULL) return(NULL);
1359     if (method == NULL) method = "GET";
1360     xmlNanoHTTPInit();
1361 
1362 retry:
1363     if (redirURL == NULL) {
1364 	ctxt = xmlNanoHTTPNewCtxt(URL);
1365 	if (ctxt == NULL)
1366 	    return(NULL);
1367     } else {
1368 	ctxt = xmlNanoHTTPNewCtxt(redirURL);
1369 	if (ctxt == NULL)
1370 	    return(NULL);
1371 	ctxt->location = xmlMemStrdup(redirURL);
1372     }
1373 
1374     if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
1375 	__xmlIOErr(XML_FROM_IO, XML_IO_UNSUPPORTED_PROTOCOL, ctxt->protocol);
1376         xmlNanoHTTPFreeCtxt(ctxt);
1377 	if (redirURL != NULL) xmlFree(redirURL);
1378         return(NULL);
1379     }
1380     if (ctxt->hostname == NULL) {
1381 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_UNKNOWN_HOST,
1382 	           "Failed to identify host in URI");
1383         xmlNanoHTTPFreeCtxt(ctxt);
1384 	if (redirURL != NULL) xmlFree(redirURL);
1385         return(NULL);
1386     }
1387     use_proxy = proxy && !xmlNanoHTTPBypassProxy(ctxt->hostname);
1388     if (use_proxy) {
1389 	blen = strlen(ctxt->hostname) * 2 + 16;
1390 	ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
1391     }
1392     else {
1393 	blen = strlen(ctxt->hostname);
1394 	ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
1395     }
1396     if (ret == INVALID_SOCKET) {
1397         xmlNanoHTTPFreeCtxt(ctxt);
1398 	if (redirURL != NULL) xmlFree(redirURL);
1399         return(NULL);
1400     }
1401     ctxt->fd = ret;
1402 
1403     if (input == NULL)
1404 	ilen = 0;
1405     else
1406 	blen += 36;
1407 
1408     if (headers != NULL)
1409 	blen += strlen(headers) + 2;
1410     if (contentType && *contentType)
1411 	/* reserve for string plus 'Content-Type: \r\n" */
1412 	blen += strlen(*contentType) + 16;
1413     if (ctxt->query != NULL)
1414 	/* 1 for '?' */
1415 	blen += strlen(ctxt->query) + 1;
1416     blen += strlen(method) + strlen(ctxt->path) + 24;
1417 #ifdef LIBXML_ZLIB_ENABLED
1418     /* reserve for possible 'Accept-Encoding: gzip' string */
1419     blen += 23;
1420 #endif
1421     if (ctxt->port != 80) {
1422 	/* reserve space for ':xxxxx', incl. potential proxy */
1423 	if (use_proxy)
1424 	    blen += 17;
1425 	else
1426 	    blen += 11;
1427     }
1428     bp = xmlMalloc(blen);
1429     if ( bp == NULL ) {
1430         xmlNanoHTTPFreeCtxt( ctxt );
1431 	xmlHTTPErrMemory();
1432 	return ( NULL );
1433     }
1434 
1435     p = bp;
1436 
1437     if (use_proxy) {
1438 	if (ctxt->port != 80) {
1439 	    p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s",
1440 			method, ctxt->hostname,
1441 			ctxt->port, ctxt->path );
1442 	}
1443 	else
1444 	    p += snprintf( p, blen - (p - bp), "%s http://%s%s", method,
1445 			ctxt->hostname, ctxt->path);
1446     }
1447     else
1448 	p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path);
1449 
1450     if (ctxt->query != NULL)
1451 	p += snprintf( p, blen - (p - bp), "?%s", ctxt->query);
1452 
1453     if (ctxt->port == 80) {
1454         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n",
1455 		    ctxt->hostname);
1456     } else {
1457         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s:%d\r\n",
1458 		    ctxt->hostname, ctxt->port);
1459     }
1460 
1461 #ifdef LIBXML_ZLIB_ENABLED
1462     p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
1463 #endif
1464 
1465     if (contentType != NULL && *contentType)
1466 	p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);
1467 
1468     if (headers != NULL)
1469 	p += snprintf( p, blen - (p - bp), "%s", headers );
1470 
1471     if (input != NULL)
1472 	snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen );
1473     else
1474 	snprintf(p, blen - (p - bp), "\r\n");
1475 
1476     ctxt->outptr = ctxt->out = bp;
1477     ctxt->state = XML_NANO_HTTP_WRITE;
1478     blen = strlen( ctxt->out );
1479     xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1480 
1481     if ( input != NULL ) {
1482 	xmlNanoHTTPSend( ctxt, input, ilen );
1483     }
1484 
1485     ctxt->state = XML_NANO_HTTP_READ;
1486 
1487     while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1488         if (*p == 0) {
1489 	    ctxt->content = ctxt->inrptr;
1490 	    xmlFree(p);
1491 	    break;
1492 	}
1493 	xmlNanoHTTPScanAnswer(ctxt, p);
1494 
1495         xmlFree(p);
1496     }
1497 
1498     if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1499         (ctxt->returnValue < 400)) {
1500 	while ( xmlNanoHTTPRecv(ctxt) > 0 )
1501             ;
1502         if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1503 	    nbRedirects++;
1504 	    if (redirURL != NULL)
1505 		xmlFree(redirURL);
1506 	    redirURL = xmlMemStrdup(ctxt->location);
1507 	    xmlNanoHTTPFreeCtxt(ctxt);
1508 	    goto retry;
1509 	}
1510 	xmlNanoHTTPFreeCtxt(ctxt);
1511 	if (redirURL != NULL) xmlFree(redirURL);
1512 	return(NULL);
1513     }
1514 
1515     if (contentType != NULL) {
1516 	if (ctxt->contentType != NULL)
1517 	    *contentType = xmlMemStrdup(ctxt->contentType);
1518 	else
1519 	    *contentType = NULL;
1520     }
1521 
1522     if ((redir != NULL) && (redirURL != NULL)) {
1523 	*redir = redirURL;
1524     } else {
1525 	if (redirURL != NULL)
1526 	    xmlFree(redirURL);
1527 	if (redir != NULL)
1528 	    *redir = NULL;
1529     }
1530 
1531     return((void *) ctxt);
1532 }
1533 
/**
 * xmlNanoHTTPMethod:
 * @URL:  The URL to load
 * @method:  the HTTP method to use
 * @input:  the input string if any
 * @contentType:  the Content-Type information IN and OUT
 * @headers:  the extra headers
 * @ilen:  input length
 *
 * Opens a connection to the indicated resource via HTTP using the given
 * @method, adding the given extra headers and the input buffer for the
 * request content. Forwards to xmlNanoHTTPMethodRedir() with a NULL
 * redirection output.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
                  char **contentType, const char *headers, int ilen) {
    void *handle;

    handle = xmlNanoHTTPMethodRedir(URL, method, input, contentType,
                                    NULL, headers, ilen);
    return(handle);
}
1557 
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, or "-" to
 *             write the content to standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function try to fetch the indicated resource via HTTP GET
 * and save its content in the file. The file is created if needed and
 * truncated, so that it holds exactly the fetched content afterwards.
 *
 * Returns -1 in case of failure, 0 in case of success. The contentType,
 *     if provided must be freed by the caller
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt = NULL;
    char *buf = NULL;
    int fd;
    int toStdout;
    int len;
    int written = 0;
    int ret = 0;

    if (filename == NULL) return(-1);
    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    toStdout = (strcmp(filename, "-") == 0);
    if (toStdout) {
        /* "-" means standard output: descriptor 1, not 0 (standard input) */
        fd = 1;
    } else {
        /* O_TRUNC: don't keep stale bytes of a longer pre-existing file */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            if ((contentType != NULL) && (*contentType != NULL)) {
                xmlFree(*contentType);
                *contentType = NULL;
            }
            return(-1);
        }
    }

    /* buf points into the context and stays valid until it is closed. */
    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may accept fewer bytes than requested: loop until done. */
    while (written < len) {
        int n = write(fd, buf + written, len - written);

        if (n < 0) {
            if (errno == EINTR)
                continue;
            ret = -1;
            break;
        }
        if (n == 0) {
            /* no progress, give up rather than spin */
            ret = -1;
            break;
        }
        written += n;
    }

    xmlNanoHTTPClose(ctxt);
    /* Standard output belongs to the process, leave it open. */
    if (!toStdout)
        close(fd);
    return(ret);
}
1608 
1609 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, or "-" to
 *             write the content to standard output
 *
 * This function saves the output of the HTTP transaction to a file.
 * The file is created if needed and truncated, so that it holds exactly
 * the received content afterwards. The context is closed and freed at the
 * end, including when the file cannot be opened; it is left untouched only
 * when an argument is NULL.
 *
 * Returns -1 in case of failure, 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char *buf = NULL;
    int fd;
    int toStdout;
    int len;
    int written = 0;
    int ret = 0;

    if ((ctxt == NULL) || (filename == NULL)) return(-1);

    toStdout = (strcmp(filename, "-") == 0);
    if (toStdout) {
        /* "-" means standard output: descriptor 1, not 0 (standard input) */
        fd = 1;
    } else {
        /* O_TRUNC: don't keep stale bytes of a longer pre-existing file */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
    }

    /* buf points into the context and stays valid until it is closed. */
    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /* write() may accept fewer bytes than requested: loop until done. */
    while (written < len) {
        int n = write(fd, buf + written, len - written);

        if (n < 0) {
            if (errno == EINTR)
                continue;
            ret = -1;
            break;
        }
        if (n == 0) {
            /* no progress, give up rather than spin */
            ret = -1;
            break;
        }
        written += n;
    }

    xmlNanoHTTPClose(ctxt);
    /* Standard output belongs to the process, leave it open. */
    if (!toStdout)
        close(fd);
    return(ret);
}
1650 #endif /* LIBXML_OUTPUT_ENABLED */
1651 
1652 /**
1653  * xmlNanoHTTPReturnCode:
1654  * @ctx:  the HTTP context
1655  *
1656  * Get the latest HTTP return code received
1657  *
1658  * Returns the HTTP return code for the request.
1659  */
1660 int
xmlNanoHTTPReturnCode(void * ctx)1661 xmlNanoHTTPReturnCode(void *ctx) {
1662     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1663 
1664     if (ctxt == NULL) return(-1);
1665 
1666     return(ctxt->returnValue);
1667 }
1668 
1669 /**
1670  * xmlNanoHTTPAuthHeader:
1671  * @ctx:  the HTTP context
1672  *
1673  * Get the authentication header of an HTTP context
1674  *
1675  * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1676  * header.
1677  */
1678 const char *
xmlNanoHTTPAuthHeader(void * ctx)1679 xmlNanoHTTPAuthHeader(void *ctx) {
1680     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1681 
1682     if (ctxt == NULL) return(NULL);
1683 
1684     return(ctxt->authHeader);
1685 }
1686 
1687 /**
1688  * xmlNanoHTTPContentLength:
1689  * @ctx:  the HTTP context
1690  *
1691  * Provides the specified content length from the HTTP header.
1692  *
1693  * Return the specified content length from the HTTP header.  Note that
1694  * a value of -1 indicates that the content length element was not included in
1695  * the response header.
1696  */
1697 int
xmlNanoHTTPContentLength(void * ctx)1698 xmlNanoHTTPContentLength( void * ctx ) {
1699     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1700 
1701     return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength );
1702 }
1703 
1704 /**
1705  * xmlNanoHTTPRedir:
1706  * @ctx:  the HTTP context
1707  *
1708  * Provides the specified redirection URL if available from the HTTP header.
1709  *
1710  * Return the specified redirection URL or NULL if not redirected.
1711  */
1712 const char *
xmlNanoHTTPRedir(void * ctx)1713 xmlNanoHTTPRedir( void * ctx ) {
1714     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1715 
1716     return ( ( ctxt == NULL ) ? NULL : ctxt->location );
1717 }
1718 
1719 /**
1720  * xmlNanoHTTPEncoding:
1721  * @ctx:  the HTTP context
1722  *
1723  * Provides the specified encoding if specified in the HTTP headers.
1724  *
1725  * Return the specified encoding or NULL if not available
1726  */
1727 const char *
xmlNanoHTTPEncoding(void * ctx)1728 xmlNanoHTTPEncoding( void * ctx ) {
1729     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1730 
1731     return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
1732 }
1733 
1734 /**
1735  * xmlNanoHTTPMimeType:
1736  * @ctx:  the HTTP context
1737  *
1738  * Provides the specified Mime-Type if specified in the HTTP headers.
1739  *
1740  * Return the specified Mime-Type or NULL if not available
1741  */
1742 const char *
xmlNanoHTTPMimeType(void * ctx)1743 xmlNanoHTTPMimeType( void * ctx ) {
1744     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1745 
1746     return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
1747 }
1748 
1749 /**
1750  * xmlNanoHTTPFetchContent:
1751  * @ctx:  the HTTP context
1752  * @ptr:  pointer to set to the content buffer.
1753  * @len:  integer pointer to hold the length of the content
1754  *
1755  * Check if all the content was read
1756  *
1757  * Returns 0 if all the content was read and available, returns
1758  * -1 if received content length was less than specified or an error
1759  * occurred.
1760  */
1761 static int
xmlNanoHTTPFetchContent(void * ctx,char ** ptr,int * len)1762 xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) {
1763     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1764 
1765     int			rc = 0;
1766     int			cur_lgth;
1767     int			rcvd_lgth;
1768     int			dummy_int;
1769     char *		dummy_ptr = NULL;
1770 
1771     /*  Dummy up return input parameters if not provided  */
1772 
1773     if ( len == NULL )
1774         len = &dummy_int;
1775 
1776     if ( ptr == NULL )
1777         ptr = &dummy_ptr;
1778 
1779     /*  But can't work without the context pointer  */
1780 
1781     if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) {
1782         *len = 0;
1783 	*ptr = NULL;
1784 	return ( -1 );
1785     }
1786 
1787     rcvd_lgth = ctxt->inptr - ctxt->content;
1788 
1789     while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) {
1790 
1791 	rcvd_lgth += cur_lgth;
1792 	if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) )
1793 	    break;
1794     }
1795 
1796     *ptr = ctxt->content;
1797     *len = rcvd_lgth;
1798 
1799     if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) )
1800         rc = -1;
1801     else if ( rcvd_lgth == 0 )
1802 	rc = -1;
1803 
1804     return ( rc );
1805 }
1806 
1807 #endif /* LIBXML_HTTP_ENABLED */
1808