xref: /aosp_15_r20/external/curl/docs/examples/htmltitle.cpp (revision 6236dae45794135f37c4eb022389c904c8b0090d)
/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at https://curl.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 * SPDX-License-Identifier: curl
 *
 ***************************************************************************/
/* <DESC>
 * Get a web page, extract the title with libxml.
 * </DESC>

 Written by Lars Nilsson

 GNU C++ compile command line suggestion (edit paths accordingly):

 g++ -Wall -I/opt/curl/include -I/opt/libxml/include/libxml2 htmltitle.cpp \
 -o htmltitle -L/opt/curl/lib -L/opt/libxml/lib -lcurl -lxml2
*/
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <curl/curl.h>
#include <libxml/HTMLparser.h>
41*6236dae4SAndroid Build Coastguard Worker 
//
//  Case-insensitive string equality test
//
//  COMPARE(a, b) is true when the two NUL-terminated strings are equal
//  ignoring letter case. MSVC names the comparison routine _stricmp; every
//  other toolchain is expected to provide strcasecmp.
//

#if defined(_MSC_VER)
#define COMPARE(a, b) (_stricmp((a), (b)) == 0)
#else
#define COMPARE(a, b) (strcasecmp((a), (b)) == 0)
#endif
51*6236dae4SAndroid Build Coastguard Worker 
//
//  State handed to the libxml SAX callbacks
//
//  addTitle tells the text callbacks whether incoming characters should be
//  appended to title; a fresh Context starts with collection switched off
//  and an empty title.
//

struct Context
{
  bool addTitle;
  std::string title;

  Context() : addTitle(false), title() { }
};
63*6236dae4SAndroid Build Coastguard Worker 
64*6236dae4SAndroid Build Coastguard Worker //
65*6236dae4SAndroid Build Coastguard Worker //  libcurl variables for error strings and returned data
66*6236dae4SAndroid Build Coastguard Worker 
67*6236dae4SAndroid Build Coastguard Worker static char errorBuffer[CURL_ERROR_SIZE];
68*6236dae4SAndroid Build Coastguard Worker static std::string buffer;
69*6236dae4SAndroid Build Coastguard Worker 
//
//  libcurl write callback function
//
//  Appends the size * nmemb bytes starting at data to *writerData and
//  returns the number of bytes consumed. When no destination string is
//  supplied nothing is stored and 0 is returned; libcurl treats a return
//  value that differs from size * nmemb as a write error.
//
//  The return type is size_t because that is what the CURLOPT_WRITEFUNCTION
//  prototype requires; returning int would truncate the byte count and make
//  the function's type disagree with the one libcurl calls it through.
//

static size_t writer(char *data, size_t size, size_t nmemb,
                     std::string *writerData)
{
  if(writerData == NULL)
    return 0;

  const size_t bytes = size * nmemb;

  writerData->append(data, bytes);

  return bytes;
}
84*6236dae4SAndroid Build Coastguard Worker 
85*6236dae4SAndroid Build Coastguard Worker //
86*6236dae4SAndroid Build Coastguard Worker //  libcurl connection initialization
87*6236dae4SAndroid Build Coastguard Worker //
88*6236dae4SAndroid Build Coastguard Worker 
init(CURL * & conn,char * url)89*6236dae4SAndroid Build Coastguard Worker static bool init(CURL *&conn, char *url)
90*6236dae4SAndroid Build Coastguard Worker {
91*6236dae4SAndroid Build Coastguard Worker   CURLcode code;
92*6236dae4SAndroid Build Coastguard Worker 
93*6236dae4SAndroid Build Coastguard Worker   conn = curl_easy_init();
94*6236dae4SAndroid Build Coastguard Worker 
95*6236dae4SAndroid Build Coastguard Worker   if(conn == NULL) {
96*6236dae4SAndroid Build Coastguard Worker     fprintf(stderr, "Failed to create CURL connection\n");
97*6236dae4SAndroid Build Coastguard Worker     exit(EXIT_FAILURE);
98*6236dae4SAndroid Build Coastguard Worker   }
99*6236dae4SAndroid Build Coastguard Worker 
100*6236dae4SAndroid Build Coastguard Worker   code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer);
101*6236dae4SAndroid Build Coastguard Worker   if(code != CURLE_OK) {
102*6236dae4SAndroid Build Coastguard Worker     fprintf(stderr, "Failed to set error buffer [%d]\n", code);
103*6236dae4SAndroid Build Coastguard Worker     return false;
104*6236dae4SAndroid Build Coastguard Worker   }
105*6236dae4SAndroid Build Coastguard Worker 
106*6236dae4SAndroid Build Coastguard Worker   code = curl_easy_setopt(conn, CURLOPT_URL, url);
107*6236dae4SAndroid Build Coastguard Worker   if(code != CURLE_OK) {
108*6236dae4SAndroid Build Coastguard Worker     fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);
109*6236dae4SAndroid Build Coastguard Worker     return false;
110*6236dae4SAndroid Build Coastguard Worker   }
111*6236dae4SAndroid Build Coastguard Worker 
112*6236dae4SAndroid Build Coastguard Worker   code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L);
113*6236dae4SAndroid Build Coastguard Worker   if(code != CURLE_OK) {
114*6236dae4SAndroid Build Coastguard Worker     fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);
115*6236dae4SAndroid Build Coastguard Worker     return false;
116*6236dae4SAndroid Build Coastguard Worker   }
117*6236dae4SAndroid Build Coastguard Worker 
118*6236dae4SAndroid Build Coastguard Worker   code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer);
119*6236dae4SAndroid Build Coastguard Worker   if(code != CURLE_OK) {
120*6236dae4SAndroid Build Coastguard Worker     fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);
121*6236dae4SAndroid Build Coastguard Worker     return false;
122*6236dae4SAndroid Build Coastguard Worker   }
123*6236dae4SAndroid Build Coastguard Worker 
124*6236dae4SAndroid Build Coastguard Worker   code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer);
125*6236dae4SAndroid Build Coastguard Worker   if(code != CURLE_OK) {
126*6236dae4SAndroid Build Coastguard Worker     fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);
127*6236dae4SAndroid Build Coastguard Worker     return false;
128*6236dae4SAndroid Build Coastguard Worker   }
129*6236dae4SAndroid Build Coastguard Worker 
130*6236dae4SAndroid Build Coastguard Worker   return true;
131*6236dae4SAndroid Build Coastguard Worker }
132*6236dae4SAndroid Build Coastguard Worker 
133*6236dae4SAndroid Build Coastguard Worker //
134*6236dae4SAndroid Build Coastguard Worker //  libxml start element callback function
135*6236dae4SAndroid Build Coastguard Worker //
136*6236dae4SAndroid Build Coastguard Worker 
StartElement(void * voidContext,const xmlChar * name,const xmlChar ** attributes)137*6236dae4SAndroid Build Coastguard Worker static void StartElement(void *voidContext,
138*6236dae4SAndroid Build Coastguard Worker                          const xmlChar *name,
139*6236dae4SAndroid Build Coastguard Worker                          const xmlChar **attributes)
140*6236dae4SAndroid Build Coastguard Worker {
141*6236dae4SAndroid Build Coastguard Worker   Context *context = static_cast<Context *>(voidContext);
142*6236dae4SAndroid Build Coastguard Worker 
143*6236dae4SAndroid Build Coastguard Worker   if(COMPARE(reinterpret_cast<char *>(name), "TITLE")) {
144*6236dae4SAndroid Build Coastguard Worker     context->title = "";
145*6236dae4SAndroid Build Coastguard Worker     context->addTitle = true;
146*6236dae4SAndroid Build Coastguard Worker   }
147*6236dae4SAndroid Build Coastguard Worker   (void) attributes;
148*6236dae4SAndroid Build Coastguard Worker }
149*6236dae4SAndroid Build Coastguard Worker 
150*6236dae4SAndroid Build Coastguard Worker //
151*6236dae4SAndroid Build Coastguard Worker //  libxml end element callback function
152*6236dae4SAndroid Build Coastguard Worker //
153*6236dae4SAndroid Build Coastguard Worker 
EndElement(void * voidContext,const xmlChar * name)154*6236dae4SAndroid Build Coastguard Worker static void EndElement(void *voidContext,
155*6236dae4SAndroid Build Coastguard Worker                        const xmlChar *name)
156*6236dae4SAndroid Build Coastguard Worker {
157*6236dae4SAndroid Build Coastguard Worker   Context *context = static_cast<Context *>(voidContext);
158*6236dae4SAndroid Build Coastguard Worker 
159*6236dae4SAndroid Build Coastguard Worker   if(COMPARE(reinterpret_cast<char *>(name), "TITLE"))
160*6236dae4SAndroid Build Coastguard Worker     context->addTitle = false;
161*6236dae4SAndroid Build Coastguard Worker }
162*6236dae4SAndroid Build Coastguard Worker 
163*6236dae4SAndroid Build Coastguard Worker //
164*6236dae4SAndroid Build Coastguard Worker //  Text handling helper function
165*6236dae4SAndroid Build Coastguard Worker //
166*6236dae4SAndroid Build Coastguard Worker 
handleCharacters(Context * context,const xmlChar * chars,int length)167*6236dae4SAndroid Build Coastguard Worker static void handleCharacters(Context *context,
168*6236dae4SAndroid Build Coastguard Worker                              const xmlChar *chars,
169*6236dae4SAndroid Build Coastguard Worker                              int length)
170*6236dae4SAndroid Build Coastguard Worker {
171*6236dae4SAndroid Build Coastguard Worker   if(context->addTitle)
172*6236dae4SAndroid Build Coastguard Worker     context->title.append(reinterpret_cast<char *>(chars), length);
173*6236dae4SAndroid Build Coastguard Worker }
174*6236dae4SAndroid Build Coastguard Worker 
175*6236dae4SAndroid Build Coastguard Worker //
176*6236dae4SAndroid Build Coastguard Worker //  libxml PCDATA callback function
177*6236dae4SAndroid Build Coastguard Worker //
178*6236dae4SAndroid Build Coastguard Worker 
Characters(void * voidContext,const xmlChar * chars,int length)179*6236dae4SAndroid Build Coastguard Worker static void Characters(void *voidContext,
180*6236dae4SAndroid Build Coastguard Worker                        const xmlChar *chars,
181*6236dae4SAndroid Build Coastguard Worker                        int length)
182*6236dae4SAndroid Build Coastguard Worker {
183*6236dae4SAndroid Build Coastguard Worker   Context *context = static_cast<Context *>(voidContext);
184*6236dae4SAndroid Build Coastguard Worker 
185*6236dae4SAndroid Build Coastguard Worker   handleCharacters(context, chars, length);
186*6236dae4SAndroid Build Coastguard Worker }
187*6236dae4SAndroid Build Coastguard Worker 
188*6236dae4SAndroid Build Coastguard Worker //
189*6236dae4SAndroid Build Coastguard Worker //  libxml CDATA callback function
190*6236dae4SAndroid Build Coastguard Worker //
191*6236dae4SAndroid Build Coastguard Worker 
cdata(void * voidContext,const xmlChar * chars,int length)192*6236dae4SAndroid Build Coastguard Worker static void cdata(void *voidContext,
193*6236dae4SAndroid Build Coastguard Worker                   const xmlChar *chars,
194*6236dae4SAndroid Build Coastguard Worker                   int length)
195*6236dae4SAndroid Build Coastguard Worker {
196*6236dae4SAndroid Build Coastguard Worker   Context *context = static_cast<Context *>(voidContext);
197*6236dae4SAndroid Build Coastguard Worker 
198*6236dae4SAndroid Build Coastguard Worker   handleCharacters(context, chars, length);
199*6236dae4SAndroid Build Coastguard Worker }
200*6236dae4SAndroid Build Coastguard Worker 
201*6236dae4SAndroid Build Coastguard Worker //
202*6236dae4SAndroid Build Coastguard Worker //  libxml SAX callback structure
203*6236dae4SAndroid Build Coastguard Worker //
204*6236dae4SAndroid Build Coastguard Worker 
205*6236dae4SAndroid Build Coastguard Worker static htmlSAXHandler saxHandler =
206*6236dae4SAndroid Build Coastguard Worker {
207*6236dae4SAndroid Build Coastguard Worker   NULL,
208*6236dae4SAndroid Build Coastguard Worker   NULL,
209*6236dae4SAndroid Build Coastguard Worker   NULL,
210*6236dae4SAndroid Build Coastguard Worker   NULL,
211*6236dae4SAndroid Build Coastguard Worker   NULL,
212*6236dae4SAndroid Build Coastguard Worker   NULL,
213*6236dae4SAndroid Build Coastguard Worker   NULL,
214*6236dae4SAndroid Build Coastguard Worker   NULL,
215*6236dae4SAndroid Build Coastguard Worker   NULL,
216*6236dae4SAndroid Build Coastguard Worker   NULL,
217*6236dae4SAndroid Build Coastguard Worker   NULL,
218*6236dae4SAndroid Build Coastguard Worker   NULL,
219*6236dae4SAndroid Build Coastguard Worker   NULL,
220*6236dae4SAndroid Build Coastguard Worker   NULL,
221*6236dae4SAndroid Build Coastguard Worker   StartElement,
222*6236dae4SAndroid Build Coastguard Worker   EndElement,
223*6236dae4SAndroid Build Coastguard Worker   NULL,
224*6236dae4SAndroid Build Coastguard Worker   Characters,
225*6236dae4SAndroid Build Coastguard Worker   NULL,
226*6236dae4SAndroid Build Coastguard Worker   NULL,
227*6236dae4SAndroid Build Coastguard Worker   NULL,
228*6236dae4SAndroid Build Coastguard Worker   NULL,
229*6236dae4SAndroid Build Coastguard Worker   NULL,
230*6236dae4SAndroid Build Coastguard Worker   NULL,
231*6236dae4SAndroid Build Coastguard Worker   NULL,
232*6236dae4SAndroid Build Coastguard Worker   cdata,
233*6236dae4SAndroid Build Coastguard Worker   NULL
234*6236dae4SAndroid Build Coastguard Worker };
235*6236dae4SAndroid Build Coastguard Worker 
236*6236dae4SAndroid Build Coastguard Worker //
237*6236dae4SAndroid Build Coastguard Worker //  Parse given (assumed to be) HTML text and return the title
238*6236dae4SAndroid Build Coastguard Worker //
239*6236dae4SAndroid Build Coastguard Worker 
parseHtml(const std::string & html,std::string & title)240*6236dae4SAndroid Build Coastguard Worker static void parseHtml(const std::string &html,
241*6236dae4SAndroid Build Coastguard Worker                       std::string &title)
242*6236dae4SAndroid Build Coastguard Worker {
243*6236dae4SAndroid Build Coastguard Worker   htmlParserCtxtPtr ctxt;
244*6236dae4SAndroid Build Coastguard Worker   Context context;
245*6236dae4SAndroid Build Coastguard Worker 
246*6236dae4SAndroid Build Coastguard Worker   ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",
247*6236dae4SAndroid Build Coastguard Worker                                   XML_CHAR_ENCODING_NONE);
248*6236dae4SAndroid Build Coastguard Worker 
249*6236dae4SAndroid Build Coastguard Worker   htmlParseChunk(ctxt, html.c_str(), html.size(), 0);
250*6236dae4SAndroid Build Coastguard Worker   htmlParseChunk(ctxt, "", 0, 1);
251*6236dae4SAndroid Build Coastguard Worker 
252*6236dae4SAndroid Build Coastguard Worker   htmlFreeParserCtxt(ctxt);
253*6236dae4SAndroid Build Coastguard Worker 
254*6236dae4SAndroid Build Coastguard Worker   title = context.title;
255*6236dae4SAndroid Build Coastguard Worker }
256*6236dae4SAndroid Build Coastguard Worker 
main(int argc,char * argv[])257*6236dae4SAndroid Build Coastguard Worker int main(int argc, char *argv[])
258*6236dae4SAndroid Build Coastguard Worker {
259*6236dae4SAndroid Build Coastguard Worker   CURL *conn = NULL;
260*6236dae4SAndroid Build Coastguard Worker   CURLcode code;
261*6236dae4SAndroid Build Coastguard Worker   std::string title;
262*6236dae4SAndroid Build Coastguard Worker 
263*6236dae4SAndroid Build Coastguard Worker   // Ensure one argument is given
264*6236dae4SAndroid Build Coastguard Worker 
265*6236dae4SAndroid Build Coastguard Worker   if(argc != 2) {
266*6236dae4SAndroid Build Coastguard Worker     fprintf(stderr, "Usage: %s <url>\n", argv[0]);
267*6236dae4SAndroid Build Coastguard Worker     exit(EXIT_FAILURE);
268*6236dae4SAndroid Build Coastguard Worker   }
269*6236dae4SAndroid Build Coastguard Worker 
270*6236dae4SAndroid Build Coastguard Worker   curl_global_init(CURL_GLOBAL_DEFAULT);
271*6236dae4SAndroid Build Coastguard Worker 
272*6236dae4SAndroid Build Coastguard Worker   // Initialize CURL connection
273*6236dae4SAndroid Build Coastguard Worker 
274*6236dae4SAndroid Build Coastguard Worker   if(!init(conn, argv[1])) {
275*6236dae4SAndroid Build Coastguard Worker     fprintf(stderr, "Connection initialization failed\n");
276*6236dae4SAndroid Build Coastguard Worker     exit(EXIT_FAILURE);
277*6236dae4SAndroid Build Coastguard Worker   }
278*6236dae4SAndroid Build Coastguard Worker 
279*6236dae4SAndroid Build Coastguard Worker   // Retrieve content for the URL
280*6236dae4SAndroid Build Coastguard Worker 
281*6236dae4SAndroid Build Coastguard Worker   code = curl_easy_perform(conn);
282*6236dae4SAndroid Build Coastguard Worker   curl_easy_cleanup(conn);
283*6236dae4SAndroid Build Coastguard Worker 
284*6236dae4SAndroid Build Coastguard Worker   if(code != CURLE_OK) {
285*6236dae4SAndroid Build Coastguard Worker     fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);
286*6236dae4SAndroid Build Coastguard Worker     exit(EXIT_FAILURE);
287*6236dae4SAndroid Build Coastguard Worker   }
288*6236dae4SAndroid Build Coastguard Worker 
289*6236dae4SAndroid Build Coastguard Worker   // Parse the (assumed) HTML code
290*6236dae4SAndroid Build Coastguard Worker   parseHtml(buffer, title);
291*6236dae4SAndroid Build Coastguard Worker 
292*6236dae4SAndroid Build Coastguard Worker   // Display the extracted title
293*6236dae4SAndroid Build Coastguard Worker   printf("Title: %s\n", title.c_str());
294*6236dae4SAndroid Build Coastguard Worker 
295*6236dae4SAndroid Build Coastguard Worker   return EXIT_SUCCESS;
296*6236dae4SAndroid Build Coastguard Worker }
297