1 /*
2  *
3  * Copyright (c) 2003
4  * John Maddock
5  *
6  * Use, modification and distribution are subject to the
7  * Boost Software License, Version 1.0. (See accompanying file
8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9  *
10  */
11 
12  /*
13   *   LOCATION:    see http://www.boost.org for most recent version.
14   *   FILE         regex_token_iterator_example_2.cpp
15   *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: regex_token_iterator example: spit out linked URLs.
17   */
18 
19 
20 #include <boost/regex.hpp>
21 #include <fstream>
22 #include <iostream>
23 #include <iterator>
24 
25 boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
26                boost::regex::normal | boost::regbase::icase);
27 
// Reads every character remaining in `is` into `s`.
//
//   s  - receives the data; any previous contents are discarded first, so it
//        is empty on return when nothing could be read.
//   is - stream to drain; it is read one character at a time until get()
//        fails.
//
// If the stream is already in a failed state (for example a file that could
// not be opened) nothing is read and `s` is left empty.
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   if(!is) return;
   //
   // attempt to grow string buffer to match file size,
   // this doesn't always work: in_avail() is only a hint, and it may be
   // zero or -1.  Only a positive value is used, because casting -1 to
   // size_type would ask reserve() for the largest possible capacity and
   // make it throw std::length_error.
   //
   const std::streamsize avail = is.rdbuf()->in_avail();
   if(avail > 0)
      s.reserve(static_cast<std::string::size_type>(avail));
   char c;
   while(is.get(c))
   {
      // use logarithmic growth strategy, in case
      // the hint above was unavailable or too small:
      if(s.capacity() == s.size())
         s.reserve(s.capacity() * 3);
      s.append(1, c);
   }
}
46 
main(int argc,char ** argv)47 int main(int argc, char** argv)
48 {
49    std::string s;
50    int i;
51    for(i = 1; i < argc; ++i)
52    {
53       std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
54       s.erase();
55       std::ifstream is(argv[i]);
56       load_file(s, is);
57       is.close();
58       boost::sregex_token_iterator i(s.begin(), s.end(), e, 1);
59       boost::sregex_token_iterator j;
60       while(i != j)
61       {
62          std::cout << *i++ << std::endl;
63       }
64    }
65    //
66    // alternative method:
67    // test the array-literal constructor, and split out the whole
68    // match as well as $1....
69    //
70    for(i = 1; i < argc; ++i)
71    {
72       std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
73       s.erase();
74       std::ifstream is(argv[i]);
75       load_file(s, is);
76       is.close();
77       const int subs[] = {1, 0,};
78       boost::sregex_token_iterator i(s.begin(), s.end(), e, subs);
79       boost::sregex_token_iterator j;
80       while(i != j)
81       {
82          std::cout << *i++ << std::endl;
83       }
84    }
85 
86    return 0;
87 }
88 
89 
90