xref: /aosp_15_r20/external/antlr/runtime/Perl5/t/lexer.t (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robotuse strict;
2*16467b97STreehugger Robotuse warnings;
3*16467b97STreehugger Robot
4*16467b97STreehugger Robotuse FindBin;
5*16467b97STreehugger Robotuse lib qw( t/lib );
6*16467b97STreehugger Robot
7*16467b97STreehugger Robotuse File::Slurp;
8*16467b97STreehugger Robot
9*16467b97STreehugger Robotuse Test::More;
10*16467b97STreehugger Robotuse ANTLR::Runtime::Test;
11*16467b97STreehugger Robot
12*16467b97STreehugger Robotplan tests => 2;
13*16467b97STreehugger Robot
# Read a grammar file stored under the t/ directory.
#
# Takes a file name relative to t/ and returns whatever File::Slurp's
# read_file() produces for "t/<name>".  The path is relative, so it is
# resolved against the current working directory.  read_file() is the
# returned expression, so it is evaluated in the caller's context.
sub grammar_file {
    my $name = shift;
    return read_file("t/$name");
}
18*16467b97STreehugger Robot
# A simple test: try to lex one possible token.
#
# The grammar defines a single INT rule.  The test program lexes the input
# '123' and prints the text of every token it gets back, one per line, so
# the expected output is the single line "123".
#
# The token is held in a named lexical ($token).  Declaring "my $_" is
# rejected by perl 5.24 and later, where lexical $_ was removed.
# NOTE(review): the loop ends when next_token() yields a false value;
# presumably the runtime's EOF token evaluates as false -- confirm.
g_test_output_is({ grammar => <<'GRAMMAR', test_program => <<'CODE', expected => <<'OUTPUT' });
/* This is a comment.  Note that we're in the ANTLR grammar here, so it's not
   a Perl '#' comment, and may be multi line... */
// ... or a single line comment
lexer grammar INTLexer;
/* Set target language to Perl5. */
options { language = Perl5; }

/* Lexer rule for an integer. */
INT : '0'..'9'+;
GRAMMAR
use strict;
use warnings;

use ANTLR::Runtime::ANTLRStringStream;
use INTLexer;

my $input = ANTLR::Runtime::ANTLRStringStream->new({ input => '123' });
my $lexer = INTLexer->new({ input => $input });
while ((my $token = $lexer->next_token())) {
    print $token->get_text(), "\n";
}
CODE
123
OUTPUT
45*16467b97STreehugger Robot
# Several alternative token rules, two of which carry actions: NEWLINE
# calls skip() and WS assigns the HIDDEN channel.
#
# The test program lexes "Hello World!\n42\n", stops once the token type
# equals IDLexer->EOF, and prints text, type, position, channel and token
# index for every token returned before that.  The expected output lists
# no NEWLINE token and shows the WS token on channel 99.
#
# Each here-doc body follows directly after the line that names it.
g_test_output_is({
    grammar      => <<'GRAMMAR',
lexer grammar IDLexer;
options { language = Perl5; }

ID      : ('a'..'z'|'A'..'Z')+ ;
INT     : '0'..'9'+ ;
NEWLINE : '\r'? '\n'  { $self->skip() } ;
WS      : (' '|'\t')+ { $channel = HIDDEN } ;
GRAMMAR
    test_program => <<'CODE',
use strict;
use warnings;

use ANTLR::Runtime::ANTLRStringStream;
use IDLexer;

my $input = ANTLR::Runtime::ANTLRStringStream->new({ input => "Hello World!\n42\n" });
my $lexer = IDLexer->new({ input => $input });

while (1) {
    my $token = $lexer->next_token();
    last if $token->get_type() == IDLexer->EOF;

    print "text: '", $token->get_text(), "'\n";
    print "type: ",  $token->get_type(), "\n";
    print "pos: ",   $token->get_line(), ':', $token->get_char_position_in_line(), "\n";
    print "channel: ",     $token->get_channel(), "\n";
    print "token index: ", $token->get_token_index(), "\n";
    print "\n";
}
CODE
    expected     => <<'OUTPUT',
text: 'Hello'
type: 4
pos: 1:0
channel: 0
token index: -1

text: ' '
type: 7
pos: 1:5
channel: 99
token index: -1

text: 'World'
type: 4
pos: 1:6
channel: 0
token index: -1

text: '42'
type: 5
pos: 2:0
channel: 0
token index: -1

OUTPUT
});
102*16467b97STreehugger Robot
103*16467b97STreehugger Robot=begin SKIP doesn't compile yet
104*16467b97STreehugger Robot
105*16467b97STreehugger Robotg_test_output_is({ grammar => scalar grammar_file('XMLLexer.g'), test_program => <<'CODE', expected => <<'OUTPUT' });
106*16467b97STreehugger Robotuse English qw( -no_match_vars );
107*16467b97STreehugger Robotuse ANTLR::Runtime::ANTLRStringStream;
108*16467b97STreehugger Robotuse XMLLexer;
109*16467b97STreehugger Robot
110*16467b97STreehugger Robotuse strict;
111*16467b97STreehugger Robotuse warnings;
112*16467b97STreehugger Robot
113*16467b97STreehugger Robotmy $input = ANTLR::Runtime::ANTLRStringStream->new(<< 'XML');
114*16467b97STreehugger Robot<?xml version='1.0'?>
115*16467b97STreehugger Robot<test>foo</test>
116*16467b97STreehugger RobotXML
117*16467b97STreehugger Robotmy $lexer = IDLexer->new($input);
118*16467b97STreehugger Robotwhile ((my $_ = $lexer->next_token())) {
119*16467b97STreehugger Robot}
120*16467b97STreehugger RobotCODE
121*16467b97STreehugger RobotXML declaration
122*16467b97STreehugger RobotPCDATA: "foo"
123*16467b97STreehugger RobotOUTPUT
124*16467b97STreehugger Robot}
125*16467b97STreehugger Robot
126*16467b97STreehugger Robot=end SKIP
127