1*16467b97STreehugger Robot#!/usr/bin/ruby 2*16467b97STreehugger Robot# encoding: utf-8 3*16467b97STreehugger Robot 4*16467b97STreehugger Robot=begin LICENSE 5*16467b97STreehugger Robot 6*16467b97STreehugger Robot[The "BSD licence"] 7*16467b97STreehugger RobotCopyright (c) 2009-2010 Kyle Yetter 8*16467b97STreehugger RobotAll rights reserved. 9*16467b97STreehugger Robot 10*16467b97STreehugger RobotRedistribution and use in source and binary forms, with or without 11*16467b97STreehugger Robotmodification, are permitted provided that the following conditions 12*16467b97STreehugger Robotare met: 13*16467b97STreehugger Robot 14*16467b97STreehugger Robot 1. Redistributions of source code must retain the above copyright 15*16467b97STreehugger Robot notice, this list of conditions and the following disclaimer. 16*16467b97STreehugger Robot 2. Redistributions in binary form must reproduce the above copyright 17*16467b97STreehugger Robot notice, this list of conditions and the following disclaimer in the 18*16467b97STreehugger Robot documentation and/or other materials provided with the distribution. 19*16467b97STreehugger Robot 3. The name of the author may not be used to endorse or promote products 20*16467b97STreehugger Robot derived from this software without specific prior written permission. 21*16467b97STreehugger Robot 22*16467b97STreehugger RobotTHIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23*16467b97STreehugger RobotIMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24*16467b97STreehugger RobotOF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25*16467b97STreehugger RobotIN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26*16467b97STreehugger RobotINCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27*16467b97STreehugger RobotNOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28*16467b97STreehugger RobotDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29*16467b97STreehugger RobotTHEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30*16467b97STreehugger Robot(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31*16467b97STreehugger RobotTHIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32*16467b97STreehugger Robot 33*16467b97STreehugger Robot=end 34*16467b97STreehugger Robot 35*16467b97STreehugger Robotmodule ANTLR3 36*16467b97STreehugger Robotmodule Profile 37*16467b97STreehugger Robot=begin rdoc ANTLR3::Profile::ParserEvents 38*16467b97STreehugger Robot 39*16467b97STreehugger RobotANTLR3::Profile::ParserEvents expands basic debugging events for use by 40*16467b97STreehugger Robotrecognition code generated by ANTLR when called with the <tt>-profile</tt> 41*16467b97STreehugger Robotswitch. 42*16467b97STreehugger Robot 43*16467b97STreehugger Robot=end 44*16467b97STreehugger Robotmodule ParserEvents 45*16467b97STreehugger Robot include ANTLR3::Debug::ParserEvents 46*16467b97STreehugger Robot 47*16467b97STreehugger Robot def self.included( klass ) 48*16467b97STreehugger Robot super 49*16467b97STreehugger Robot if klass.is_a?( ::Class ) 50*16467b97STreehugger Robot def klass.profile? 51*16467b97STreehugger Robot true 52*16467b97STreehugger Robot end 53*16467b97STreehugger Robot end 54*16467b97STreehugger Robot end 55*16467b97STreehugger Robot 56*16467b97STreehugger Robot def initialize( stream, options = {} ) 57*16467b97STreehugger Robot options[ :debug_listener ] ||= Profiler.new( self ) 58*16467b97STreehugger Robot super( stream, options ) 59*16467b97STreehugger Robot end 60*16467b97STreehugger Robot 61*16467b97STreehugger Robot def already_parsed_rule?( rule ) 62*16467b97STreehugger Robot @debug_listener.examine_rule_memoization( rule ) 63*16467b97STreehugger Robot super 64*16467b97STreehugger Robot end 65*16467b97STreehugger Robot 66*16467b97STreehugger Robot def profile 67*16467b97STreehugger Robot @debug_listener.profile 68*16467b97STreehugger Robot end 69*16467b97STreehugger Robot 70*16467b97STreehugger Robot def memoize( rule, start_index, success ) 71*16467b97STreehugger Robot @debug_listener.memoize( rule, rule_start_index, sucess ) 72*16467b97STreehugger Robot super 73*16467b97STreehugger Robot end 74*16467b97STreehugger Robotend 75*16467b97STreehugger Robot 76*16467b97STreehugger Robotclass DataSet < ::Array 77*16467b97STreehugger Robot include ::Math 78*16467b97STreehugger Robot def total 79*16467b97STreehugger Robot inject( :+ ) 80*16467b97STreehugger Robot end 81*16467b97STreehugger Robot def average 82*16467b97STreehugger Robot length > 0 ? ( total.to_f / length ) : 0 83*16467b97STreehugger Robot end 84*16467b97STreehugger Robot def variance 85*16467b97STreehugger Robot length.zero? and return( 0.0 ) 86*16467b97STreehugger Robot mean = average 87*16467b97STreehugger Robot inject( 0.0 ) { |t, i| t + ( i - mean )**2 } / ( length - 1 ) 88*16467b97STreehugger Robot end 89*16467b97STreehugger Robot def standard_deviation 90*16467b97STreehugger Robot sqrt( variance ) 91*16467b97STreehugger Robot end 92*16467b97STreehugger Robotend 93*16467b97STreehugger Robot 94*16467b97STreehugger Robot 95*16467b97STreehugger Robotunless const_defined?( :Profile ) 96*16467b97STreehugger Robot Profile = Struct.new( 97*16467b97STreehugger Robot :grammar_file, :parser_class, :top_rule, 98*16467b97STreehugger Robot :rule_invocations, :guessing_rule_invocations, :rule_invocation_depth, 99*16467b97STreehugger Robot :fixed_looks, :cyclic_looks, :syntactic_predicate_looks, 100*16467b97STreehugger Robot :memoization_cache_entries, :memoization_cache_hits, 101*16467b97STreehugger Robot :memoization_cache_misses, :tokens, :hidden_tokens, 102*16467b97STreehugger Robot :characters_matched, :hidden_characters_matched, :semantic_predicates, 103*16467b97STreehugger Robot :syntactic_predicates, :reported_errors 104*16467b97STreehugger Robot ) 105*16467b97STreehugger Robotend 106*16467b97STreehugger Robot 107*16467b97STreehugger Robotclass Profile 108*16467b97STreehugger Robot def initialize 109*16467b97STreehugger Robot init_values = Array.new( self.class.members.length, 0 ) 110*16467b97STreehugger Robot super( *init_values ) 111*16467b97STreehugger Robot self.top_rule = self.parser_class = self.grammar_file = nil 112*16467b97STreehugger Robot self.fixed_looks = DataSet.new 113*16467b97STreehugger Robot self.cyclic_looks = DataSet.new 114*16467b97STreehugger Robot self.syntactic_predicate_looks = DataSet.new 115*16467b97STreehugger Robot end 116*16467b97STreehugger Robot 117*16467b97STreehugger Robot def fixed_decisions 118*16467b97STreehugger Robot fixed_looks.length 119*16467b97STreehugger Robot end 120*16467b97STreehugger Robot 121*16467b97STreehugger Robot def cyclic_decisions 122*16467b97STreehugger Robot cyclic_looks.length 123*16467b97STreehugger Robot end 124*16467b97STreehugger Robot 125*16467b97STreehugger Robot def backtracking_decisions 126*16467b97STreehugger Robot syntactic_predicate_looks.length 127*16467b97STreehugger Robot end 128*16467b97STreehugger Robot 129*16467b97STreehugger Robot def generate_report 130*16467b97STreehugger Robot report = '+' << '-' * 78 << "+\n" 131*16467b97STreehugger Robot report << '| ' << "ANTLR Rule Profile".center( 76 ) << " |\n" 132*16467b97STreehugger Robot report << '+' << '-' * 78 << "+\n" 133*16467b97STreehugger Robot report << "| Generated at #{ Time.now }".ljust( 78 ) << " |\n" 134*16467b97STreehugger Robot report << "| Profiled #{ parser_class.name }##{ top_rule }".ljust( 78 ) << " |\n" 135*16467b97STreehugger Robot report << "| Rule source generated from grammar file #{ grammar_file }".ljust( 78 ) << " |\n" 136*16467b97STreehugger Robot report << '+' << '-' * 78 << "+\n" 137*16467b97STreehugger Robot 138*16467b97STreehugger Robot report << '| ' << "Rule Invocations".center( 76 ) << " |\n" 139*16467b97STreehugger Robot report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n" 140*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Total Invocations", rule_invocations ] 141*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "``Guessing'' Invocations", guessing_rule_invocations ] 142*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Deepest Level of Invocation", rule_invocation_depth ] 143*16467b97STreehugger Robot report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n" 144*16467b97STreehugger Robot 145*16467b97STreehugger Robot report << '| ' << "Execution Events".center( 76 ) << " |\n" 146*16467b97STreehugger Robot report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n" 147*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Semantic Predicates Evaluated", semantic_predicates ] 148*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Syntactic Predicates Evaluated", syntactic_predicates ] 149*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Errors Reported", reported_errors ] 150*16467b97STreehugger Robot report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n" 151*16467b97STreehugger Robot 152*16467b97STreehugger Robot report << '| ' << "Token and Character Data".center( 76 ) << " |\n" 153*16467b97STreehugger Robot report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n" 154*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Tokens Consumed", tokens ] 155*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Hidden Tokens Consumed", hidden_tokens ] 156*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Characters Matched", characters_matched ] 157*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Hidden Characters Matched", hidden_characters_matched ] 158*16467b97STreehugger Robot report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n" 159*16467b97STreehugger Robot 160*16467b97STreehugger Robot report << '| ' << "Memoization".center( 76 ) << " |\n" 161*16467b97STreehugger Robot report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n" 162*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Cache Entries", memoization_cache_entries ] 163*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Cache Hits", memoization_cache_hits ] 164*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Cache Misses", memoization_cache_misses ] 165*16467b97STreehugger Robot report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n" 166*16467b97STreehugger Robot 167*16467b97STreehugger Robot [ 168*16467b97STreehugger Robot [ 'Fixed Lookahead (k)', fixed_looks ], 169*16467b97STreehugger Robot [ 'Arbitrary Lookahead (k)', cyclic_looks ], 170*16467b97STreehugger Robot [ 'Backtracking (Syntactic Predicate)', syntactic_predicate_looks ] 171*16467b97STreehugger Robot ].each do |name, set| 172*16467b97STreehugger Robot mean, stdev = '%4.2f' % set.average, '%4.2f' % set.standard_deviation 173*16467b97STreehugger Robot report << '| ' << "#{ name } Decisions".center( 76 ) << " |\n" 174*16467b97STreehugger Robot report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n" 175*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Count", set.length ] 176*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Minimum k", set.min ] 177*16467b97STreehugger Robot report << "| %-66s | %7i |\n" % [ "Maximum k", set.max ] 178*16467b97STreehugger Robot report << "| %-66s | %7s |\n" % [ "Average k", mean ] 179*16467b97STreehugger Robot report << "| %-66s | %7s |\n" % [ "Standard Deviation of k", stdev ] 180*16467b97STreehugger Robot report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n" 181*16467b97STreehugger Robot end 182*16467b97STreehugger Robot return( report ) 183*16467b97STreehugger Robot end 184*16467b97STreehugger Robotend 185*16467b97STreehugger Robot 186*16467b97STreehugger Robot=begin rdoc ANTLR3::Profile::Profiler 187*16467b97STreehugger Robot 188*16467b97STreehugger RobotWhen ANTLR is run with the <tt>-profile</tt> switch, it generates recognition 189*16467b97STreehugger Robotcode that performs accounting about the decision logic performed while parsing 190*16467b97STreehugger Robotany given input. This information can be used to help refactor a slow grammar. 191*16467b97STreehugger RobotProfiler is an event-listener that performs all of the profiling accounting and 192*16467b97STreehugger Robotbuilds a simple report to present the various statistics. 193*16467b97STreehugger Robot 194*16467b97STreehugger Robot=end 195*16467b97STreehugger Robotclass Profiler 196*16467b97STreehugger Robot include Debug::EventListener 197*16467b97STreehugger Robot include Constants 198*16467b97STreehugger Robot 199*16467b97STreehugger Robot PROTOCOL_VERSION = 2 200*16467b97STreehugger Robot 201*16467b97STreehugger Robot attr_accessor :parser 202*16467b97STreehugger Robot attr_reader :rule_level 203*16467b97STreehugger Robot attr_reader :decision_level 204*16467b97STreehugger Robot 205*16467b97STreehugger Robot # tracks the maximum look value for the current decision 206*16467b97STreehugger Robot # (maxLookaheadInCurrentDecision in java Profiler) 207*16467b97STreehugger Robot attr_reader :decision_look 208*16467b97STreehugger Robot 209*16467b97STreehugger Robot # the last token consumed 210*16467b97STreehugger Robot # (lastTokenConsumed in java Profiler) 211*16467b97STreehugger Robot attr_reader :last_token 212*16467b97STreehugger Robot attr_reader :look_stack 213*16467b97STreehugger Robot attr_reader :profile 214*16467b97STreehugger Robot 215*16467b97STreehugger Robot attr_accessor :output 216*16467b97STreehugger Robot 217*16467b97STreehugger Robot def initialize( parser = nil, output = nil ) 218*16467b97STreehugger Robot @parser = parser 219*16467b97STreehugger Robot @profile = nil 220*16467b97STreehugger Robot @rule_level = 0 221*16467b97STreehugger Robot @decision_level = 0 222*16467b97STreehugger Robot @decision_look = 0 223*16467b97STreehugger Robot @last_token = nil 224*16467b97STreehugger Robot @look_stack = [] 225*16467b97STreehugger Robot @output = output 226*16467b97STreehugger Robot end 227*16467b97STreehugger Robot 228*16467b97STreehugger Robot def commence 229*16467b97STreehugger Robot @profile = Profile.new 230*16467b97STreehugger Robot @rule_level = 0 231*16467b97STreehugger Robot @decision_level = 0 232*16467b97STreehugger Robot @decision_look = 0 233*16467b97STreehugger Robot @last_token = nil 234*16467b97STreehugger Robot @look_stack = [] 235*16467b97STreehugger Robot end 236*16467b97STreehugger Robot 237*16467b97STreehugger Robot def enter_rule( grammar_file_name, rule_name ) 238*16467b97STreehugger Robot if @rule_level.zero? 239*16467b97STreehugger Robot commence 240*16467b97STreehugger Robot @profile.grammar_file = grammar_file_name 241*16467b97STreehugger Robot @profile.parser_class = @parser.class 242*16467b97STreehugger Robot @profile.top_rule = rule_name 243*16467b97STreehugger Robot end 244*16467b97STreehugger Robot @rule_level += 1 245*16467b97STreehugger Robot @profile.rule_invocations += 1 246*16467b97STreehugger Robot @profile.rule_invocation_depth < @rule_level and 247*16467b97STreehugger Robot @profile.rule_invocation_depth = @rule_level 248*16467b97STreehugger Robot end 249*16467b97STreehugger Robot 250*16467b97STreehugger Robot def exit_rule( grammar_file_name, rule_name ) 251*16467b97STreehugger Robot @rule_level -= 1 252*16467b97STreehugger Robot end 253*16467b97STreehugger Robot 254*16467b97STreehugger Robot def examine_rule_memoization( rule ) 255*16467b97STreehugger Robot stop_index = parser.rule_memoization( rule, @parser.input.index ) 256*16467b97STreehugger Robot if stop_index == MEMO_RULE_UNKNOWN 257*16467b97STreehugger Robot @profile.memoization_cache_misses += 1 258*16467b97STreehugger Robot @profile.guessing_rule_invocations += 1 259*16467b97STreehugger Robot else 260*16467b97STreehugger Robot @profile.memoization_cache_hits += 1 261*16467b97STreehugger Robot end 262*16467b97STreehugger Robot end 263*16467b97STreehugger Robot 264*16467b97STreehugger Robot def memoize( rule, start_index, success ) 265*16467b97STreehugger Robot @profile.memoization_cache_entries += 1 266*16467b97STreehugger Robot end 267*16467b97STreehugger Robot 268*16467b97STreehugger Robot 269*16467b97STreehugger Robot def enter_decision( decision_number ) 270*16467b97STreehugger Robot @decision_level += 1 271*16467b97STreehugger Robot starting_look_index = @parser.input.index 272*16467b97STreehugger Robot @look_stack << starting_look_index 273*16467b97STreehugger Robot end 274*16467b97STreehugger Robot 275*16467b97STreehugger Robot def exit_decision( decision_number ) 276*16467b97STreehugger Robot @look_stack.pop 277*16467b97STreehugger Robot @decision_level -= 1 278*16467b97STreehugger Robot if @parser.cyclic_decision? then 279*16467b97STreehugger Robot @profile.cyclic_looks << @decision_look 280*16467b97STreehugger Robot else @profile.fixed_looks << @decision_look 281*16467b97STreehugger Robot end 282*16467b97STreehugger Robot 283*16467b97STreehugger Robot @parser.cyclic_decision = false 284*16467b97STreehugger Robot @decision_look = 0 285*16467b97STreehugger Robot end 286*16467b97STreehugger Robot 287*16467b97STreehugger Robot def consume_token( token ) 288*16467b97STreehugger Robot @last_token = token 289*16467b97STreehugger Robot end 290*16467b97STreehugger Robot 291*16467b97STreehugger Robot def in_decision? 292*16467b97STreehugger Robot return( @decision_level > 0 ) 293*16467b97STreehugger Robot end 294*16467b97STreehugger Robot 295*16467b97STreehugger Robot def consume_hidden_token( token ) 296*16467b97STreehugger Robot @last_token = token 297*16467b97STreehugger Robot end 298*16467b97STreehugger Robot 299*16467b97STreehugger Robot def look( i, token ) 300*16467b97STreehugger Robot in_decision? or return 301*16467b97STreehugger Robot starting_index = look_stack.last 302*16467b97STreehugger Robot input = @parser.input 303*16467b97STreehugger Robot this_ref_index = input.index 304*16467b97STreehugger Robot num_hidden = input.tokens( starting_index, this_ref_index ).count { |t| t.hidden? } 305*16467b97STreehugger Robot depth = i + this_ref_index - starting_index - num_hidden 306*16467b97STreehugger Robot if depth > @decision_look 307*16467b97STreehugger Robot @decision_look = depth 308*16467b97STreehugger Robot end 309*16467b97STreehugger Robot end 310*16467b97STreehugger Robot 311*16467b97STreehugger Robot def end_backtrack( level, successful ) 312*16467b97STreehugger Robot @profile.syntactic_predicate_looks << @decision_look 313*16467b97STreehugger Robot end 314*16467b97STreehugger Robot 315*16467b97STreehugger Robot def recognition_exception( error ) 316*16467b97STreehugger Robot @profile.reported_errors += 1 317*16467b97STreehugger Robot end 318*16467b97STreehugger Robot 319*16467b97STreehugger Robot def semantic_predicate( result, predicate ) 320*16467b97STreehugger Robot in_decision? and @profile.semantic_predicates += 1 321*16467b97STreehugger Robot end 322*16467b97STreehugger Robot 323*16467b97STreehugger Robot def terminate 324*16467b97STreehugger Robot input = @parser.input 325*16467b97STreehugger Robot hidden_tokens = input.select { |token| token.hidden? } 326*16467b97STreehugger Robot @profile.hidden_tokens = hidden_tokens.length 327*16467b97STreehugger Robot @profile.tokens = input.tokens.length 328*16467b97STreehugger Robot @profile.hidden_characters_matched = hidden_tokens.inject( 0 ) do |count, token| 329*16467b97STreehugger Robot count + token.text.length rescue count 330*16467b97STreehugger Robot end 331*16467b97STreehugger Robot @profile.characters_matched = ( @last_token || input.tokens.last ).stop + 1 rescue 0 332*16467b97STreehugger Robot write_report 333*16467b97STreehugger Robot end 334*16467b97STreehugger Robot 335*16467b97STreehugger Robot 336*16467b97STreehugger Robot def write_report 337*16467b97STreehugger Robot @output << @profile.generate_report unless @output.nil? 338*16467b97STreehugger Robot rescue NoMethodError => error 339*16467b97STreehugger Robot if error.name.to_s == '<<' 340*16467b97STreehugger Robot warn( <<-END.strip! % [ __FILE__, __LINE__, @output ] ) 341*16467b97STreehugger Robot [%s @ %s]: failed to write report to %p as it does not respond to :<< 342*16467b97STreehugger Robot END 343*16467b97STreehugger Robot else raise 344*16467b97STreehugger Robot end 345*16467b97STreehugger Robot rescue IOError => error 346*16467b97STreehugger Robot $stderr.puts( Util.tidy( <<-END ) % [ __FILE__, __LINE__, @output, error.class, error.message ] ) 347*16467b97STreehugger Robot | [%s @ %s]: failed to write profile report to %p due to an IO Error: 348*16467b97STreehugger Robot | %s: %s 349*16467b97STreehugger Robot END 350*16467b97STreehugger Robot $stderr.puts( error.backtrace.map { |call| " - #{ call }" }.join( "\n" ) ) 351*16467b97STreehugger Robot end 352*16467b97STreehugger Robot 353*16467b97STreehugger Robot def report 354*16467b97STreehugger Robot @profile.generate_report 355*16467b97STreehugger Robot end 356*16467b97STreehugger Robot 357*16467b97STreehugger Robot alias to_s report 358*16467b97STreehugger Robotend 359*16467b97STreehugger Robotend 360*16467b97STreehugger Robotend 361