#!/usr/bin/perl

use strict;
use warnings;

use Getopt::Long;

# Options handed to NihongoOrg::WebStats::BrowserCounter->new(); only the
# values actually supplied on the command line are set.
my $options = {};

my ($report_title, $output_dir, $output_file, $output_config_file, $config_file, $help);

# The usage text documents '--new_config_file' while the option was
# historically registered as 'make_new_config'; accept both spellings.
# GetOptions returns false (after printing its own diagnostics) when the
# command line could not be parsed - do not run with half-understood options.
GetOptions(
    'output_dir=s'                      => \$output_dir,
    'output_file=s'                     => \$output_file,
    'report_title=s'                    => \$report_title,
    'config_file=s'                     => \$config_file,
    'make_new_config|new_config_file=s' => \$output_config_file,
    'help'                              => \$help,
    'usage'                             => \$help,
) or die("\n$0: Invalid command line options. Use --help for usage.\n\n");

if ($help) {
    usage();
    exit;
}

# Generate a fresh configuration file and stop; never overwrite an existing one.
if (defined $output_config_file) {
    if (-e $output_config_file) {
        die("\n$0: The file $output_config_file already exists. Cowardly refusing to overwrite existing file.\n\n");
    }
    NihongoOrg::WebStats::BrowserCounter->generate_new_configuration_file($output_config_file);
    print "New configuration file $output_config_file saved.\n";
    exit;
}

$options->{'reportTitle'} = $report_title if (defined $report_title);
$options->{'outputDir'}   = $output_dir   if (defined $output_dir);
$options->{'outputFile'}  = $output_file  if (defined $output_file);
$options->{'configFile'}  = $config_file  if (defined $config_file);

# Anything left on the command line after option processing is a log file.
$options->{'accessLogs'}  = [@ARGV]       if (0 < @ARGV);

my $bc = NihongoOrg::WebStats::BrowserCounter->new( $options );
$bc->process_stats;

exit;

######################################################################################################
# usage;
#
# Prints the command line help text to STDOUT. $0 is interpolated into the
# examples so they show the name the program was invoked with.
#
sub usage {
    print <<"EOT";

BrowserCounter - A program for generating web browser usage statistics (v. 1.3.0)

A fast, portable, and highly configurable web browser log analysis program
for analyzing standard and non-standard format web log files.

----------------------------------------------------------------------------
Required: Perl 5.6 or later
Optional: GD and GD::Graph (if pie charts are wanted)
          Time::HiRes (improves precision of performance metrics)
----------------------------------------------------------------------------
This program is licensed under the same conditions and terms as Perl itself.
This means that you can, at your option, redistribute it and/or modify it
under either the terms of the GNU Public License (GPL) version 1 or later, or
under the Perl Artistic License.

See http://dev.perl.org/licenses/
----------------------------------------------------------------------------

By default it generates a report on the platforms (Mac, Windows, Linux, etc)
used by web browsers, a summary of the 'brand' (MSIE, Firefox, Mozilla,
Safari, etc), and summaries of the brands broken down by major and minor
version. The default configuration is at the end of the script and is
documented within it.

It performs extensive filtering of processed log entries to generate as
nearly 'pure' results as possible. This means in practice that 90% of entries
are excluded because of statistical biases caused by differential browser
behaviors such as loading/not loading images, loading/not loading stylesheet,
loading/not loading of javascript and ActiveX, as well as more subtle biases
such as image loading sequence.

By default, it also excludes identifiable web crawling robots from the
statistics.

These behaviors can be changed, if desired, by using a custom configuration
file.

The program will generate pie chart graphs of the analyzed statistics if the
'GD' and 'GD::Graph' modules are installed (available from CPAN
http://cpan.org/ and from various PPM repositories). If not available, the
pie charts will be omitted from generated reports.

You can generate an initial configuration file using the
'--new_config_file' option:

  $0 --new_config_file=example_configuration.conf

The configuration file contains documentation for its options along with a
template that can be used to reformat the output report pretty much in any
way you want.

The command line options for the program are as follows:

 --usage or --help
      Prints this usage message and exits.

 --new_config_file=example.conf
      Saves a new initial configuration file to the specified file.
      (--make_new_config=example.conf is accepted as an alias.)

 --report_title="Browser Report"
      This allows setting the report title. The default is 'Browser Report'.

 --output_dir=/var/www/html/statistics/browser
      The path to the directory you want to save the output report to.
      The default configuration is /var/www/html/statistics/browser
      If the output directory does not exist the program will try to create
      it, and will die with an error if that fails.

      The --output_dir commandline option overrides any configuration file
      'output_dir' specification.

 --output_file=index.html
      Specifies the name of the output report file. By default, index.html

      The --output_file commandline option overrides any configuration file
      'output_file' specification.

 --config_file=/path/to/configuration_file
      The path to a configuration file for controlling the report. The
      default is to an internally specified configuration.

      Example:

        $0 --config_file=/var/www/configs/browsercounter.conf

Any files listed on the command line after all options have been processed
are assumed to be log files for processing and override the logfiles
specified within the configuration.

The program can handle gzip (.gz), compress (.z), and bzip2 (.bz2) compressed
logfiles as long as 'gzip' and 'bzip2' are in the PATH. You can add
additional compression program support via the configuration file.

Multiple logfiles are supported both on the commandline or via the
configuration file.

Simple usage example:

  $0 --report_title="My Report" --output_dir=/var/www/html/statistics --output_file=browsers.html /var/log/httpd/access_log

Note: _BROWSERCOUNTER IS NOT A CGI SCRIPT_. It is intended to be run
periodically (say once a day, week or even month) via a system job such as
'cron' and generate static HTML pages for viewing. You could probably modify
it to work as a CGI with a modest amount of effort, but it is not a good
idea: Log analysis is a resource intensive exercise and for logs of more than
a few megabytes takes a noticeable amount of time (a half-million line log
takes roughly 30 seconds to process on a 3Ghz Pentium system).

EOT
}

######################################################################################################
######################################################################################################
######################################################################################################

package NihongoOrg::WebStats::BrowserCounter;

## =head1 NAME
##
## NihongoOrg::WebStats::BrowserCounter - A package for processing web server logs and generating a user agents ("browsers") report
##
## =head1 SYNOPSIS
##
##   my $access_logs = [@ARGV];
##   my $bc = NihongoOrg::WebStats::BrowserCounter->new( accessLogs => $access_logs );
##   $bc->process_stats;
##
## =head1 DESCRIPTION
##
## =head1 CHANGES
##
##  1.3.0 20 Aug 2005 - Initial release
##
## =cut
##
#######################################################################

use strict;
use warnings;

use Carp qw (croak confess);
use File::Spec;
use IO::File;

use vars qw ($VERSION);

#######################################################################

BEGIN {
    $VERSION = '1.3.0';
}

#######################################################################
##
## =head1 METHODS
##
## =cut
##
#######################################################################
# _property($property_name [, $new_value]);
#
# Object properties generic get/set accessor.
#
# All properties live in a per-package sub-hash of the object
# ($self->{__PACKAGE__}). Called with only a property name it returns the
# stored value; called with a name and one value it stores the value and
# returns nothing. Any other number of arguments is a programming error
# and dies.
#
sub _property {
    my $self     = shift;
    my $property = shift;

    my $package = __PACKAGE__;
    if (0 == @_) {
        my $output = $self->{$package}->{$property};
        return $output;
    } elsif (1 == @_) {
        my $input = shift;
        $self->{$package}->{$property} = $input;
        return;
    } else {
        # ${package} (not {$package}) so the message shows the bare package
        # name instead of wrapping it in literal braces.
        die ("${package}::_property() - bad calling parameters\n");
    }
}

#######################################################################
##
## =over 4
##
## =item new([accessLogs => [@list_of_log_files,] [configFile => $configurationFile ]);
##
## Creates and optionally initializes the log processor.
##
## It takes the following optional initialization parameters:
##
## =over 4
##
## =item accessLogs
##
## An anonymous list of log files for processing.
## ## =item configFile ## ## The path to the file to be used for setting the configuration of the processor ## ## =back ## ## =back ## ## =cut ## sub new { my $proto = shift; my $package = __PACKAGE__; my $class = ref($proto) || $proto || $package; my $self = bless {}, $class; my $options = {}; if (1 < @_) { %$options = @_; } else { ($options) = @_; } if (defined $options->{'configFile'}) { $self->config_file($options->{'configFile'}); delete $options->{'configFile'}; } $self->probable_robots({}); $self->unrecognized_robots({}); $self->unrecognized_robots_count(0); $self->excluded_lines_count(0); $self->log_decompressors({}); $self->log_parser_patterns({}); $self->log_parser_maps({}); $self->exclude_robots('yes'); $self->minimum_browser_report_percentage(1); # Default 1% $self->raw_user_agents({}); $self->access_logs([]); $self->class_map({}); $self->platform_map({}); $self->class_colors_map({}); $self->pie_chart_size(300); $self->load_configuration({ configFile => $self->config_file }); if (defined $options->{'accessLogs'}) { my $access_logs = $options->{'accessLogs'}; my $parm_type = ref($access_logs); if ($parm_type eq '') { $self->access_logs([$access_logs]); } elsif ($parm_type eq 'ARRAY') { if (0 < @$access_logs) { $self->access_logs($access_logs); } } else { croak("accessLog parameter had unsupported reference type of $parm_type"); } delete $options->{'accessLogs'}; } if (defined $options->{'reportTitle'}) { $self->report_title($options->{'reportTitle'}); delete $options->{'reportTitle'}; } if (defined $options->{'outputDir'}) { $self->output_dir($options->{'outputDir'}); delete $options->{'outputDir'}; } if (defined $options->{'outputFile'}) { $self->output_file($options->{'outputFile'}); delete $options->{'outputFile'}; } my @remaining_parms = sort keys %$options; if (0 < @remaining_parms) { require Data::Dumper; confess("Unexpected parameters in 'new' parameter list: " . 
Data::Dumper::Dumper ($options)); } return $self; } ####################################################################### ## ## =over 4 ## ## =item access_logs([\@list_of_log_files]); ## ## Get/Set accessor for the list of logfiles for processing. ## ## Expects/Returns an anonymous list: ## ## Ex. ## ## $bc->access_logs(['/var/log/httpd/access_log']); ## ## my $access_logs = $bc->access_logs; ## print "Log files: " . join(', ', @$access_logs) . "\n"; ## ## =back ## ## =cut ## sub access_logs { shift->_property('access_logs', @_); } ####################################################################### ## ## =over 4 ## ## =item config_file([$configuration_file_path]); ## ## Get/Set accessor for the path to the configuration file. If the ## configuration file is set to undef or the empty string the ## default configuration is used. ## ## =back ## ## =cut ## sub config_file { shift->_property('config_file', @_); } ####################################################################### ## ## =over 4 ## ## =item log_parse_map([$log_fields_map_hash]); ## ## Get/Set accessor for the map of field names to positions in the log parsing ## regular expression. ## ## Ex. ## ## $bc->log_parse_map({ ## host => 0, ## remote_addr => 1, ## ident => 2, ## user => 3, ## day => 4, ## month => 5, ## year => 6, ## hour => 7, ## minute => 8, ## second => 9, ## timezone => 10, ## method => 11, ## uri => 12, ## protocal => 13, ## status => 14, ## bytes => 15, ## referrer => 16, ## useragent => 17, ## }); ## ## my $parse_fields_map = $bc->log_parse_map; ## ## =back ## ## =cut ## sub log_parse_map { shift->_property('log_parse_map', @_); } ####################################################################### ## ## =over 4 ## ## =item log_parse_regex([$parsing_regular_expression]); ## ## Get/Set accessor for the regular expression used to parse the log files. ## ## Ex. ## ## $bc->log_parse_regex(qr/^(\S+) (\S+) (\S+) (\S+) \[([^\]\[]+)\] \"(\S+)\s+(\S+)\s+(\S+)\" (\S+) (\S+) \"?([^"]*)\"? 
\"(.*)\"/); ## ## my $parser_regex = $bc->log_parse_regex; ## ## =back ## ## =cut ## sub log_parse_regex { shift->_property('log_parse_regex', @_); } ####################################################################### sub _user_agent_field { return shift->log_parse_map->{'useragent'}; } sub _referrer_field { return shift->log_parse_map->{'referrer'}; } sub _remote_addr_field { return shift->log_parse_map->{'remote_addr'}; } sub _uri_field { return shift->log_parse_map->{'uri'}; } sub _month_field { return shift->log_parse_map->{'month'}; } sub _year_field { return shift->log_parse_map->{'year'}; } sub pie_chart_size { shift->_property('pie_chart_size', @_); } sub class_colors_map { shift->_property('class_colors_map', @_); } sub platform_map { shift->_property('platform_map', @_); } sub class_map { shift->_property('class_map', @_); } sub report_template { shift->_property('report_template', @_); } sub report_title { shift->_property('report_title', @_); } sub show_detailed_browser_report { shift->_property('show_detailed_browser_report', @_); } sub output_dir { shift->_property('output_dir', @_); } sub output_file { shift->_property('output_file', @_); } sub raw_user_agents { shift->_property('raw_user_agents', @_); } sub monthly_raw_user_agents { shift->_property('monthly_raw_user_agents', @_); } sub probable_robots { shift->_property('probable_robots', @_); } sub unrecognized_robots_count { shift->_property('unrecognized_robots_count', @_); } sub unrecognized_robots { shift->_property('unrecognized_robots', @_); } sub unparsable_lines_count { shift->_property('unparsable_lines_count', @_); } sub excluded_lines_count { shift->_property('excluded_lines_count', @_); } sub processed_lines_count { shift->_property('processed_lines_count', @_); } sub refs_count { shift->_property('refs_count', @_); } sub exclude_robots { shift->_property('exclude_robots', @_); } sub exclude_remote_addrs { shift->_property('exclude_remote_addrs', @_); } sub include_remote_addrs { 
shift->_property('include_remote_addrs', @_); } sub include_only_refs_to_uri_regex { shift->_property('include_only_refs_to_uri_regex', @_); } sub exclude_all_refs_to_uri_regex { shift->_property('exclude_all_refs_to_uri_regex', @_); } sub log_parser_patterns { shift->_property('log_parser_patterns', @_); } sub log_parser_maps { shift->_property('log_parser_maps', @_); } sub log_decompressors { shift->_property('log_decompressors', @_); } sub log_format { shift->_property('log_format', @_); } sub log_size { shift->_property('log_size', @_); } sub robots_useragent_regex { shift->_property('robots_useragent_regex', @_); } sub robots_useragent_false_positives { shift->_property('robots_useragent_false_positives', @_); } sub minimum_browser_report_percentage { shift->_property('minimum_browser_report_percentage', @_); } ####################################################################### # _gd_graph_available; # # Returns availability of GD::Graph (true if available, false if not) # sub _gd_graph_available { my $self = shift; my $available = $self->_property('gd_graph_available'); unless (defined $available) { eval { require GD::Graph; require GD::Graph::pie; require GD::Graph::colour; require GD::Graph::Data; }; if ($@) { $available = 0; } else { $available = 1; } $self->_property('gd_graph_available', $available); } return $available; } ####################################################################### # _hi_res_time_available; # # Returns availability of Time::HiRes (true if available, false if not) # sub _hi_res_time_available { my $self = shift; my $hi_res_time = $self->_property('hi_res_time_available'); unless (defined $hi_res_time) { eval { require Time::HiRes; }; if ($@) { $hi_res_time = 0; } else { $hi_res_time = 1; } $self->_property('hi_res_time_available', $hi_res_time); } return $hi_res_time; } ####################################################################### # _start_time([$time]); # # Stores the start time for the elapsed time timer # sub 
_start_time { shift->_property('start_time', @_); } ####################################################################### # _reset_timer; # # Resets the elapsed time timer to the current time. # sub _reset_timer { my $self = shift; if ($self->_hi_res_time_available) { my $current_time = [Time::HiRes::gettimeofday()]; $self->_start_time($current_time); } else { my $current_time = time; $self->_start_time($current_time); } return; } ####################################################################### # _elapsed_time; # # Returns the elapsed wallclock time since the last '_reset_timer' call. # This time is either the integer number of seconds (if Time::HiRes # is not available) or the time accurate to the nearest millisecond # (if Time::HiRes _is_ available). # sub _elapsed_time { my $self = shift; my $start_time = $self->_start_time; unless (defined $start_time) { my $package = __PACKAGE__; confess("_elapsed_time called without first calling _reset_time. Bad Programmer: no biscuit."); } if ($self->_hi_res_time_available) { my $elapsed_time = sprintf('%0.3f',Time::HiRes::tv_interval($start_time)); return $elapsed_time; } else { my $elapsed_time = time - $start_time; return $elapsed_time; } } ####################################################################### ## ## =over 4 ## ## =item process_stats; ## ## Executes the complete stats analysis including parsing logs files and outputting all requested reports. ## ## =back ## ## =cut ## sub process_stats { my $self = shift; my $package = __PACKAGE__; $self->_reset_timer; $self->parse_logs; $self->output_reports; } ####################################################################### ## ## =over 4 ## ## =item parse_logs; ## ## Parses the specified log files according the the current configuration settings. ## ## =back ## ## =cut sub parse_logs { my $self = shift; my $access_logs = $self->access_logs; unless (defined $access_logs) { my $package = __PACKAGE__; my $subname = $package . '::' . 
(caller(0))[3]; croak("$subname - no logs specified for processing"); } my %probable_robots = %{$self->probable_robots}; my %unrecognized_bot = %{$self->unrecognized_robots}; my $unrecognized_bots_counter = $self->unrecognized_robots_count; my $parse_re = $self->log_parse_regex; unless (defined $parse_re) { croak("No regular expression for parsing log was set"); } my $parse_map = $self->log_parse_map; unless (defined $parse_map) { croak("No map for regular expression for parsing log was set"); } my $month_field = $self->_month_field; unless (defined $month_field) { croak("No 'month' field map for regular expression for parsing log was set"); } my $year_field = $self->_year_field; unless (defined $year_field) { croak("No 'year' field map for regular expression for parsing log was set"); } my $agent_field = $self->_user_agent_field; unless (defined $agent_field) { croak("No 'useragent' field map for regular expression for parsing log was set"); } my $ref_field = $self->_referrer_field; unless (defined $ref_field) { croak("No 'referrer' field map for regular expression for parsing log was set"); } my $remote_addr_field = $self->_remote_addr_field; unless (defined $remote_addr_field) { croak("No 'remote_addr' field map for regular expression for parsing log was set"); } my $uri_field = $self->_uri_field; unless (defined $uri_field) { croak("No 'uri' field map for regular expression for parsing log was set"); } my $processed_lines = 0; my $unparsable_lines = 0; my %not_bots = (); my %month_translations = qw( Jan 1 Feb 2 Mar 3 Apr 4 May 5 Jun 6 Jul 7 Aug 8 Sep 9 Oct 10 Nov 11 Dec 12 jan 1 feb 2 mar 3 apr 4 may 5 jun 6 jul 7 aug 8 sep 9 oct 10 nov 11 dec 12 January 1 February 2 March 3 April 4 May 5 June 6 July 7 August 8 September 9 October 10 November 11 December 12 01 1 02 2 03 3 04 4 05 5 06 6 07 7 08 8 09 9 10 10 11 11 12 12 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 12 ); my %log_escapes = ( "\\\\" => '%5c', "\\\"" => '%22', ); my %raw_user_agents = 
%{$self->raw_user_agents}; my $exclude_remote_addrs = $self->exclude_remote_addrs; my $include_remote_addrs = $self->include_remote_addrs; my $include_only_refs_to_uri_regex = $self->include_only_refs_to_uri_regex; my $exclude_all_refs_to_uri_regex = $self->exclude_all_refs_to_uri_regex; my $robots_useragent_regex = $self->robots_useragent_regex; my $robots_useragent_false_positives = $self->robots_useragent_false_positives; my $exclude_robots = ($self->exclude_robots eq 'yes') ? 1 : 0; my $decompressors = $self->log_decompressors; my %monthly_user_agent_breakdown = (); ###### # This is where the virtually all of the time needed for log processing is spent. # It is _extremely_ time critical so be very careful when modifying anything in the following # block - it has been heavily optimized for speed. my $log_size = 0; { local *AGENT_LOG_FH; # We do this vice 'gensym' or IO::File because it is _much_ faster according to testing foreach my $access_log (@$access_logs) { my ($logfile_suffix) = $access_log =~ m/\.([-_A-Za-z0-9]+)$/; $logfile_suffix = lc ($logfile_suffix); my $access_log_opener = $access_log; if (defined $decompressors->{$logfile_suffix}) { $access_log_opener = $decompressors->{$logfile_suffix} . " $access_log |"; } if (! open(AGENT_LOG_FH,$access_log_opener) ) { warn "Can't open ${access_log}. 
Skipped.\n $!"; next; } binmode AGENT_LOG_FH; while () { $log_size += length($_); $processed_lines++; s#(\\[\\"])#$log_escapes{$1}#gs; my ($remote_addr, $year, $month, $filename, $user_agent, $referrer) = (m/$parse_re/)[$remote_addr_field, $year_field, $month_field, $uri_field, $agent_field, $ref_field]; unless (defined $user_agent) { $unparsable_lines++; next; } next if ( ($include_only_refs_to_uri_regex && ($filename !~ m/$include_only_refs_to_uri_regex/)) or ($exclude_all_refs_to_uri_regex && ($filename =~ m/$exclude_all_refs_to_uri_regex/)) or ($exclude_remote_addrs && ($remote_addr =~ m/$exclude_remote_addrs/)) or ($include_remote_addrs && ($remote_addr !~ m/$include_remote_addrs/)) ); $user_agent =~ s#\s+# #gs; # Fixes proxy info bug. Fix suggested by # James Walter Martin III $month = $month_translations{$month}; if ($filename eq '/robots.txt') { if (not($probable_robots{$user_agent})) { if ($user_agent !~ m/$robots_useragent_regex/) { if ($user_agent !~ m/$robots_useragent_false_positives/) { $unrecognized_bots_counter++; $probable_robots{$user_agent}++; } } else { $probable_robots{$user_agent}++; } } } $monthly_user_agent_breakdown{$year}->{$month}->{$user_agent}++; $raw_user_agents{$user_agent}++; } } } $self->log_size($log_size); $self->monthly_raw_user_agents(\%monthly_user_agent_breakdown); $self->raw_user_agents(\%raw_user_agents); $self->unrecognized_robots_count($unrecognized_bots_counter); $self->processed_lines_count($processed_lines); $self->probable_robots(\%probable_robots); $self->unparsable_lines_count($unparsable_lines); return; } ####################################################################### sub output_reports { my $self = shift; my $processed_lines = $self->processed_lines_count; my $excluded_lines = $self->excluded_lines_count; my $refscounter = $self->refs_count; my $unparsable_lines = $self->unparsable_lines_count; my $probable_robots = $self->probable_robots; my $raw_user_agents = $self->raw_user_agents; my $report_title = 
$self->report_title; my $exclude_robots = ($self->exclude_robots eq 'yes') ? 1 : 0; my $robots_useragent_regex = $self->robots_useragent_regex; my $robots_useragent_false_positives = $self->robots_useragent_false_positives; my $output_dir = $self->output_dir; unless (defined $output_dir) { croak("No 'output_dir' defined"); } unless (-e $output_dir) { mkdir $output_dir; } if (not -e $output_dir) { croak("output_dir $output_dir does not exist and could not be created: $!"); } unless(-d _) { croak("output_dir $output_dir is not a directory"); } unless (-w _) { croak("output_dir $output_dir cannot be written to (permissions error)"); } my $output_file = $self->output_file; unless (defined $output_file) { croak("No 'output_file' defined"); } my $index_file = File::Spec->catfile($output_dir,$output_file); my $minimum_browser_report_percentage = $self->minimum_browser_report_percentage; my %Ruser_agents = (); my $robot_hits = 0; while (my ($user_agent, $agent_count) = each %$raw_user_agents) { if ($probable_robots->{$user_agent}) { $probable_robots->{$user_agent} = $agent_count; $robot_hits += $agent_count; } elsif (($user_agent =~ m/$robots_useragent_regex/) and ($user_agent !~ m/$robots_useragent_false_positives/o)) { $robot_hits += $agent_count; $probable_robots->{$user_agent} = $agent_count; } $refscounter += $agent_count; $Ruser_agents{$user_agent} = $agent_count; } my $bot_hits = 0; foreach my $agent (keys %$probable_robots) { $bot_hits += $Ruser_agents{$agent}; } my $platform_map = $self->platform_map; my @platforms_list = map { quotemeta($_) } sort { length($b) <=> length($a) } keys %$platform_map; my $platform_string = '(' . join('|',@platforms_list) . 
')'; my $platform_regex = qr/$platform_string/; my %rawagents; my $non_bot_hits = 0; my %html_escape = ( '<' => '<', '>' => '>', '&' => '&', '"' => '"', ); while(my ($user_agent,$Count) = each(%Ruser_agents)) { if (($user_agent eq "-") || ($user_agent eq '')) { $user_agent = "Unknown"; } else { my $robot_id = ''; if ($probable_robots->{$user_agent}) { next if $exclude_robots; $robot_id = ' Possible Robot'; } else { $non_bot_hits += $Count; } # Undo any URL encoding of user agent $user_agent =~ tr/+/ /; $user_agent =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; # Despoof various people if ($user_agent =~ m#^\S+\s+(WebTV/\S+)#o) { $user_agent = "$1 spoofing as $user_agent"; } elsif ($user_agent =~ m#(MSIECrawler|VoilaBot|Girafabot|Netnose-Crawler)#) { $user_agent = "$1 spoofing as $user_agent"; } elsif ($user_agent =~ m#AppleWebKit.* (Safari/\d+)#) { $user_agent = "$1 spoofing as $user_agent"; } elsif ($user_agent =~ m# (Opera) ([0-9]+\.[0-9]+) #) { $user_agent = "$1/$2 spoofing as $user_agent"; } elsif ($user_agent =~ m# (Firefox)/([0-9.]+)$#) { my ($browser, $version) = ($1,$2); if (($version eq '1') or ($version eq '1.0')) { $version = '1.0.0' } $user_agent = "$browser/$version spoofing as $user_agent"; } elsif ($user_agent =~ m# (Konqueror)[-/](\d+)#) { $user_agent = "$1/$2 spoofing as $user_agent"; } elsif ($user_agent =~ m# (Cerberian Drtrs) Version[-/](\d+)#) { $user_agent = "$1/$2 spoofing as $user_agent"; } elsif ($user_agent =~ m#Microsoft Data Access Internet Publishing Provider Protocol Discovery#) { $user_agent = "MSFrontPage/0"; } elsif ($user_agent =~ m#Microsoft Office Protocol Discovery#) { $user_agent = "MSOffice/0"; } else { # despoofs people using pseudo-'standard' of 'compatible' if ($user_agent =~ m#^Mozilla.*\(compatible; *([^;)]+)#oi) { my $spoofer = $1; $spoofer =~ s#/#-#og; $spoofer =~ s/\W+$//o; $user_agent="$spoofer spoofing as $user_agent"; } } $user_agent .= $robot_id; # Lets not let children play with dangerous toys... 
$user_agent =~ s#([<>&"])#$html_escape{$1}#gs; } $rawagents{$user_agent}+=$Count; } my (%agentgroup, %agentversion, %baseagent); my $platform_matches = {}; foreach my $agent (keys (%rawagents)) { my $robot_id = ''; if ($agent =~ m/ Possible Robot$/) { next if $exclude_robots; $robot_id = ' (Possible Robot)'; } my $longagent = $agent; $longagent =~ s/ Possible Robot$//; my ($base) = $longagent =~ m#^([^\(\[]+)#o; $base =~ s#\s+$##o; $base =~ s#via proxy.*$##ogi; my ($name,$version) = $base =~ m#^([^\d\/]+)[\s\/vV]+(\d[\.\d]+)#o; ($name) = $base =~ m#^([^\d\/]+)#o if (! $name); $name = 'Failed to parse' unless defined ($name); $version = '0' unless (defined $version); $agentgroup{"$name$robot_id"} += $rawagents{$agent}; $agentversion{"$name $version$robot_id"} += $rawagents{$agent}; $baseagent{"$base$robot_id"} += $rawagents{$agent}; my ($platform_id) = $agent =~ m/$platform_regex/; if (($robot_id eq '') and (defined $platform_id)) { my $platform = $platform_map->{$platform_id}; if (defined $platform) { $platform_matches->{$platform} += $rawagents{$agent}; } else { $platform_matches->{'Unknown'} += $rawagents{$agent}; } } else { $platform_matches->{'Unknown'} += $rawagents{$agent}; } } my $major_version; foreach my $key (keys(%agentversion)) { my $mversion = $key; $mversion =~ s/(\d+)\.\d+([^\.].*$|$)/$1/og; $major_version->{$mversion} += $agentversion{$key}; } $refscounter -= $bot_hits if $exclude_robots; $excluded_lines += $processed_lines - $refscounter; my $report_template = $self->report_template; my $min_matches_percent_formatted = sprintf("%0.2f",$minimum_browser_report_percentage); my $date= localtime(time); $refscounter = $non_bot_hits if ($self->exclude_robots eq 'yes'); my $class_map = $self->class_map; my $class_colors_map = $self->class_colors_map; #################################################################################################3 my $pie_chart_size = $self->pie_chart_size; 
################################################################################################# ########################## # Platform Summary $report_template = $self->ranked_hits_report({ 'hitsData' => $platform_matches, 'minimumPercent' => 0, 'classMap' => $class_map, 'tag' => 'browser_platform_summary', 'template' => $report_template, }); $report_template = $self->pie_chart({ 'file' => 'browser_platform_pie_chart', 'size' => $pie_chart_size, 'classMap' => $class_map, 'classColorsMap' => $class_colors_map, 'title' => 'Platform', 'hitsData' => $platform_matches, 'minimumPercent' => 1, 'template' => $report_template, 'tag' => 'browser_platform_pie_chart', }); ########################## # Brand Summary $report_template = $self->ranked_hits_report({ 'hitsData' => \%agentgroup, # 'totalHits' => $refscounter, 'minimumPercent' => $minimum_browser_report_percentage, 'classMap' => $class_map, 'tag' => 'browser_brand_summary', 'template' => $report_template, }); $report_template = $self->pie_chart({ 'file' => 'browser_brand_pie_chart', 'size' => $pie_chart_size, 'classMap' => $class_map, 'classColorsMap' => $class_colors_map, 'title' => 'Browser Brand', 'hitsData' => \%agentgroup, # 'totalHits' => $refscounter, 'minimumPercent' => 1, 'template' => $report_template, 'tag' => 'browser_brand_pie_chart', }); ########################## # Major version summary $report_template = $self->ranked_hits_report({ 'hitsData' => $major_version, # 'totalHits' => $refscounter, 'minimumPercent' => $minimum_browser_report_percentage, 'classMap' => $class_map, 'tag' => 'browser_major_version_summary', 'template' => $report_template, }); $report_template = $self->pie_chart({ 'file' => 'browser_major_version_pie_chart', 'size' => $pie_chart_size, 'classMap' => $class_map, 'classColorsMap' => $class_colors_map, 'title' => 'Browser Major Version', 'hitsData' => $major_version, # 'totalHits' => $refscounter, 'minimumPercent' => 1, 'template' => $report_template, 'tag' => 
'browser_major_version_pie_chart', }); ########################## # Minor version summary $report_template = $self->ranked_hits_report({ 'hitsData' => \%agentversion, # 'totalHits' => $refscounter, 'minimumPercent' => $minimum_browser_report_percentage, 'classMap' => $class_map, 'tag' => 'browser_minor_version_summary', 'template' => $report_template, }); $report_template = $self->pie_chart({ 'file' => 'browser_minor_version_pie_chart', 'size' => $pie_chart_size, 'classMap' => $class_map, 'classColorsMap' => $class_colors_map, 'title' => 'Browser Minor Version', 'hitsData' => \%agentversion, # 'totalHits' => $refscounter, 'minimumPercent' => 1, 'template' => $report_template, 'tag' => 'browser_minor_version_pie_chart', }); ########################## # The nearly raw dump of user agents (the detailed report) if ($self->show_detailed_browser_report eq 'yes') { $report_template = $self->alpha_hits_report({ 'hitsData' => \%rawagents, # 'totalHits' => $refscounter, 'classMap' => $class_map, 'tag' => 'browser_detail_summary', 'template' => $report_template, }); } ########## # All the real work is done. Now we build the output results and save them. 
my $elapsed_time = $self->_elapsed_time;
    my $lines_per_second = 'n/a';
    my $megabytes_per_second = 'n/a';
    my $log_size = $self->log_size;
    if ($elapsed_time > 0) {
        $lines_per_second = int($processed_lines / $elapsed_time);
        $megabytes_per_second = sprintf('%0.2f', (($log_size / 1000000) / $elapsed_time));
    }
    my $report_values = {
        'version' => $VERSION,
        'report_date' => $date,
        'report_title' => $report_title,
        'processed_lines' => $processed_lines,
        'measured_hits' => $refscounter,
        'robot_hits' => $robot_hits,
        'lines_per_second' => $lines_per_second,
        'elapsed_time' => $elapsed_time,
        'unparsable_lines' => $unparsable_lines,
        'excluded_lines' => $excluded_lines,
        'log_size' => $log_size,
        'megabytes_per_second' => $megabytes_per_second,
        'browser_report_cutoff' => $min_matches_percent_formatted,
    };
    # NOTE(review): both tags below are empty strings, so no template block can
    # be selected by these two calls - the tag names look lost; confirm against
    # the report template.
    $report_template = $self->make_tagged_block_substitutions({
        'data' => { 1 => $report_values },
        'template' => $report_template,
        'startTag' => "",
        'endTag' => "",
    });
    my $class_color = {};
    my $class_counter = 0;
    while (my($classname, $classdata) = each %$class_colors_map) {
        $class_color->{$class_counter++} = $classdata;
    }
    $report_template = $self->make_tagged_block_substitutions({
        'data' => $class_color,
        'template' => $report_template,
        'startTag' => "",
        'endTag' => "",
    });
    $self->write_to_file({ 'data' => $report_template, 'filename' => $index_file });
    return;
}

###############################################################################
# write_to_file({ 'data' => $data, 'filename' => $path_to_file });
#
# Writes $data to $path_to_file in binary mode, replacing any existing file.
# Confesses (with a stack trace) if 'data' is undefined, if 'filename' is
# missing or empty, or if the file cannot be opened, written or closed.
#
sub write_to_file {
    my $self = shift;
    my ($parms) = @_;

    my $data = $parms->{'data'};
    unless (defined ($data)) { confess("write_to_file() - No data????") };
    my $path = $parms->{'filename'};
    unless (defined ($path) and ($path ne '')) { confess("write_to_file() - No Filename????") };

    my $output_fh = IO::File->new($path, ">");
    unless ($output_fh) {
        confess ("write_to_file() - Could not open $path for writing: $!\n");
    }
    binmode $output_fh; # No mucking around with our bits.

    # Buffered write errors (e.g. a full disk) may only surface at close time,
    # so both the print and the close are checked.
    unless (print {$output_fh} $data) {
        confess ("write_to_file() - Could not write to $path: $!\n");
    }
    unless ($output_fh->close) {
        confess ("write_to_file() - Could not close $path: $!\n");
    }

    return;
}

###############################################################################
# pie_chart({
#           'file'           => 'browser_minor_version_pie_chart',
#           'size'           => $pie_chart_size,
#           'classMap'       => $class_map,
#           'classColorsMap' => $class_colors_map,
#           'title'          => $pie_chart_title,
#           'hitsData'       => \%agentversion,
#           'totalHits'      => $refscounter,      (optional, computed from hitsData if not given)
#           'minimumPercent' => $minimum_browser_report_percentage, (optional)
#           'minimumHits'    => $min_hits_in_chart, (optional)
#           'tag'            => $block_substition_tag,
#           'template'       => $template_text,
#    });
#
# Plots the hits data as a pie chart, writes the image into the output
# directory as "$file.png" (or "$file.gif" when PNG output is not available)
# and fills the <$tag>...</$tag> block of the template with the chart's
# 'url', 'height' and 'width'. Slices below the minimum percent/hits are
# folded into a single 'Other' slice.
#
# If GD::Graph is not available the tagged block is removed from the template
# instead. Returns the updated template text.
#
sub pie_chart {
    my $self = shift;
    my ($parms) = @_;

    my $file = $parms->{'file'};
    unless (defined ($file) and ($file ne '')) { croak("No Filename????") };
    my $size             = $parms->{'size'};
    my $class_map        = $parms->{'classMap'};
    my $class_colors_map = $parms->{'classColorsMap'};
    my $title            = $parms->{'title'};
    my $hits_data        = $parms->{'hitsData'};
    my $total_hits       = $parms->{'totalHits'};
    my $min_percent      = $parms->{'minimumPercent'};
    my $min_hits         = $parms->{'minimumHits'};
    my $tag              = $parms->{'tag'};
    my $template         = $parms->{'template'};

    # Without GD::Graph there is nothing to draw: an empty data set makes the
    # substitution drop the chart block from the template.
    unless ($self->_gd_graph_available) {
        return $self->make_tagged_block_substitutions({
            'data'     => {},
            'template' => $template,
            'startTag' => "<$tag>",
            'endTag'   => "</$tag>",
        });
    }

    my @line_keys = sort { $hits_data->{$b} <=> $hits_data->{$a} } keys %$hits_data;
    unless (defined $total_hits) {
        $total_hits = 0;
        foreach my $key (@line_keys) { $total_hits += $hits_data->{$key} };
    }

    my $graph_data     = GD::Graph::Data->new;
    my $shown_percent  = 0;
    my $colors_list    = [];
    my $colors_map     = {};
    my @default_colors = GD::Graph::colour::sorted_colour_list();

    # Picks the configured background color for an item's class, falling back
    # to the next unused default color, and records it for the chart.
    my $assign_color = sub {
        my ($item) = @_;
        my $class = $self->identify_class({ 'classMap' => $class_map, 'item' => $item });
        my $color = defined($class_colors_map->{$class})
                  ? $class_colors_map->{$class}->{'classbgcolor'}
                  : shift @default_colors;
        push (@$colors_list, $color);
        $colors_map->{$class} = $color;
        return;
    };

    foreach my $key (@line_keys) {
        my $hits = $hits_data->{$key};
        # A zero total would otherwise be a fatal division by zero.
        my $percentage = sprintf('%0.2f', ($total_hits ? (100 * $hits / $total_hits) : 0));
        next if ((defined($min_percent) and ($percentage < $min_percent))
              or (defined($min_hits)    and ($hits < $min_hits)));
        $shown_percent += $percentage;
        $assign_color->($key);
        $graph_data->add_point(" $key - $percentage\% ", $percentage);
    }

    my $other_percentage = 100 - $shown_percent;
    $other_percentage = 0 if ($other_percentage < 0);
    $other_percentage = sprintf('%0.2f', $other_percentage);
    # Numeric test: the formatted value is '0.00', which a string comparison
    # against '0.0' never recognizes as zero.
    if ($other_percentage > 0) {
        $graph_data->add_point(" Other - $other_percentage\% ", $other_percentage);
        $assign_color->('Other');
    }

    while (my ($color_key, $color_info) = each %{$colors_map}) {
        GD::Graph::colour::add_colour( $color_key => $color_info );
    }

    my $graph = GD::Graph::pie->new( $size, $size);
    $graph->set(
        'y_min_value'    => 0,
        'y_max_value'    => 100,
        'dclrs'          => $colors_list,
        '3d'             => 1,
        'pie_height'     => 20,
        'suppress_angle' => 1,
        'transparent'    => 0,
        'label'          => $title,
    );

    my $gd = $graph->plot($graph_data) or die ($graph->error);

    # Prefer PNG, fall back to GIF; the first format that GD both supports and
    # actually renders wins.
    my $output_file = $file;
    my $chart;
    my $format_supported = 0;
    foreach my $format ('png', 'gif') {
        next unless ($gd->can($format));
        $format_supported = 1;
        $chart = $gd->$format();
        if (defined $chart) {
            $output_file .= ".$format";
            last;
        }
    }
    unless ($format_supported) {
        croak("Bad stuff - GD does not appear able to output either PNG or GIF\n");
    }
    unless (defined $chart) {
        croak("GD appears broken - failed to generate either GIF or PNG");
    }

    my $output_dir = $self->output_dir;
    my $filepath   = File::Spec->catfile($output_dir, $output_file);
    $self->write_to_file({ 'filename' => $filepath, 'data' => $chart });

    my $template_data = {
        'url'    => $output_file,
        'height' => $size,
        'width'  => $size,
    };
    $template = $self->make_tagged_block_substitutions({
        'data'     => { 1 => $template_data },
        'template' => $template,
        'startTag' => "<$tag>",
        'endTag'   => "</$tag>",
    });

    return $template;
}

###############################################################################
# _build_hits_report_block($parms, \@ordered_keys);
#
# Shared implementation of ranked_hits_report() and alpha_hits_report():
# builds one substitution record ('hits', 'percentage', 'name', 'ranking',
# 'class') per key of 'hitsData', in the order given, skipping entries below
# 'minimumPercent' / 'minimumHits', and expands the <$tag>...</$tag> block of
# the template once per record. Returns the updated template text.
#
sub _build_hits_report_block {
    my $self = shift;
    my ($parms, $line_keys) = @_;

    my $tag         = $parms->{'tag'};
    my $hits_data   = $parms->{'hitsData'};
    my $min_percent = $parms->{'minimumPercent'};
    my $min_hits    = $parms->{'minimumHits'};
    my $template    = $parms->{'template'};
    my $total_hits  = $parms->{'totalHits'};
    my $class_map   = $parms->{'classMap'};

    unless (defined $total_hits) {
        $total_hits = 0;
        foreach my $key (@$line_keys) { $total_hits += $hits_data->{$key} };
    }

    my $ranking = 0;
    my $data    = {};
    foreach my $key (@$line_keys) {
        # The ranking counts every entry, including those filtered out below.
        $ranking++;
        my $hits = $hits_data->{$key};
        # A zero total would otherwise be a fatal division by zero.
        my $percentage = sprintf('%0.2f', ($total_hits ? (100 * $hits / $total_hits) : 0));
        next if ((defined($min_percent) and ($percentage < $min_percent))
              or (defined($min_hits)    and ($hits < $min_hits)));
        my $class = $self->identify_class({ 'classMap' => $class_map, 'item' => $key });
        $data->{$ranking} = {
            'hits'       => $hits,
            'percentage' => $percentage,
            'name'       => $key,
            'ranking'    => $ranking,
            'class'      => $class,
        };
    }

    return $self->make_tagged_block_substitutions({
        'data'     => $data,
        'template' => $template,
        'startTag' => "<$tag>",
        'endTag'   => "</$tag>",
    });
}

###############################################################################
# ranked_hits_report({ tag            => 'example',
#                      hitsData       => \%hit_count_data,
#                      template       => $report_template,
#                      classMap       => $class_keys_map,  (passed to identify_class, which croaks without it)
#                    [ totalHits      => $total_number_of_hits, ] (optional, will be computed from hitsData if not given)
#                    [ minimumPercent => $min_percent_in_report, ] (optional)
#                    [ minimumHits    => $min_hits_in_report, ]    (optional)
#                   });
#
# Expands the <$tag>...</$tag> block of the template once per hitsData entry,
# ordered by descending hit count. Returns the updated template text.
#
sub ranked_hits_report {
    my $self = shift;
    my ($parms) = @_;

    my $hits_data = $parms->{'hitsData'};
    my @line_keys = sort { $hits_data->{$b} <=> $hits_data->{$a} } keys %$hits_data;

    return $self->_build_hits_report_block($parms, \@line_keys);
}

###############################################################################
# alpha_hits_report({ tag            => 'example',
#                     hitsData       => \%hit_count_data,
#                     template       => $report_template,
#                     classMap       => $class_keys_map,  (passed to identify_class, which croaks without it)
#                   [ totalHits      => $total_number_of_hits, ] (optional, will be computed from hitsData if not given)
#                   [ minimumPercent => $min_percent_in_report, ] (optional)
#                   [ minimumHits    => $min_hits_in_report, ]    (optional)
#                  });
#
# Expands the <$tag>...</$tag> block of the template once per hitsData entry,
# ordered alphabetically by entry name. Returns the updated template text.
#
sub alpha_hits_report {
    my $self = shift;
    my ($parms) = @_;

    my $hits_data = $parms->{'hitsData'};
    my @line_keys = sort keys %$hits_data;

    return $self->_build_hits_report_block($parms, \@line_keys);
}
###############################################################################
# identify_class({ 'classMap' => $class_map, 'item' => $item });
#
# Looks for the $item in the classMap and returns the matching class.
# If no exact match is found, returns the class of the longest classMap key
# that is a prefix of $item.
# If no prefix match is found, returns 'unknown'.
#
# Croaks if either 'classMap' or 'item' is not supplied.
#
sub identify_class {
    my $self = shift;
    my ($parms) = @_;

    my $class_map = $parms->{'classMap'};
    unless (defined $class_map) { croak("No 'classMap'"); }
    my $item = $parms->{'item'};
    unless (defined $item) { croak("No 'item'"); }

    my $class_name = $class_map->{$item};
    unless (defined $class_name) {
        # Look for longest prefix match if we didn't find an exact match
        $class_name = 'unknown';
        my $item_length = length($item);
        my @class_names = sort { length($b) <=> length($a) } keys %$class_map;
        foreach my $name (@class_names) {
            next unless (length($name) <= $item_length);
            next if (0 != index($item, $name, 0));
            $class_name = $class_map->{$name};
            # Keys are visited longest first, so the first hit is the longest
            # prefix; continuing would let a shorter prefix overwrite it.
            last;
        }
    }

    return $class_name;
}

###############################################################################
# make_tagged_block_substitutions ( 'template' => $template_text,
#                                   'startTag' => '<tag>',
#                                   'endTag'   => '</tag>',
#                                   'data'     => {
#                                         1 => { ...data hash item1... },
#                                         2 => { ...data hash item2... },
#                                         ....
# });
#
# Finds every block of the template delimited by startTag ... endTag and
# replaces it with one copy of the block's inner content per record in 'data',
# records being taken in ascending numeric key order. Inside each copy every
# ${name} placeholder, where name is a key of any record, is replaced by the
# current record's value (an empty string if that record has no such value).
# An empty 'data' hash therefore removes the tagged blocks altogether.
#
# startTag and endTag are interpolated into regular expressions unescaped.
# Returns the resulting template text; a template containing no tagged block
# is returned unchanged.
#
sub make_tagged_block_substitutions {
    my $self = shift;
    my ($parms) = @_;

    my $start_tag = $parms->{'startTag'};
    my $end_tag   = $parms->{'endTag'};
    my $data      = $parms->{'data'};
    my $template  = $parms->{'template'};

    # Empty matches (possible when both tags are empty) carry nothing to
    # replace, so they are dropped along with the no-match case. Otherwise the
    # final alternation would be '()' and match at every position.
    my @tagged_blocks = grep { length($_) } ($template =~ m#($start_tag.*?$end_tag)#gs);
    return $template unless (@tagged_blocks);

    my @data_keys_list = sort { $a <=> $b } keys %$data;

    # Collect every placeholder name used by any record.
    my %all_targets = ();
    foreach my $data_key (@data_keys_list) {
        foreach my $item_key (keys %{ $data->{$data_key} }) {
            $all_targets{quotemeta($item_key)} = 1;
        }
    }

    my $block_string    = "^$start_tag(.*?)$end_tag";
    my $item_sub_string = '\$\{(' . join('|', sort keys %all_targets) . ')\}';
    my $item_sub_regex  = qr/$item_sub_string/;

    my %subs = ();
    foreach my $block (@tagged_blocks) {
        next if (exists $subs{$block}); # identical block text already expanded
        my ($unwrapped_content) = $block =~ m/$block_string/s;
        $unwrapped_content = '' unless (defined $unwrapped_content);

        my @sub_data = ();
        foreach my $key (@data_keys_list) {
            my $sub_hash = $data->{$key};
            my $content  = $unwrapped_content;
            $content =~ s/$item_sub_regex/defined($sub_hash->{$1}) ? $sub_hash->{$1} : ''/egs;
            push (@sub_data, $content);
        }
        $subs{$block} = join('', @sub_data);
    }

    # Longest block first, so a block that happens to be a prefix of another
    # cannot win the alternation and leave the longer one half replaced.
    my @ordered_blocks = sort { length($b) <=> length($a) } keys %subs;
    my $block_targets_regex = '(' . join('|', map { quotemeta($_) } @ordered_blocks) . ')';
    $template =~ s/$block_targets_regex/$subs{$1}/gs;

    return $template;
}
##

## Summary by fine detail of version:
##
##
##
##
##
##
##
## EOF
## foreach my $key (sort {$baseagent{$b} <=> $baseagent{$a}} keys(%baseagent)) {
##     my $percentage=100*$baseagent{$key}/$refscounter;
##     next unless ($percentage >= $minimum_browser_report_percentage);
##     $percentage = 0.00 if ($percentage < 0.001);
##     $percentage=~s/(....).*/$1/o;
##     print $output_fh "\n \n \n \n\n";
## }
##
## print $output_fh "
## HitsPercentBrowser
## $baseagent{$key}";
## print $output_fh "${percentage}\%$key
## \n";

###############################################################################
# generate_new_configuration_file ($file);
#
# Saves the default configuration (the __DATA__ section of this script) into
# a new configuration file, truncating the file if it already exists.
# Dies if the file cannot be opened, written or closed.
# The DATA handle is consumed and closed by this call.
#
sub generate_new_configuration_file {
    my $self = shift;
    my ($config_file) = @_;

    my $fh = IO::File->new($config_file, O_WRONLY | O_TRUNC | O_CREAT);
    unless ($fh) {
        die("Unable to open new $config_file: $!\n");
    }
    binmode $fh;

    # Copy the embedded default configuration line by line.
    while (defined(my $line = <DATA>)) {
        unless (print {$fh} $line) {
            die("Unable to write to $config_file: $!\n");
        }
    }
    close (DATA);

    # Buffered write errors may only show up when the handle is closed.
    unless ($fh->close) {
        die("Unable to finish writing $config_file: $!\n");
    }

    return;
}

###############################################################################
# load_configuration({ configFile => $config_file });
#
# Parses the configuration and stores each setting on the object through its
# accessor methods (report_title, output_dir, class_map, ...). Returns nothing.
#
# If no 'configFile' is passed, it uses the __DATA__ section for configuration data.
#
# Croaks if the configuration file is missing, unreadable, a directory or not
# a regular file, and - after the whole input has been read - with the list of
# all syntax errors and missing log format definitions found. Dies if the
# file cannot be opened.
#
sub load_configuration {
    my $self = shift;
    my ($parms) = @_;

    my $config_file = $parms->{'configFile'};
    my $fh;
    if (defined $config_file) {
        unless (-e $config_file) { croak("$config_file either does not exist or cannot be accessed\n"); }
        unless (-r _)            { croak("$config_file cannot be read (check file permissions)\n"); }
        # Tested before the regular-file check, which would otherwise always
        # fire first for a directory and hide this more specific message.
        if     (-d _)            { croak("$config_file is a directory (was expecting a file)\n"); }
        unless (-f _)            { croak("$config_file does not appear to be a regular file\n"); }

        $fh = IO::File->new($config_file, O_RDONLY);
        unless ($fh) {
            die("Unable to open $config_file: $!\n");
        }
    } else {
        $fh = \*DATA;
    }

    my $linecount = 0;
    my $errors    = '';

    my $log_decompressors   = $self->log_decompressors;
    my $log_parser_patterns = $self->log_parser_patterns;
    my $log_parser_maps     = $self->log_parser_maps;
    my $access_logs         = $self->access_logs;

    # The three maps start out empty and are filled in place while parsing.
    my $class_colors_map = {};
    $self->class_colors_map($class_colors_map);
    my $class_map = {};
    $self->class_map($class_map);
    my $platform_map = {};
    $self->platform_map($platform_map);

    while (defined(my $line = <$fh>)) {
        chomp $line;
        $linecount++;

        # Skip comments and blank lines
        next if (($line =~ m/^\s*#/) or ($line =~ m/^\s*$/));

        if ($line =~ m/^\s*decompress\.(\S+)\s*=\s*(.*?)\s*$/) {
            my $suffix  = lc($1);
            my $program = $2;
            $log_decompressors->{$suffix} = $program;

        } elsif ($line =~ m/^\s*report_title\s*=\s*(.*?)\s*$/) {
            my $report_title = $1;
            $self->report_title($report_title);

        } elsif ($line =~ m/^\s*platform_map\.'([^']+)'\s*=\s*(.*?)\s*$/) {
            my $map_key      = $1;
            my $map_platform = $2;
            $platform_map->{$map_key} = $map_platform;

        } elsif ($line =~ m/^\s*class_map\.([A-Za-z][A-Za-z0-9]*)\s*=\s*(.*?)\s*$/) {
            # The file reads "class_map.<class> = <item>", but lookups go from
            # item to class, so the pair is stored the other way round.
            my $map_class = $1;
            my $map_key   = $2;
            $class_map->{$map_key} = $map_class;

        } elsif ($line =~ m/^\s*class_color_map\.([A-Za-z][A-Za-z0-9]*)\s*=\s*(#[0-9a-fA-F]{6})\s*,\s*(#[0-9a-fA-F]{6})\s*$/) {
            my $map_key     = $1;
            my $map_bgcolor = $2;
            my $map_fgcolor = $3;
            $class_colors_map->{$map_key} = {
                'classfgcolor' => $map_fgcolor,
                'classbgcolor' => $map_bgcolor,
                'classname'    => $map_key,
            };

        } elsif ($line =~ m/^\s*access_log\s*=\s*(.*?)\s*$/) {
            my $access_log = $1;
            push (@$access_logs, $access_log);

        } elsif ($line =~ m/^\s*output_dir\s*=\s*(.*?)\s*$/) {
            my $output_dir = $1;
            $self->output_dir($output_dir);

        } elsif ($line =~ m/^\s*output_file\s*=\s*(.*?)\s*$/) {
            my $output_file = $1;
            $self->output_file($output_file);

        } elsif ($line =~ m/^\s*exclude_all_refs_to_uri_regex\s*=\s*(.*?)\s*$/) {
            my $exclude_all_refs_to_uri_regex = $1;
            if ($exclude_all_refs_to_uri_regex ne '') {
                $self->exclude_all_refs_to_uri_regex(qr/$exclude_all_refs_to_uri_regex/);
            }

        } elsif ($line =~ m/^\s*include_only_refs_to_uri_regex\s*=\s*(.*?)\s*$/) {
            my $include_only_refs_to_uri_regex = $1;
            if ($include_only_refs_to_uri_regex ne '') {
                $self->include_only_refs_to_uri_regex(qr/$include_only_refs_to_uri_regex/);
            }

        } elsif ($line =~ m/^\s*exclude_remote_addrs\s*=\s*(.*?)\s*$/) {
            my $exclude_remote_addrs = $1;
            if ($exclude_remote_addrs ne '') {
                $self->exclude_remote_addrs(qr/$exclude_remote_addrs/);
            }

        } elsif ($line =~ m/^\s*include_remote_addrs\s*=\s*(.*?)\s*$/) {
            my $include_remote_addrs = $1;
            if ($include_remote_addrs ne '') {
                $self->include_remote_addrs(qr/$include_remote_addrs/);
            }

        } elsif ($line =~ m/^\s*exclude_robots\s*=\s*(yes|no)\s*$/i) {
            my $exclude_robots = lc($1);
            $self->exclude_robots($exclude_robots);

        } elsif ($line =~ m/^\s*minimum_browser_report_percentage\s*=\s*(.*?)\s*$/) {
            my $minimum_browser_report_percentage = $1;
            $self->minimum_browser_report_percentage($minimum_browser_report_percentage);

        } elsif ($line =~ m/^\s*show_detailed_browser_report\s*=\s*(yes|no)\s*$/i) {
            my $show_detailed_browser_report = lc($1);
            $self->show_detailed_browser_report($show_detailed_browser_report);

        } elsif ($line =~ m/^\s*robots_useragent_regex\s*=\s*(.*?)\s*$/) {
            my $robots_useragent_regex = $1;
            $self->robots_useragent_regex(qr/$robots_useragent_regex/i);

        } elsif ($line =~ m/^\s*robots_useragent_false_positives_regex\s*=\s*(.*?)\s*$/) {
            my $robots_useragent_false_positives = $1;
            $self->robots_useragent_false_positives(qr/$robots_useragent_false_positives/i);

        } elsif ($line =~ m/^\s*log_format\s*=\s*([-_a-zA-Z0-9]+)\s*$/) {
            my $log_format = lc($1);
            $self->log_format($log_format);

        } elsif ($line =~ m/^\s*log_parsing_regex\.([-_a-zA-Z0-9]+)\s*=\s*(.*?)\s*$/) {
            my $pattern_name  = lc($1);
            my $pattern_value = $2;
            $log_parser_patterns->{$pattern_name} = qr/$pattern_value/;

        } elsif ($line =~ m/^\s*log_parsing_fields\.([-_a-zA-Z0-9]+)\s*=\s*(.*?)\s*$/) {
            my $pattern_name  = lc($1);
            my $pattern_value = $2;
            # Map each field name to its position in the regex's capture list.
            my @field_names  = split(/\s+/, $pattern_value);
            my $fields_index = {};
            foreach my $index (0 .. $#field_names) {
                $fields_index->{$field_names[$index]} = $index;
            }
            $log_parser_maps->{$pattern_name} = $fields_index;

        } elsif ($line =~ m/^\s*__START REPORT TEMPLATE__\s*$/) {
            # Everything up to the end marker is template text, kept verbatim
            # (including line endings).
            my @template_lines = ();
            my $found_end      = 0;
            while (defined(my $template_line = <$fh>)) {
                $linecount++;
                if ($template_line =~ m/\s*__END REPORT TEMPLATE__\s*$/) {
                    $found_end = 1;
                    last;
                }
                push (@template_lines, $template_line);
            }
            if ($found_end) {
                my $report_template = join('', @template_lines);
                $self->report_template($report_template);
            } else {
                $errors .= "No __END REPORT TEMPLATE__ found in configuration\n";
            }

        } elsif ($line =~ m/^\s*pie_chart_size\s*=\s*([1-9][0-9]+)\s*$/) {
            # The pattern only accepts sizes of two or more digits.
            my $size = $1;
            $self->pie_chart_size($size);

        } else {
            $errors .= "Syntax error in configuration at line $linecount: $line\n";
            next;
        }
    }

    # The selected log format needs both a parsing regex and a field map.
    my $log_format = $self->log_format;
    if (not defined $log_format) {
        $errors .= "No 'log_format' was specified in configuration\n";
    } else {
        if (not defined $log_parser_maps->{$log_format}) {
            $errors .= "No 'log_parsing_fields.$log_format' found in configuration\n";
        }
        if (not defined $log_parser_patterns->{$log_format}) {
            $errors .= "No 'log_parsing_regex.$log_format' found in configuration\n";
        }
    }
    if ($errors ne '') {
        croak ($errors);
    }

    $self->log_parse_regex($log_parser_patterns->{$log_format});
    $self->log_parse_map($log_parser_maps->{$log_format});

    return;
}

#######################################################################
##
## =head1 CONFIGURATION FILE
##
## =head1 BUGS
##
## None known.
##
## =head1 TODO
##
## Add more regression tests.
##
## =head1 AUTHOR
##
## Benjamin Franz
##
## =head1 VERSION
##
## Version 1.3.0 27 Aug 2005
##
## =head1 COPYRIGHT
##
## Copyright (c) Benjamin Franz. All rights reserved.
##
## =head1 LICENSE
##
## This program is free software; you can redistribute it
## and/or modify it under the same terms and conditions as
## Perl itself.
##
## This means that you can, at your option, redistribute it and/or modify it under
## either the terms the GNU Public License (GPL) version 1 or later, or under the
## Perl Artistic License.
##
## See http://dev.perl.org/licenses/
##
## =head1 DISCLAIMER
##
## THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS
## OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE
## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
## PARTICULAR PURPOSE.
##
## Use of this software in any way or in any form, source or binary,
## is not allowed in any country which prohibits disclaimers of any
## implied warranties of merchantability or fitness for a particular
## purpose or any disclaimers of a similar nature.
## ## IN NO EVENT SHALL I BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, ## SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE ## USE OF THIS SOFTWARE AND ITS DOCUMENTATION (INCLUDING, BUT NOT ## LIMITED TO, LOST PROFITS) EVEN IF I HAVE BEEN ADVISED OF THE ## POSSIBILITY OF SUCH DAMAGE ## ## =head1 SEE ALSO ## ## http://nihongo.org/snowhare/utilities/browsercounter/ ## ## =cut ## ############################################################################## # From here down is the default configuration and templates used by the # analyzer. # __DATA__ ############################################################################### # report_title is the title to be used for the report # # This can be overridden by the '--report_title=Some Title' command line option. # report_title = Browser Report ############################################################################### # access_log is the log file(s) to be processed. More than one log file may # be specified (one per 'access_log' line). This field is optional, and is # overridden by any command line listed files. # access_log = /var/log/httpd/access_log ############################################################################### # output_dir is the path to the directory used to output the browser report output_dir = /var/www/html/statistics/browsers ############################################################################### # output_file is the file within the output_dir where the report will be placed output_file = index.html ############################################################################### # show_detailed_browser_report flags if you want the 'detail' listing of all # web browsers sorted by name. This listing can be very long. # # I'm not kidding: IT CAN BE VERY LONG. You've been warned. 
# show_detailed_browser_report = no ############################################################################### # 'decompress.xxx' sections declare programs for handling the decompression of # compressed log files. The programs are required to take a filename on the # command line specifying the file to be decompressed and to send the decompressed # output to STDOUT. # # The format is 'decompress.$suffix = $program_invokation' # decompress.z = gzip -cd decompress.gz = gzip -cd decompress.bz2 = bzip2 -cd ############################################################################### # 'exclude_robots' declares whether you want to exclude robots from the reports # (other than those specifically about robots). # # Allowed values are 'yes' and 'no' # exclude_robots = yes ############################################################################### # 'minimum_browser_report_percentage' excludes browsers with less than the # specified percentage from the reports. The number must be a value between # 0 and 100 (inclusive). # minimum_browser_report_percentage = 0.1 ############################################################################### # include_only_refs_to_uri_regex defines a regular expression that URIs # must match to be included. If omitted or blank, all URIs are accepted. # include_only_refs_to_uri_regex = (\/|\.[jps]?html?|\.txt|\.[ja]sp|\.php\d*|\.cf)$ ############################################################################### # exclude_all_refs_to_uri_regex defines a regular expression for excluding # hits on URIs matching the regex from being included. If omitted or blank, # nothing is excluded. 
# exclude_all_refs_to_uri_regex = ############################################################################### # 'robots_useragent_regex' specifies a regular expression used to # identify robot user agents # robots_useragent_regex = http|FSP Utilities|Powermarks|Java\/[0-9]|bot|crawl|spider|slurp|search|google|teoma|archive|htdig|scooter|Bookmark Renewal|webcollage|ichiro|grub|findlinks|libwww-perl|larbin|RedAlert\.com|User-Agent: User-Agent:|User-Agent: Mozilla|MSNPTC|Walker|cfetch|index|Wget ############################################################################### # 'robots_useragent_false_positives_regex' specifies a regular expression used to # identify user agents that are 'false positived' by the 'robots_useragent_regex' # robots_useragent_false_positives_regex = ^Mozilla\/.*MSIE ############################################################################### # 'log_format' declares the log parsing pattern to be used to analyze the # log file log_format = combined ############################################################################### # 'log_parsing_regex.$log_format' is a regular expression used to parse # the log file being analysed. # # 'log_parsing_fields.$log_format' declares the identifying field names for # fields parsed by the 'log_parsing_regex.$log_format' regular expression # # This gives the mapping of each returned item to their field names # in the same order as returned by the log parsing pattern. ############################################################################### # Pattern for parsing a multi-host 'combined' format log file where the # first field is the webhost name and the remaining fields are a standard # 'combined' format log log_parsing_regex.multihost-combined = ^(\S+) (\S+) (\S+) (\S+) \[(\d\d?)/([A-Za-z]{3})/(\d{4}):(\d\d?):(\d\d):(\d\d) (\S+)\] \"(\S+)\s+(\S+)\s+(\S+)\" (\S+) (\S+) \"?([^"]*)\"? 
\"(.*)\" log_parsing_fields.multihost-combined = host remote_addr ident user day month year hour minute second timezone method uri protocal status bytes referrer useragent ############################################################################### # Pattern for parsing a standard 'combined' format log file where the last field is the User # Agent and the second to last field is the referring URL log_parsing_regex.combined = ^(\S+) (\S+) (\S+) \[(\d\d?)/([A-Za-z]{3})/(\d{4}):(\d\d?):(\d\d):(\d\d) (\S+)\] \"(\S+)\s+(\S+)\s+(\S+)\" (\S+) (\S+) \"?([^"]*)\"? \"(.*)\" log_parsing_fields.combined = remote_addr ident user day month year hour minute second timezone method uri protocal status bytes referrer useragent ############################################################################### # Pattern for parsing a reverse 'combined' format log file where the last field is the referring # URL and the second to last field is the User Agent log_parsing_regex.reverse-combined = ^(\S+) (\S+) (\S+) \[(\d\d?)/([A-Za-z]{3})/(\d{4}):(\d\d?):(\d\d):(\d\d) (\S+)\] \"(\S+)\s+(\S+)\s+(\S+)\" (\S+) (\S+) \"?([^"]*)\"? \"(.*)\" log_parsing_fields.reverse-combined = remote_addr ident user day month year hour minute second timezone method uri protocal status bytes useragent referrer ############################################################################### # pie_chart_size sets the size of the pie chart graphic in pixels. 
pie_chart_size = 300 ############################################################################### # platform_map provides identification for platforms by string matching platform_map.'Win32' = Windows NT/2000/XP platform_map.'Windows; I; 32bit' = Windows NT/2000/XP platform_map.'Windows; U; 32bit' = Windows NT/2000/XP platform_map.'Windows 2000' = Windows NT/2000/XP platform_map.'Windows NT 4' = Windows NT/2000/XP platform_map.'Windows NT 4.0' = Windows NT/2000/XP platform_map.'Windows NT 5.0' = Windows NT/2000/XP platform_map.'Windows NT 5.1' = Windows NT/2000/XP platform_map.'Windows NT 5.2' = Windows NT/2000/XP platform_map.'Windows NT 5.3' = Windows NT/2000/XP platform_map.'Windows NT 6.0' = Windows NT/2000/XP platform_map.'Windows NT 6.1' = Windows NT/2000/XP platform_map.'Windows NT' = Windows NT/2000/XP platform_map.'Windows XP' = Windows NT/2000/XP platform_map.'WinNT' = Windows NT/2000/XP platform_map.'WinNT4.0' = Windows NT/2000/XP platform_map.'Nokia' = Nokia platform_map.'Windows 98' = Windows 95/98/ME platform_map.'Win98' = Windows 95/98/ME platform_map.'Win95' = Windows 95/98/ME platform_map.'Windows CE' = Windows CE platform_map.'Win 9x' = Windows 95/98/ME platform_map.'Windows 95' = Windows 95/98/ME platform_map.'SunOS' = SunOS platform_map.'IRIX64' = IRIX64 platform_map.' 
AIX ' = AIX platform_map.'Linux' = Linux platform_map.'WebTV' = WebTV platform_map.'PlayStation Portable' = PlayStation Portable platform_map.'Macintosh; U; PPC Mac OS X' = PPC Macintosh OS X platform_map.'Macintosh; I; PPC Mac OS X' = PPC Macintosh OS X platform_map.'Macintosh; I; 68K' = 68K Macintosh platform_map.'Macintosh; I; PPC' = PPC Macintosh platform_map.'Macintosh; U; 68K' = 68K Macintosh platform_map.'Macintosh; U; PPC' = PPC Macintosh platform_map.'Mac_PowerPC' = PPC Macintosh platform_map.'Mac_PPC' = PPC Macintosh platform_map.'FreeBSD' = FreeBSD platform_map.'BlackBerry' = BlackBerry ############################################################################### # class_color_map provides a map of class names to colors # The format is class_color_map.$classname = $classbgcolor, $classfgcolor # class_color_map.msie = #aaaaff, #000000 class_color_map.firefox = #cccc66, #000000 class_color_map.opera = #66cc66, #000000 class_color_map.safari = #66cccc, #000000 class_color_map.omniweb = #66eeee, #000000 class_color_map.mozilla = #cc6666, #000000 class_color_map.konqueror = #cccccc, #000000 class_color_map.unknown = #aaaaaa, #000000 class_color_map.other = #eeeeee, #000000 class_color_map.winnt = #aaaaff, #000000 class_color_map.win98 = #7777dd, #000000 class_color_map.wince = #5555aa, #000000 class_color_map.webtv = #66cc66, #000000 class_color_map.linux = #66cccc, #000000 class_color_map.psp = #55aaaa, #000000 class_color_map.macosxppc = #cccc66, #000000 class_color_map.macppc = #cccc66, #000000 class_color_map.mac68k = #cccc66, #000000 class_color_map.freebsd = #cc6666, #000000 class_color_map.irix64 = #cccccc, #000000 class_color_map.aix = #cccccc, #000000 class_color_map.sunos = #cccccc, #000000 class_color_map.blackberry = #cccccc, #000000 ############################################################################### ############################################################################### # class_map provides mapping for various keyed items to 
classes # class_map.winnt = Windows NT/2000/XP class_map.nokia = Nokia class_map.win98 = Windows 95/98/ME class_map.wince = Windows CE class_map.sunos = SunOS class_map.irix64 = IRIX64 class_map.aix = AIX class_map.linux = Linux class_map.webtv = WebTV class_map.psp = PlayStation Portable class_map.macosxppc = PPC Macintoh OS X class_map.mac68k = 68K Macintosh class_map.macppc = PPC Macintosh class_map.freebsd = FreeBSD class_map.blackberry = BlackBerry class_map.msie = MSIE class_map.msie = MSIE 7 class_map.msie = MSIE 7.0 class_map.msie = MSIE 7.00 class_map.msie = MSIE 7.01 class_map.msie = MSIE 6 class_map.msie = MSIE 6.0 class_map.msie = MSIE 5 class_map.msie = MSIE 5.0 class_map.msie = MSIE 5.00 class_map.msie = MSIE 5.01 class_map.msie = MSIE 5.17 class_map.msie = MSIE 5.22 class_map.msie = MSIE 5.23 class_map.msie = MSIE 5.5 class_map.msie = MSIE 4 class_map.msie = MSIE 4.0 class_map.msie = MSIE 4.01 class_map.firefox = Firefox class_map.firefox = Firefox 0 class_map.firefox = Firefox 0.10.0 class_map.firefox = Firefox 0.10.1 class_map.firefox = Firefox 1 class_map.firefox = Firefox 0.8 class_map.firefox = Firefox 0.9 class_map.firefox = Firefox 0.9.3 class_map.firefox = Firefox 1.0 class_map.firefox = Firefox 1.0.0 class_map.firefox = Firefox 1.0.1 class_map.firefox = Firefox 1.0.2 class_map.firefox = Firefox 1.0.3 class_map.firefox = Firefox 1.0.4 class_map.firefox = Firefox 1.0.5 #class_map.firefox = Firefox 1.0.6 class_map.firefox = Firefox 1.0.7 class_map.firefox = Firefox 1.0.8 class_map.firefox = Firefox 1.0.9 class_map.firefox = Firefox 1.0.10 class_map.firefox = Firefox 1.0.11 class_map.firefox = Firefox 1.0.12 class_map.firefox = Firefox 1.1 class_map.firefox = Firefox 1.1.0 class_map.firefox = Firefox 1.5 class_map.firefox = Firefox 1.5.0 class_map.firefox = Firefox 1.5.1 class_map.firefox = Firefox 1.5.2 class_map.firefox = Firefox 1.5.3 class_map.firefox = Firefox 1.5.4 class_map.firefox = Firefox 1.5.5 class_map.firefox = Firefox 1.5.6 
class_map.firefox = Firefox 1.5.7 class_map.firefox = Firefox 1.5.8 class_map.firefox = Firefox 1.5.9 class_map.firefox = Firefox 1.5.10 class_map.firefox = Firefox 2 class_map.firefox = Firefox 2.0 class_map.firefox = Firefox 2.0.0 class_map.firefox = Firefox 2.0.1 class_map.firefox = Firefox 2.0.2 class_map.firefox = Firefox 2.0.3 class_map.firefox = Firefox 2.0.4 class_map.firefox = Firefox 2.0.5 class_map.firefox = Firefox 2.0.6 class_map.firefox = Firefox 2.0.7 class_map.firefox = Firefox 2.0.8 class_map.firefox = Firefox 2.0.9 class_map.opera = Opera class_map.opera = Opera 0 class_map.opera = Opera 1 class_map.opera = Opera 2 class_map.opera = Opera 3 class_map.opera = Opera 4 class_map.opera = Opera 5 class_map.opera = Opera 6 class_map.opera = Opera 7 class_map.opera = Opera 7.54 class_map.opera = Opera 8 class_map.opera = Opera 8.01 class_map.opera = Opera 9 class_map.opera = Opera 10 class_map.omniweb = OmniWeb class_map.omniweb = OmniWeb 0 class_map.omniweb = OmniWeb 1 class_map.omniweb = OmniWeb 2 class_map.omniweb = OmniWeb 3 class_map.omniweb = OmniWeb 4 class_map.omniweb = OmniWeb 5 class_map.safari = Safari class_map.safari = Safari 85 class_map.safari = Safari 125 class_map.safari = Safari 312 class_map.safari = Safari 412 class_map.konqueror = Konqueror class_map.konqueror = Konqueror 0 class_map.mozilla = Mozilla class_map.mozilla = Mozilla 0 class_map.mozilla = Mozilla 1 class_map.mozilla = Mozilla 2 class_map.mozilla = Mozilla 3 class_map.mozilla = Mozilla 3.0 class_map.mozilla = Mozilla 3.01 class_map.mozilla = Mozilla 4 class_map.mozilla = Mozilla 4.0 class_map.mozilla = Mozilla 4.01 class_map.mozilla = Mozilla 5 class_map.mozilla = Mozilla 5.0 class_map.mozilla = Mozilla 5.5 class_map.mozilla = Mozilla 6 class_map.mozilla = Mozilla 7 class_map.unknown = Unknown class_map.unknown = Unknown 0 class_map.other = Other class_map.other = Other 0 ############################################################################### # The report template 
blocks contains the HTML templates used to generate the main # browser report. This allows nearly complete freedom in choosing what sections # will appear in the report and how they will be presented. # __START REPORT TEMPLATE__ ${report_title}

${report_title}

${report_date}
${processed_lines} lines processed in ${elapsed_time} seconds (${lines_per_second} lines / ${megabytes_per_second} megabytes per second).
${unparsable_lines} lines could not be parsed. ${robot_hits} hits looked like robots.
${excluded_lines} lines excluded, ${measured_hits} hits measured in this run.
Report cutoff at ${browser_report_cutoff}%

Platform
Hits Percent Platform
${hits} ${percentage}% ${name}
Platform Pie Chart
Browser Brand
Hits Percent Browser
${hits} ${percentage}% ${name}
Brand Pie Chart
Major Version
Hits Percent Browser
${hits} ${percentage}% ${name}
Major Version Pie Chart
Minor Version
Hits Percent Browser
${hits} ${percentage}% ${name}
Minor Version Pie Chart


__END REPORT TEMPLATE__