Generates a report from the data acquired in the read_new_logis and/or
read_old_reports methods. If an '-ouput_file' has been set, outputs
the report to that file. Also returns the HTML output as its return
value.
=back
=cut
sub make_report {
my ($self) = shift;
my ($output_file,$body_tag,$head_tags,
$system_name,$results,$start_date,
$end_date,$run_time_start) = $self->get('-output_file','-body_tag','-head_tags','-system_name',
'-results','-start_date','-end_date','-run_time_start');
my ($output_list) = {};
my @month_names = ('','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec');
my (@url_list) = sort {lc ($a) cmp lc ($b)} keys %{$results->{-urls}};
my (%output_list,$url,$esc_url,$bookmarked_entries,$bookmarked_last_date,$year,$month,$day,$hour,$min,$sec,$last_date);
foreach $url (@url_list) {
$esc_url = $url;
$esc_url =~ s/\&/\&/o;
$esc_url =~ s/\\</o;
$esc_url =~ s/\>/\>/o;
$esc_url =~ s/\"/\"/o;
$bookmarked_entries = $results->{-urls}->{$url}->{-count};
1 while ($bookmarked_entries =~ s/(.*\d)(\d\d\d)/$1,$2/o); # inserts commas into number
$bookmarked_last_date = $results->{-urls}->{$url}->{-last_date};
($year,$month,$day,$hour,$min,$sec) = $bookmarked_last_date =~ m/^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})$/o;
$last_date = "$hour:$min:$sec $day $month_names[$month] $year";
$output_list{$url} = sprintf("%s %10s %s\n",$last_date,$bookmarked_entries,$esc_url);
}
$system_name =~ s/\&/\&/o;
$system_name =~ s/\\</o;
$system_name =~ s/\>/\>/o;
$system_name =~ s/\"/\"/o;
# Figure the run time for the script
my $run_time = time - $run_time_start;
my $minutes = int ($run_time / 60);
my $seconds = int ($run_time % 60);
if ($seconds != 1) {
$run_time = "$seconds seconds";
} else {
$run_time = "$seconds second";
}
if ($minutes > 1) {
if ($seconds > 0) {
$run_time = "$minutes minutes $run_time";
} else {
$run_time = "$minutes minutes";
}
} elsif ($minutes == 1) {
if ($seconds > 0) {
$run_time = "$minutes minute $run_time";
} else {
$run_time = "$minutes minute";
}
}
# Figure today's date
my $today = scalar localtime time;
my $output =<<"EOF";
$system_name
$head_tags
$body_tag
$system_name
Generated On: $today
Run Time: $run_time
EOF
##########
# By URL #
##########
$output .=<<"EOF";
Last On Number Page
EOF
foreach my $item (@url_list) {
$output .= $output_list{$item};
}
$output .= "\n";
#####################
# By Bookmark Count #
#####################
$output .=<<"EOF";
Last On Number Page
EOF
foreach my $item (sort { $results->{-urls}->{$b}->{-count} <=> $results->{-urls}->{$a}->{-count} } @url_list) {
$output .= $output_list{$item};
}
$output .= "\n";
$output .=<<"EOF";
Created using $VERSION
EOF
# Save the report
$self->set({ -output => $output });
if ($output_file) {
if (not open (OUTPUTFILE,">$output_file")) {
warn ("Unable to open output file '$output_file' for writing: $!\n");
} else {
print OUTPUTFILE $output;
close (OUTPUTFILE);
}
}
$output;
}
##########################################################################
=over 4
=item C
Parses the '-log_type' and/or '-log_format' to determine the
correct regular expression (set into '-log_regex') to analyze
the log file.
commentary text stolen from mod_log_config.c in the Apache distribution
with some additions
The argument to LogFormat and CustomLog is a string, which can include
literal characters copied into the log files, and '%' directives as
follows:
%...b: bytes sent, excluding HTTP headers.
%...f: filename
%...h: remote host
%...{Foobar}i: The contents of Foobar: header line(s) in the request
sent to the client.
%...L: The HTTP protocal level identifier
%...l: remote logname (from identd, if supplied)
%...{Foobar}n: The contents of note "Foobar" from another module.
%...{Foobar}o: The contents of Foobar: header line(s) in the reply.
%...m The HTTP method used
%...p: the port the request was served to
%...P: the process ID of the child that serviced the request.
%...r: first line of request
%...s: status. For requests that got internally redirected, this
is status of the *original* request --- %...>s for the last.
%...t: time, in common log format time format
%...{format}t: The time, in the form given by format, which should
be in strftime(3) format.
%...T: the time taken to serve the request, in seconds.
%...u: remote user (from auth; may be bogus if return status (%s) is 401)
%...U: the URL path requested.
%...v: the name of the server (i.e. which virtual host?)
The '...' can be nothing at all (e.g. "%h %u %r %s %b"), or it can
indicate conditions for inclusion of the item (which will cause it
to be replaced with '-' if the condition is not met). Note that
there is no escaping performed on the strings from %r, %...i and
%...o; some with long memories may remember that I thought this was
a bad idea, once upon a time, and I am still not comfortable with
it, but it is difficult to see how to "do the right thing" with all
of '%..i', unless we URL-escape everything and break with CLF.
The forms of condition are a list of HTTP status codes, which may
or may not be preceded by '!'. Thus, '%400,501{User-agent}i' logs
User-agent: on 400 errors and 501 errors (Bad Request, Not
Implemented) only; '%!200,304,302{Referer}i' logs Referer: on all
requests which did *not* return some sort of normal status.
The default LogFormat reproduces CLF; see below.
The way this is supposed to work with virtual hosts is as follows:
a virtual host can have its own LogFormat, or its own TransferLog.
If it doesn't have its own LogFormat, it inherits from the main
server. If it doesn't have its own TransferLog, it writes to the
same descriptor (meaning the same process for "| ...").
=cut
sub parse_log_format {
my ($self) = shift;
my ($log_format,$log_type) = $self->get('-log_format','-log_type');
my %log_format_regex_patterns = (
'%b' => '\d+|-', # Number of bytes sent
'%h' => '[a-zA-Z0-9-.]+', # Remote host
'%f' => '\S+', # Filename
'%{referer}i' => '\S*', # Referrer
'%{user-agent}i' => '[^"]*', # User Agent
'%i' => '[^"]*', # Header junk
'%l' => '\S+', # remote logname (identd if provided)
'%p' => '\d+', # Port the request was server to
'%P' => '\d+', # Process ID of the child that serviced the request
'%r' => '[^"]*', # First line of request
'%s' => '\d\d\d', # Status Code
'%t' => '\[\d\d/\w\w\w/\d\d\d\d:\d\d:\d\d\:\d\d\s[+-]?\d\d\d\d]', # Date/Time stamp in CLF format
'%T' => '\d', # Time taken to serve the request in seconds
'%u' => '\S+', # Remote user (authentication)
'%U' => '\S+', # URL path requested
'%v' => '[a-zA-Z0-9.-]+', # Server name
'%m' => '\S+', # HTTP protocal method
'%L' => '\S+', # HTTP protocal level
);
my %log_format_identifiers = (
'%b' => 'bytes',
'%h' => 'remotehost', # Remote host
'%f' => 'filename', # Filename
'%{referer}i' => 'referrer', # Referrer
'%{user-agent}i' => 'useragent', # User Agent
'%i' => '[^"]*', # Header junk
'%l' => 'rfc931', # remote logname (identd if provided)
'%p' => 'port', # Port the request was server to
'%P' => 'process', # Process ID of the child that serviced the request
'%r' => 'request', # First line of request
'%s' => 'status', # Status Code
'%t' => 'timedate', # Date/Time stamp in CLF format
'%T' => 'servicetime', # Time taken to serve the request in seconds
'%u' => 'authuser', # Remote user (authentication)
'%U' => 'url', # URL path requested
'%v' => 'server', # Server name
'%m' => 'method', # HTTP method
'%L' => 'protocal' # HTTP protocal level
);
# [10/Aug/1996:09:55:25 -0700] <-CLF format time
if (($log_format eq '') and ($log_type =~ m#^common$#oi)) {
$log_format = '%h %l %u %t \"%m %U %L\" %s %b';
} elsif (($log_format eq '') and ($log_type =~ m#^combined$#oi)) {
$log_format = '%h %l %u %t \"%m %U %L\" %s %b \"?%{referer}i\"? \"%{user-agent}i\"';
}
my $match_format = "\^$log_format"; # Not terminated to add some forgiveness for wrong formats
# We are going to assume that everything is seperated by white space
my @log_elements = split(/\s+/,$log_format);
my $log_field_to_name = {};
my $name_to_log_field = {};
for (my $count=0;$count <= $#log_elements;$count++) {
my $ident_item = $log_elements[$count];
$ident_item =~ s#[^%}{a-zA-Z-]##og;
if (defined $log_format_regex_patterns{$ident_item}) {
my $match_item = quotemeta ($ident_item);
my $match_pattern = $log_format_regex_patterns{$ident_item};
$match_format =~ s/$match_item/\($match_pattern\)/;
$log_field_to_name->{$count} = $log_format_identifiers{$ident_item};
$name_to_log_field->{$log_format_identifiers{$ident_item}} = $count;
}
}
$self->set({ -log_field_to_name => $log_field_to_name,
-name_to_log_field => $name_to_log_field,
-log_regex => qr /$match_format/,
});
}
############################################################################
=over 4
=item C
Declares one or more parameters for use.
=back
=cut
sub declare {
my $self = shift;
my (@parmnames) = @_;
my $parmname;
foreach $parmname (@parmnames) {
$parmname = lc ($parmname);
$self->{-legal_parms}->{$parmname} = 1;
}
}
############################################################################
=over 4
=item C
Sets one or more named parameter values.
=back
=cut
sub set {
my $self = shift;
my ($parm_ref) = @_;
my (@parmnames) = keys %$parm_ref;
my $parmname;
foreach $parmname (@parmnames) {
my $keyname = lc ($parmname);
my $value = $parm_ref->{$parmname};
confess ("Attempted to set an undeclared named parameter: $keyname") if (not exists $self->{-legal_parms}->{$keyname});
$self->{-parm_values}->{$keyname} = $value;
}
}
############################################################################
=over 4
=item C
Gets one or more named parameter values. Returned as an array
if more than one requested.
=back
=cut
sub get {
my $self = shift;
my (@results) = ();
my (@parmnames) = @_;
my $parmname;
foreach $parmname (@parmnames) {
my $keyname = lc ($parmname);
confess ("Attempted to get an undeclared named parameter: $keyname") if (not exists $self->{-legal_parms}->{$keyname});
confess ("Attempted to get an uninitialized named parameter: $keyname") if (not exists $self->{-parm_values}->{$keyname});
push (@results,$self->{-parm_values}->{$keyname});
}
return @results;
}
############################################################################
=head1 AUTHOR
Benjamin Franz
=head1 TODO
Documentation. Integration.
=cut
1;