#!/usr/bin/perl -w
# This script takes an Apache logfile on stdin (or files named on the
# command line) and parses it to figure out what is taking up so much
# time: by ip, by domain or by url.
#
# This should be named time_per_domain and symlinked to time_per_ip
# and time_per_url to accomplish the three functions above.  The mode
# is chosen from the basename of $0; any name other than time_per_ip
# or time_per_url selects the per-domain mode.
#
# This assumes the following logfile format:
#
# LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %T %{Host}i" alternc
#
# %D could be used instead of %T to have more precise results.
#
# Options:
#   -t  also print the number of hits for each key and in the summary
#   -a  print the average time per hit instead of the sum (implies -t)
#
# Output is a tab separated value format, with the seconds (or
# microseconds, if %D is used) in the first column, and whatever stat
# is requested in the second one.  The last line is a summary: the
# total (or overall average with -a) and the number of unique keys.

use strict;
use warnings;
use Getopt::Std;

# parse_line($mode, $line)
#
# Extract the aggregation key and the time value from one log line.
# $mode is 'time_per_ip', 'time_per_url', or anything else for the
# per-domain mode.  Returns the list ($key, $time), or an empty list
# when the line does not match the expected format.
#
# The Host field accepts hyphens and a ":port" suffix, so a host such
# as my-site.example.com is not silently dropped.  As a consequence, a
# missing Host header (logged as "-") is counted under the key "-".
sub parse_line {
    my ($mode, $line) = @_;

    if ($mode eq 'time_per_ip') {
        # Fairly simple: we take the address at the beginning (IPv4 or
        # IPv6), skip as little as possible (.*?) to get the time and
        # the domain at the end.
        if ($line =~ /^([\da-f.:]*) .*? (\d+) ([\w.:-]*)$/i) {
            return ($1, $2);
        }
    }
    elsif ($mode eq 'time_per_url') {
        # Funky regex: we need to find the url, so we rely on
        # double-quotes and assume spaces in the url are url-encoded.
        if ($line =~ /^[^"]*"\w+ ([^ ]*) [^"]*".*? (\d+) ([\w.:-]*)$/i) {
            # Copy the captures before the substitution below resets them.
            my ($path, $time, $host) = ($1, $2, $3);
            # Do some canonicalisation: remove duplicated slashes.
            (my $url = $host . $path) =~ s#///*#/#g;
            return ($url, $time);
        }
    }
    else {
        # The simpler regex: just look at the end of the line for the
        # time and the domain.
        if ($line =~ /(\d+) ([\w.:-]*)$/i) {
            return ($2, $1);
        }
    }

    return;
}

# format_report(\%stats, \%hits, $average, $show_hits)
#
# Build the output lines (each terminated by a newline) from the summed
# times in %stats and the hit counts in %hits.  When $average is true
# every value is divided by its hit count and printed with two
# decimals, and the summary shows the overall average per hit.  When
# $show_hits is true a hit-count column and a hit total are appended.
# Rows are sorted by the printed value, ascending; ties are ordered by
# key so the output is stable between runs.
sub format_report {
    my ($stats, $hits, $average, $show_hits) = @_;

    my %value = %$stats;    # work on a copy: averaging overwrites the sums
    my ($total, $total_hits) = (0, 0);

    for my $key (keys %value) {
        my $n = $hits->{$key} || 0;
        # The grand total must be taken before the sum is averaged away.
        $total      += $value{$key};
        $total_hits += $n;
        $value{$key} /= $n if $average && $n;
    }

    my @lines;
    for my $key (sort { $value{$a} <=> $value{$b} or $a cmp $b } keys %value) {
        my $row = $average
            ? sprintf("%0.2f\t%s", $value{$key}, $key)
            : "$value{$key}\t$key";
        $row .= "\t" . ($hits->{$key} || 0) if $show_hits;
        push @lines, "$row\n";
    }

    if ($average) {
        # Avoid a division by zero when no line matched at all.
        $total = sprintf "%0.2f", $total_hits ? $total / $total_hits : 0;
    }

    my $count   = scalar keys %value;
    my $summary = "$total\t$count unique matches";
    $summary .= " and $total_hits hits" if $show_hits;
    push @lines, "$summary\n";

    return @lines;
}

# main()
#
# Parse the command-line flags, pick the mode from the script name,
# aggregate every line read from the files in @ARGV (or stdin) and
# print the report on stdout.
sub main {
    my %opts;
    getopts('at', \%opts);

    my $average   = $opts{a};
    my $show_hits = $opts{t} || $opts{a};    # -a implies -t

    # Get the basename of this script to figure out the mode.
    (my $mode = $0) =~ s{.*/}{};

    my (%stats, %hits);

    # Iterate over all files or stdin.
    while (my $line = <>) {
        my ($key, $time) = parse_line($mode, $line);
        next unless defined $key;
        $stats{$key} += $time;
        $hits{$key}++;
    }

    print format_report(\%stats, \%hits, $average, $show_hits);
    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without consuming stdin.
main() unless caller;

1;