#!/usr/bin/perl -w
#
# Script to split an Apache Extended Combined access log into individual
# logs based on the virtual host logged in the second field.
#
# Written by Jonathon Vance
# Copyright (c) 2002 Quantified Systems, Inc.
#
# Each input line is split on single spaces; the second field is taken as
# the virtual host and the whole line is appended to "<dir>/<vhost>.log".
# Lines without a second field are skipped.

use strict;
use Getopt::Long;

# Get the log file to split and an optional directory in which to write
# the split log files.  The directory defaults to the current directory;
# fall back to '.' when the environment does not provide PWD.
my $log  = '';
my $dir  = defined $ENV{'PWD'} ? $ENV{'PWD'} : '.';
my $help = 0;

# GetOptions reports unknown/malformed options itself and returns false.
if (!GetOptions('log=s' => \$log, 'dir=s' => \$dir, 'help' => \$help)) {
    usage();
    exit 1;
}

if ($help) {
    usage();
    exit;
}

if ($log eq '') {
    die "No log file given; use --log /path/to/log_to_split (see --help). Exiting.\n";
}

# Verify that the directory exists and is writeable.  Any single failed
# test is fatal, so the tests are joined with "or", not "and".
if (!(-e $dir) || !(-d $dir) || !(-w $dir)) {
    die "$dir does not exist, is not a directory, or is not writeable. Exiting.";
}

# Check that the log file exists and is readable.
if (!(-e $log) || !(-r $log)) {
    die "$log does not exist or is not readable. Exiting.";
}

# Open the logfile as readonly.  Three-argument open keeps characters in
# the file name from being interpreted as an open mode.
open(my $log_fh, '<', $log) or die "Problem opening log file: $!";

# Output path => append handle.  An undef value records a path whose open
# failed, so the failure is reported once and later hits are skipped.
my %files = ();

# Virtual host names already reported as unsafe (warn only once each).
my %unsafe = ();

# Loop through each line and write it to the appropriate log file.
while (my $line = <$log_fh>) {

    # Get the virtual host from the hit (make sure it exists).
    my @parts = split(/ /, $line, 3);
    my $vhost = $parts[1];
    next if !defined $vhost;

    # On a two-field line the vhost still carries the line terminator;
    # strip it so it does not end up in the output file name.
    $vhost =~ s/\s+\z//;
    next if $vhost eq '';

    # The vhost comes from the log and becomes part of a file name.  A
    # path separator would let a hit be written outside of $dir, so such
    # hits are skipped.
    if ($vhost =~ m{[/\0]}) {
        warn "Skipping hits with unsafe virtual host name '$vhost'\n"
            unless $unsafe{$vhost}++;
        next;
    }

    # Open the output file if not already open.
    my $out = "$dir/$vhost.log";
    if (!exists $files{$out}) {
        my $new_fh;
        if (open($new_fh, '>>', $out)) {
            $files{$out} = $new_fh;
        }
        else {
            warn "Unable to open $out: $!";
            $files{$out} = undef;
        }
    }

    # Print the hit to the file.
    my $out_fh = $files{$out};
    next if !defined $out_fh;
    print {$out_fh} $line or warn "Unable to write to $out: $!";
}

close($log_fh) or warn "Could not close main log file: $!";

# Close the open files and exit.  Buffered write errors surface at close,
# so each close is checked and reported with the affected path.
foreach my $path (sort keys %files) {
    my $out_fh = $files{$path};
    next if !defined $out_fh;
    close($out_fh) or warn "Problems closing $path: $!";
}

# Subroutine to show the usage of this script.
# Takes no arguments; prints the help text to the currently selected
# output handle and returns nothing.
sub usage {
    print <<"END_USAGE";
Usage: $0 --log /path/to/log_to_split \\
          --dir /path/to/dir_for_split_logs
          --help

Where:
  --log   specifies the logfile to split
  --dir   specifies the directory to write the split logs to
          (default is the current directory)
  --help  prints this message

This script splits an Apache access log file into multiple log files
based on the virtual host logged in the second field. The new log files
are written to a specifiable directory (default is the current directory)
and are named \$vhost.log where \$vhost is the name of the virtual host in
the log file. For instance, a hit that has www.urchin.com logged in the
second field will be written to www.urchin.com.log.

END_USAGE
    return;
}