#!/usr/bin/perl -- 		# -*- perl -*-
# ====================================================================
# Copyright (c) 1995-1999 The Apache Group.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer. 
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in
#    the documentation and/or other materials provided with the
#    distribution.
#
# 3. All advertising materials mentioning features or use of this
#    software must display the following acknowledgment:
#    "This product includes software developed by the Apache Group
#    for use in the Apache HTTP server project (http://www.apache.org/)."
#
# 4. The names "Apache Server" and "Apache Group" must not be used to
#    endorse or promote products derived from this software without
#    prior written permission. For written permission, please contact
#    apache@apache.org.
#
# 5. Products derived from this software may not be called "Apache"
#    nor may "Apache" appear in their names without prior written
#    permission of the Apache Group.
#
# 6. Redistributions of any form whatsoever must retain the following
#    acknowledgment:
#    "This product includes software developed by the Apache Group
#    for use in the Apache HTTP server project (http://www.apache.org/)."
#
# THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE APACHE GROUP OR
# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
# ====================================================================
#
# This software consists of voluntary contributions made by many
# individuals on behalf of the Apache Group and was originally based
# on public domain software written at the National Center for
# Supercomputing Applications, University of Illinois, Urbana-Champaign.
# For more information on the Apache Group and the Apache HTTP server
# project, please see <http://www.apache.org/>.
#
#
# cronosplit -- split log files into cronolog-compatible logs
#
# Copyright (c) 1996-1999 by Ford & Mason Ltd
#
# This software was submitted by Ford & Mason Ltd to the Apache
# Software Foundation in December 1999.  Future revisions and
# derivatives of this source code must acknowledge Ford & Mason Ltd
# as the original contributor of this module.  All other licensing
# and usage conditions are those of the Apache Software Foundation.
#
# cronosplit is loosly based on the splitlog script by 
# Roy Fielding <fielding@ics.uci.edu>
# (splitlog is part of the wwwstat package,
#  see <http://www.ics.uci.edu/pub/websoft/wwwstat/>)

require 5.8.0;

use Getopt::Long;
use Time::Local;
use POSIX qw(strftime);

use strict;

my $program    = 'cronosplit';
my $version    = '1.6.2';

# Programs

my $ZCAT = '/bin/zcat';
my $BZCAT = '/usr/bin/bzcat';

# Parameters

my $MaxHandles = 50;
my $DirMode    = 0775;

# Patterns for log file entries (Common Log Format) and timestamps

my $access_log_entry_pattern = "^(\\S+) (\\S+) ([^[]+) \\[([^]]*)] \"([^\"]*)\" (\\S+) (\\S+)(.*)";
my $access_timestamp_pattern = "^([ 0-3]?\\d)/([A-Za-z]+)/(\\d{4}):(\\d\\d):(\\d\\d):(\\d\\d) [+ -]\\d{1,4}";

my $error_log_entry_pattern = "^\\[([^]]*)] \\[([^]]*)] (\\S+)(.*)";
my $error_timestamp_pattern = "^[A-Z][a-z]{2} ([A-Z][a-z]{2}) ([0-3]?\\d) (\\d\\d):(\\d\\d):(\\d\\d) (\\d{4})";

# An associative array of month names and abbreviations

my %month = (Jan => 1,  January   => 1,
	  Feb => 2,  February  => 2,
	  Mar => 3,  March     => 3,
	  Apr => 4,  April     => 4,
	  May => 5,
	  Jun => 6,  June      => 6,
	  Jul => 7,  July      => 7,
	  Aug => 8,  August    => 8,
	  Sep => 9,  September => 9,  Sept => 9,
	  Oct => 10, October   => 10,
	  Nov => 11, November  => 11,
	  Dec => 12, December  => 12);


# Variables

my $file;
my $outfile;
my $template;
my $debug;
my $verbose;
my $utime;
my $PrintInvalids;
my $print_help;
my $print_version;
my $handle;

my %OpenHandles = ();
my @HandlesInUse = ();
my %utimes = ();

# Process options

(GetOptions("template=s",	\$template,
	    "print-invalid",	\$PrintInvalids,
	    "utime",		\$utime,
	    "debug",            \$debug,
	    "verbose",          \$verbose,
	    "help",             \$print_help,
	    "version",          \$print_version)
 and ($print_help or $print_version or $template))
    or $print_help++;

$verbose++ if $debug;		# --debug implies --verbose


# If version number requested, print it and exit

if ($print_version)
{
    print <<EOF;
$program version $version

Copyright (C) 1997-1998 Ford & Mason Ltd
This is free software; see the source for copying conditions.
There is NO warranty; not even for MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.

Written by Andrew Ford <A.Ford\@ford-mason.co.uk>

$program is part of the cronolog package.
The latest version of which can be found at:

    http://www.ford-mason.co.uk/resources/cronolog/
EOF
    exit(0);
}


# If help requested, print it and exit

if ($print_help)
{
    print <<EOS;
Usage: $program --template=TEMPLATE [OPTIONS] file ...

  --template=TEMPLATE   output log-file template
  --print-invalid       print invalid log-file entries
  --utime               update mtime of output files to last entry
  --help                print this help, then exit
  --version             print version number, then exit
EOS
    exit(0);
}



# Process each input file specified

push(@ARGV, "-") unless @ARGV;

 FILE:
foreach $file (@ARGV) {
    if ($file eq "-") {
	print STDERR "reading from STDIN\n" if $verbose;
	*INFILE = *STDIN;
    }
    elsif ($file =~ /\.gz$/) {
	print STDERR "opening \"$file\"\n" if $verbose;
	
	if (!open(INFILE, "-|", $ZCAT, $file)) {
	    print STDERR "cannot open \"$file\"\n";
	    next FILE;
	}
    }
    elsif ($file =~ /\.bz2$/) {
	if (! -x $BZCAT)
	{
	    print STDERR "Cannot process \"$file\", install bzip2 first.\n";
	    next FILE;
	}
	print STDERR "opening \"$file\"\n" if $verbose;

	if (!open(INFILE, "-|", $BZCAT, $file)) {
	    print STDERR "cannot open \"$file\"\n";
	    next FILE;
	}
    }
    else {
	print STDERR "opening \"$file\"\n" if $verbose;

	if (! open(INFILE, "<$file")) {
	    print STDERR "cannot open \"$file\"\n";
	    next FILE;
	}
    }


    # For each line in the current input log file parse the line,
    # determine the appropritate output log and write the line.

  LINE:
    while (<INFILE>)
    {
	my ($timestamp, $time_t);
	if (/$access_log_entry_pattern/)
	{
	    $timestamp = $4;
	    if ($timestamp =~ /$access_timestamp_pattern/)
	    {
		# 1:day 2:month 3:year 4:hour 5:minute 6:second
		$time_t = eval { timelocal($6, $5, $4, $1, $month{$2} - 1, $3 - 1900) };
	    }
	}
	elsif (/$error_log_entry_pattern/)
	{
	    $timestamp = $1;
	    if ($timestamp =~ /$error_timestamp_pattern/)
	    {
		# 1:month 2:day 3:hour 4:minute 5:second 6:year
		$time_t = eval { timelocal($5, $4, $3, $2, $month{$1} - 1, $6 - 1900) };
	    }
	}
        if (!defined ($time_t))
        {
            if ($PrintInvalids) { print STDERR "$.:$_"; }
            next LINE;
        }

        next LINE unless defined($outfile = &get_handle($template, $time_t));

        print($outfile $_);

    }
    close(INFILE);
}

# close any open files
foreach my $oldkey (@HandlesInUse)
{
    $handle = $OpenHandles{$oldkey};
     close $handle;
    if ($utime)
    {
	my $mtime = $utimes{$oldkey};
	utime $mtime, $mtime, $oldkey;
    }
}

# Get a file handle for a log file, closing the oldest handle if there
# are too many handles open

sub get_handle
{
    my($template, $time_t) = @_;


    # Determine the filename from the template and time

    my($file) = $template;
    my(@time) = localtime($time_t);
    $file =~ s/(%[a-zA-Z%])/strftime($1, @time)/eg;

    if ($utime)
    {
	if (!defined ($utimes{$file}) || $time_t > $utimes{$file})
	{
	    $utimes{$file} = $time_t;
	}
    }

    # See if we already have it open and ready to write
    
    return $handle if defined($handle = $OpenHandles{$file});

    # See if we already have too many files opened

    if (($#HandlesInUse + 1) >= $MaxHandles)
    {
        my ($oldkey) = shift @HandlesInUse;   # close the oldest
        $handle = $OpenHandles{$oldkey};
        close $handle;
	if ($utime)
	{
	    my $mtime = $utimes{$oldkey};
	    utime $mtime, $mtime, $oldkey;
	}
        delete $OpenHandles{$oldkey};
    }

    # Finally, try to open and remember a new handle for this pathkey

    undef $handle;

    make_dirs($file);

    if (open($handle, ">>$file"))
    {
        push(@HandlesInUse, $file);
        $OpenHandles{$file} = $handle;
        return $handle;
    }
    else
    {
        warn "Failed open of $file: $!\n";
        return undef;
    }
}


# Make any missing directories on the path specified
# (this subroutine is not optimal).

sub make_dirs
{
    my($path) = shift;
    my($abs);
    
    # Strip off the filename bit

    $path =~ s!/[^/]*$!! or return;


    # Return early if the directory exists

    return if -d $path;


    # Trim off any leading '/' (remembering whether the path was
    # absolute or relative)

    $path =~ s!^(/)!!;
    $abs  = $1;


    # Split what's left into directories

    my(@path) = split(/\//, $path);


    # Check each directory on the path

    foreach my $i  (0 .. $#path)
    {
	$path = $abs . join("/", (@path[0 .. $i]));
	print(STDERR "Testing $path\n") if $debug;
	print(STDERR "Making  $path\n") if !-d $path and $debug;
	mkdir($path, $DirMode) unless -d $path;
    }
}
