package PPPUsage::Parse;

# provides:
#
# parse ($datafile, @logfiles)

use strict;
use DB_File;
use Exporter;
use File::Temp ();
# NOTE(review): Perl 5.26 removed POSIX::tmpnam and rejects this import;
# drop this line once nothing in the package calls tmpnam() any more.
use POSIX qw(tmpnam);

# Exporter wiring: the package inherits import() from Exporter and
# exports parse() by default.

use vars qw(@ISA @EXPORT);

@EXPORT = ('parse');
@ISA    = ('Exporter');

# -----> parse ($datafile, @logfiles)
#
# Parses @logfiles (processed oldest first, ordered by mtime) and saves the
# relevant PPP data in a Berkeley DB hash stored in $datafile.
#
# Parameters:
#   $datafile  - path of the DB_File hash; it is created if missing
#   @logfiles  - ppp[d] logfiles; names ending in ".gz" are decompressed
#                with $main::gzip into a private temporary file first
#
# Globals read:
#   $main::me     - program name used as prefix of error messages
#   $main::linux  - true: Linux pppd log format, false: *BSD user-ppp
#   $main::gzip   - gzip command (may carry options, split on blanks)
#
# Database layout:
#   <n>                 "<y>,<mm>,<day>,<seconds>,<octets in>,<octets out>"
#                       for connection number <n>, <y> being a year index
#                       that starts at 0 and grows at every new year
#   <y>-start, <y>-end  first/last connection number of year index <y>
#   <y>-<mm>-start, <y>-<mm>-end
#                       first/last connection number of month <mm>
#   connects            number of stored connections
#   years               current (highest) year index
#   thisyear            calendar year taken from the newest logfile's mtime
#
# Prints progress messages to the currently selected output handle.
# Returns 0 on success and dies on I/O errors or unparsable logfiles.
#
sub parse {

	my ($datafile, @ppplogs) = @_;

	my $me = $main::me;

	my %mmname = ( 'Jan' => "01",
	               'Feb' => "02",
	               'Mar' => "03",
	               'Apr' => "04",
	               'May' => "05",
	               'Jun' => "06",
	               'Jul' => "07",
	               'Aug' => "08",
	               'Sep' => "09",
	               'Oct' => "10",
	               'Nov' => "11",
	               'Dec' => "12" );

	# Note: The following patterns work for the Linux pppd and the *BSD
	# user-ppp log formats respectively. If you adjust them
	# appropriately, pppusage should work with any ppp[d] that logs
	# the date (month and day) and transfer volume (in and out).
	# They are compiled with /x, hence the escaped blanks; the line
	# breaks inside the patterns are ignored.

	my $regex = $main::linux
	          ? qr/([A-Za-z]+)\s+(\d+)\s+(\d+):(\d+):(\d+)\s+.*pppd
	               \[\d+\]:\ Sent\ (\d+)\ bytes,\ received\ (\d+)\ bytes\./x
	          : qr/([A-Za-z]+)\s+(\d+)\s+(\d+):(\d+):(\d+)\s+.*ppp
	               \[\d+\]:\ tun\d:\ Phase:\ deflink:\ Connect\ time:
	               \ (\d+)\ secs:\ (\d+)\ octets\ in,\ (\d+)\ octets\ out/x;

	# Linux pppd logs the connect time on a line of its own (in minutes)

	my $limin = qr/[A-Za-z]+\s+\d+\s+\d+:\d+:\d+\s+.*pppd
	               \[\d+\]:\ Connect\ time\ (\d+\.\d)\ minutes\./x;

	my $i      = 0;     # number of the next connection to store
	my $y      = 0;     # current year index
	my $ignore = 0;     # log entries to skip, they are stored already
	my $old_m;          # month of the previously stored connection
	my $con_sec;        # last connect time seen (Linux only), seconds

	my %DB;

	# tie %DB to a Berkeley DB HASH in $datafile

	tie(%DB, 'DB_File', $datafile, O_CREAT|O_RDWR, 0644, $DB_HASH)
		or die "$me: Can't tie Hash \"\%DB\" to $datafile (try `-c'): $!\n";

	my $action = defined $DB{'connects'} ? "updating" : "populating";

	# Note: If we're updating, we assume that all new entries are
	# actually newer than the entries we got, thus we can set $i, $y
	# and $old_m to the old values. If that assumption is wrong, the
	# year and month bookkeeping below goes wrong as well.

	if ($action eq "updating") {

		$i = $DB{'connects'};
		$y = $DB{'years'};
		$old_m = $1 if $DB{$i-1} =~ /^\d+,(\d+),/;
	}

	print "$action database in $datafile:\n\n";

	# sort logfiles by mtime (oldest files first); mtimes are numbers,
	# so they must be compared with <=> (a string comparison misorders
	# timestamps that differ in their number of digits)

	@ppplogs = sort { (stat($a))[9] <=> (stat($b))[9] } @ppplogs;

	foreach my $logsrc (@ppplogs) {

		my $log;

		if ($logsrc =~ /\.gz$/) {

			# private temporary copy, removed automatically as soon
			# as $log goes out of scope (even if we die)

			$log = _gunzip_to_temp($logsrc);

		} else {

			open($log, '<', $logsrc)
				or die "$me: Can't open $logsrc: $!\n";

			# 1 is the shared lock flag (LOCK_SH on the usual systems)

			flock($log, 1) or die "$me: Can't lock $logsrc: $!\n";
		}

		{
			# flush the progress message right away, but leave the
			# caller's buffering mode alone afterwards

			local $| = 1;
			print "\t- parsing $logsrc... ";
		}

		# main write loop

		ENTRY: while (my $line = <$log>) {

			$con_sec = $1 * 60 if ($main::linux && $line =~ $limin);

			# copy the captures at once, so that no later match can
			# clobber them

			my @cap = ($line =~ $regex) or next ENTRY;

			if ($ignore > 0) {
				$ignore--;
				next ENTRY;
			}

			my $tm = $mmname{$cap[0]};      # month
			my $td = $cap[1];               # day
			my ($ts, $ti, $to);             # seconds, octets in/out

			if ($main::linux) {             # Linux

				die "$me: Can't parse $logsrc\n"
					if (not defined $con_sec);

				# pppd logs "Sent" (out) before "received" (in)

				($ts, $ti, $to) = ($con_sec, $cap[6], $cap[5]);

			} else {                        # BSD

				($ts, $ti, $to) = @cap[5, 6, 7];
			}

			# if we are updating the database: find out whether
			# we already have the current connection in database,
			# and if not, start populating %DB with the new
			# connections

			if ($action eq "updating") {

				$action = "populating";

				my $tail = "$tm,$td,$ts,$ti,$to";

				# this can of course take a long time especially if
				# both our database _and_ the logfiles are big, but
				# hopefully the logfiles get rotated ;-)

				while (my ($key, $val) = each %DB) {

					# the data is quoted and the match anchored, so
					# only a complete, literal record can match

					if ($val =~ /^\d+,\Q$tail\E$/) {

						# everything between this record and the
						# end of the database will show up next in
						# the log; skip those entries, too

						$ignore = ($i - 1) - $key;
						next ENTRY;
					}
				}
			}

			if (!$old_m) {

				# in this case we're doing our very first entry, so
				# we must set year- and year-month-start to $i

				$DB{"$y-start"}     = $i;
				$DB{"$y-$tm-start"} = $i;

			} elsif ($tm < $old_m) {

				# if the current month is "smaller" than the old
				# month, happy new year! this will fail if the user
				# had no connects in more than twelve months, but
				# there is nothing we can do about that

				$DB{"$y-end"}        = $i-1;
				$DB{"$y-$old_m-end"} = $i-1;
				$y++;
				$DB{"$y-start"}      = $i;
				$DB{"$y-$tm-start"}  = $i;

			} elsif ($tm > $old_m) {

				# if the current month is "larger" than the old
				# month, we must write that up

				$DB{"$y-$old_m-end"} = $i-1;
				$DB{"$y-$tm-start"}  = $i;
			}

			# this is the actually saved data ($y is final by now)

			$DB{$i} = "$y,$tm,$td,$ts,$ti,$to";

			# next turn this month will be $old_m

			$old_m = $tm;
			$i++;
		}

		close($log);

		print "done.\n";
	}

	print "\n$i PPP connections in database.\n";

	# close the open year and month and store the totals, unless we
	# were updating and no logfile contained a single connection

	if ($i > 0 && $action eq "populating") {

		$DB{"$y-$old_m-end"} = $i-1;
		$DB{"$y-end"}        = $i-1;
		$DB{'connects'}      = $i;
		$DB{'years'}         = $y;
		$DB{'thisyear'}      = (localtime((stat $ppplogs[-1])[9]))[5] + 1900;
	}

	untie %DB;
	return 0;
}

# -----> _gunzip_to_temp ($logsrc)
#
# Decompresses $logsrc with $main::gzip into a File::Temp file and returns
# the File::Temp object, rewound and ready for reading. The temporary file
# is deleted when the returned object is destroyed. Dies if gzip cannot be
# run, fails, or if the temporary file cannot be written.
#
sub _gunzip_to_temp {

	my ($logsrc) = @_;

	my $me = $main::me;

	# split the command like the shell would, so that options inside
	# $main::gzip keep working; the logfile name itself never passes
	# through a shell

	my @gzip = split ' ', $main::gzip;

	my $tmp = File::Temp->new();

	open(my $gz, '-|', @gzip, '-cd', $logsrc)
		or die "$me: Can't gunzip $logsrc\n";

	binmode($gz);
	binmode($tmp);

	my $buf;

	while (read($gz, $buf, 65536)) {
		print {$tmp} $buf or die "$me: Can't write $tmp: $!\n";
	}

	# close() on a pipe is false if gzip failed or died of a signal

	close($gz) or die "$me: Can't gunzip $logsrc\n";

	seek($tmp, 0, 0) or die "$me: Can't rewind $tmp: $!\n";

	return $tmp;
}

1;
