#! /usr/bin/perl -w
##**************************************************************
##
## Copyright (C) 1990-2007, Condor Team, Computer Sciences Department,
## University of Wisconsin-Madison, WI.
## 
## Licensed under the Apache License, Version 2.0 (the "License"); you
## may not use this file except in compliance with the License.  You may
## obtain a copy of the License at
## 
##    http://www.apache.org/licenses/LICENSE-2.0
## 
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
##**************************************************************

use strict;

# Update the module include path
BEGIN
{
    # If the script was invoked through a path (i.e. $0 contains a '/'),
    # append everything before the last '/' to @INC.  This runs at compile
    # time, presumably so the 'use' lines that follow can find modules
    # installed next to the script.
    push( @INC, $1 ) if ( $0 =~ m{(.*)/.*} );
}
use HawkeyePublish;
use HawkeyeLib;

# Prototypes
#
# Every prototyped sub that is called before its definition must be
# declared here; otherwise perl (under -w) complains that the sub was
# "called too early to check prototype".
sub Init( );
sub RunIt( );
sub RunAbsent( );
sub RunTotal( $$ );
sub RunSubmit(  );
sub WarnExit( $ );
sub ScanFiles( );
sub CheckOrphans( );
sub CacheUsage( );
sub CheckAge( );
sub CheckSize( );


# Hawkeye state shared by the subs below:
#   $Hawkeye - the publisher object, created in Init()
#   $Hash    - the hash of values to publish, created in RunIt()
my ( $Hawkeye, $Hash );

# Cache directory layout; all three entries start out empty and are
# filled in by Init()
my %Directories;
@Directories{ qw( Base Global Local ) } = ( "" ) x 3;

# Default thresholds; Init() may replace them from the configuration
my %Config = (
	      MaxAge => 60 * 60 * 24,		# seconds (one day)
	      MaxSize => 1024 * 1024 * 500,	# bytes (500 MB)
	     );

# Configure ourselves, then scan the cache and publish the results
Init();
RunIt();

# Read the module configuration, create the Hawkeye publisher, locate and
# validate the GASS cache directory, and check the command line.
#
# Fills in $Hawkeye, %Directories and %Config.  Takes no arguments and
# returns nothing useful; exits with status 1 (after a message on STDERR)
# if no usable cache directory is found or an unknown argument is given.
sub Init()
{
    HawkeyeLib::DoConfig( );

    $Hawkeye = HawkeyePublish->new;
    $Hawkeye->Quiet( 1 );
    $Hawkeye->AutoIndexSet( 0 );

    # Get the GASS cache base directory; if none is configured, fall back
    # to the conventional location under $HOME (when $HOME is set at all,
    # so that we never build a path from an undefined value).
    $Directories{Base} = HawkeyeLib::ReadConfig( "_directory", "" );
    if ( $Directories{Base} eq "" && defined $ENV{HOME} )
    {
	my $Default = $ENV{HOME} . "/.globus/.gass_cache";
	$Directories{Base} = $Default if ( -d $Default );
    }

    # Is this a valid Gass Cache?
    if ( $Directories{Base} eq "" )
    {
	print STDERR "No cache directory specified & ".
	    "\$HOME/.globus/.gass_cache doesn't exist\n";
	exit 1;
    }
    elsif ( ! -d $Directories{Base} )
    {
	print STDERR "Cache directory '$Directories{Base}' is invalid\n";
	exit 1;
    }

    # A GASS cache must contain both a "global" and a "local" directory
    $Directories{Global} = $Directories{Base} . "/global";
    $Directories{Local} = $Directories{Base} . "/local";
    if (  ( ! -d $Directories{Local} ) || ( ! -d $Directories{Global} )  )
    {
	print STDERR
	    "Cache directory '$Directories{Base}' does not appear to\n" .
	    "be a valid GASS cache\n";
	exit 1;
    }

    # Max age (the current value is handed to the parser, presumably as
    # the fallback when nothing is configured -- confirm in HawkeyeLib)
    my $Tmp = HawkeyeLib::ReadConfig( "_maxage", "" );
    $Config{MaxAge} = HawkeyeLib::ParseSeconds( $Tmp, $Config{MaxAge} );

    # Max size (same pattern as max age)
    $Tmp = HawkeyeLib::ReadConfig( "_maxsize", "" );
    $Config{MaxSize} = HawkeyeLib::ParseBytes( $Tmp, $Config{MaxSize} );

    # Finally, parse the command line...
    foreach my $Arg ( @ARGV )
    {
	# No args at current; the pattern below is only a placeholder that
	# shows where real options would be recognized.
	if ( $Arg =~ /^-this-option-is-bogus$/ )
	{
	    # Do nothing
	}
	else
	{
	    print STDERR "Unknown option '$Arg'\n";
	    print STDERR "Usage: gass_cache [name [options]]\n";
	    print STDERR
		"  [name]         \tModules logical name\n" .
		"  [No options]   \t\n";
	    exit 1;
	}
    }
}

# Cache data file info, all of it collected by ScanFiles( ):
#   @DataFiles  - one hash ref of file attributes per data file
#   @EmptyDirs  - paths of the empty directories that were found
#   %DataInodes - inode => list of indices into @DataFiles
#   %TagInodes  - inode => number of "tag" files seen with that inode
#   %UrlInodes  - inode => number of "url" files seen with that inode
#   %AllInodes  - one key per inode seen (only the key count is used)
my ( @DataFiles, @EmptyDirs );
my ( %DataInodes, %TagInodes, %UrlInodes, %AllInodes );

# Warn about a failed close() on a command pipe.
#
#   $Cmd - the command line that was run (used to prefix the warning)
#
# When $! is zero the close itself worked and the command simply
# returned a non-zero status, so the raw wait status ($?) is reported;
# otherwise the system error in $! is reported.
sub WarnExit( $ )
{
    my ( $Cmd ) = @_;

    my $Reason = ( $! != 0 )
	? "Error closing pipe: $!"
	: "Exit status $?";
    warn "$Cmd: $Reason";
}

# Do the real work here...
#
# Creates the hash of published values, changes into the cache
# directory, runs the scan and every check, then stores and publishes
# the results.  Dies if the cache directory cannot be entered.
sub RunIt()
{

    # Start things off
    $Hash = HawkeyeHash->new( \$Hawkeye, "" );

    # Make the cache our working directory; the scan and the du/df
    # commands all use relative paths.  Note the low-precedence 'or':
    # with '||' the die would be attached to the directory name rather
    # than to the result of chdir, and a failure would go unnoticed.
    chdir( $Directories{Base} )
	or die "Can't chdir to $Directories{Base}: $!\n";

    # Scan for data files...
    ScanFiles( );

    # Check for orphaned global files, then gather the usage, age and
    # size statistics...
    CheckOrphans( );
    CacheUsage( );
    CheckAge( );
    CheckSize( );

    # Add the cache directory...
    $Hash->Add( "Directory", HawkeyePublish::TypeString, $Directories{Base} );

    # Ok, summary is done...
    $Hash->Store( );
    $Hawkeye->Publish( );
}

# Scan for data files, collect info on 'em
#
# Runs 'find' (using its -printf action) over the current directory and
# fills in the file-level scan results:
#   %AllInodes  - one key per inode seen
#   @EmptyDirs  - directories (below the top level) with no entries
#   @DataFiles / %DataInodes - files whose name starts with "data"
#   %TagInodes / %UrlInodes  - per-inode counts of "tag" / "url" files
#
# Exits with status 1 if 'find' cannot be started.
sub ScanFiles( )
{
    # The path is printed last and the line is split into at most eight
    # fields, so whitespace inside a path cannot shift the other fields.
    # The command is run without a shell, so the format needs no quoting.
    my @FindCmd = ( "find", ".", "-printf",
		    '%i %n %s %b %A@ %C@ %T@ %h/%f\n' );
    my $Cmd = join( " ", @FindCmd );
    my $Find;
    if ( ! open( $Find, "-|", @FindCmd ) )
    {
	print STDERR "Can't run '$Cmd'\n";
	exit 1;
    }
    while ( my $Line = <$Find> )
    {
	chomp $Line;

	# Split it all up into fields; skip anything malformed
	my @Fields = split( ' ', $Line, 8 );
	next if ( @Fields < 8 );
	my $FullFile = pop @Fields;
	next if ( $FullFile !~ m{^(.*)/([^/]*)$}s );

	my %File = ( Directory => $1, Name => $2 );
	@File{ qw( Inode Links Bytes Blocks Access Change Modify ) } = @Fields;

	# Add it into the "all count" hash (only the keys are ever used;
	# the value records whether the inode has been seen just once)
	$AllInodes{$File{Inode}} = ( exists $AllInodes{$File{Inode}} ) ? 0 : 1;

	# Skip over directories, noting the empty ones.  Directories that
	# sit directly in the top level are never reported as empty.
	if ( -d $FullFile )
	{
	    next if ( $File{Directory} eq "." );
	    my $Dir;
	    if ( opendir( $Dir, $FullFile ) )
	    {
		my $Entries = grep( ! /^\.\.?$/, readdir( $Dir ) );
		closedir( $Dir );
		push( @EmptyDirs, $FullFile ) if ( 0 == $Entries );
	    }
	    next;
	}

	# What type of file is it (based on its name and directory)
	$File{IsGlobal} = $File{Directory} =~ /global/;
	$File{IsLocal} = $File{Directory} =~ /local/;
	$File{IsData} = $File{Name} =~ /^data/;
	$File{IsTag} = $File{Name} =~ /^tag$/;
	$File{IsUrl} = $File{Name} =~ /^url$/;

	# Data file?  Remember it, and index it by inode.
	if ( $File{IsData} )
	{
	    push @DataFiles, \%File;
	    push @{$DataInodes{$File{Inode}}}, $#DataFiles;
	}

	# Tag file handling: count the tag files per inode
	elsif ( $File{IsTag} )
	{
	    $TagInodes{$File{Inode}}++;
	}

	# URL file handling: count the url files per inode
	elsif ( $File{IsUrl} )
	{
	    $UrlInodes{$File{Inode}}++;
	}
    }
    close( $Find ) or WarnExit( $Cmd );
}

# Find orphaned files here...
#
# A data file counts as an orphan when it is flagged as global and has
# a link count of exactly one.  Publishes the number of such files
# together with their total bytes and blocks.
sub CheckOrphans()
{
    my ( $Orphans, $Bytes, $Blocks ) = ( 0, 0, 0 );

    # Pick out the orphans and total them up...
    foreach my $Entry ( grep { $_->{IsGlobal} && 1 == $_->{Links} } @DataFiles )
    {
	$Orphans += 1;
	$Bytes += $Entry->{Bytes};
	$Blocks += $Entry->{Blocks};
    }

    # Report it all..
    $Hash->Add( "GlobalOrphanCount", HawkeyePublish::TypeNumber, $Orphans );
    $Hash->Add( "GlobalOrphanBytes", HawkeyePublish::TypeNumber, $Bytes );
    $Hash->Add( "GlobalOrphanBlocks", HawkeyePublish::TypeNumber, $Blocks );
}


# Total cache usage stats..
#
# Publishes:
#   - count / bytes / blocks / mean / max of the global data files
#   - the number of distinct inodes and of empty directories
#   - the number of distinct tag and url inodes
#   - 'du -s' totals for the global and local directories (Du_*, Du)
#   - 'df -P' figures for the file system holding the cache (Disk_*)
#
# The du and df sections are independent: if one command cannot be run
# a message goes to STDERR and the other is still reported.
sub CacheUsage( )
{
    # Walk through all of the global data files...
    my $Count = 0;
    my $TotalBytes = 0;
    my $TotalBlocks = 0;
    my $MaxBytes = 0;
    foreach my $File ( @DataFiles )
    {
	next if ( ! $File->{IsGlobal} );
	$Count++;
	$TotalBytes += $File->{Bytes};
	$TotalBlocks += $File->{Blocks};
	$MaxBytes = $File->{Bytes} if ( $File->{Bytes} > $MaxBytes );
    }

    # Count the total inodes & empty dirs
    my $TotalInodes = scalar keys %AllInodes;
    my $EmptyDirs = scalar @EmptyDirs;

    # Report it all..
    $Hash->Add( "GlobalDataCount", HawkeyePublish::TypeNumber, $Count );
    $Hash->Add( "GlobalDataBytes", HawkeyePublish::TypeNumber, $TotalBytes );
    $Hash->Add( "GlobalDataBlocks", HawkeyePublish::TypeNumber, $TotalBlocks );
    $Hash->Add( "TotalInodes", HawkeyePublish::TypeNumber, $TotalInodes );
    $Hash->Add( "EmptyDirs", HawkeyePublish::TypeNumber, $EmptyDirs );

    # The mean stays 0 when there are no global data files
    my $MeanBytes = 0;
    $MeanBytes = sprintf "%.1f", ( $TotalBytes / $Count ) if ( $Count );
    $Hash->Add( "GlobalDataBytesMean", HawkeyePublish::TypeNumber, $MeanBytes );
    $Hash->Add( "GlobalDataBytesMax", HawkeyePublish::TypeNumber, $MaxBytes );

    # Tag & URL counts (number of distinct inodes)
    my $TagCount = keys %TagInodes;
    my $UrlCount = keys %UrlInodes;
    $Hash->Add( "TagCount", HawkeyePublish::TypeNumber, $TagCount );
    $Hash->Add( "UrlCount", HawkeyePublish::TypeNumber, $UrlCount );

    # Run 'du'...
    my @DuCmd = qw( du -s global local );
    my $Cmd = join( " ", @DuCmd );
    my $DuPipe;
    if ( open( $DuPipe, "-|", @DuCmd ) )
    {
	# Each line of output is "<usage> <directory>"
	my %DuUsage;
	while ( my $Line = <$DuPipe> )
	{
	    $DuUsage{$2} = $1 if ( $Line =~ /^(\d+)\s(\S+)/ );
	}
	close( $DuPipe ) or WarnExit( $Cmd );

	my $TotalDu = 0;
	foreach my $Dir ( sort keys %DuUsage )
	{
	    $Hash->Add( "Du_" . $Dir, HawkeyePublish::TypeNumber,
			$DuUsage{$Dir} );
	    $TotalDu += $DuUsage{$Dir};
	}
	$Hash->Add( "Du", HawkeyePublish::TypeNumber, $TotalDu );
    }
    else
    {
	print STDERR "Can't run '$Cmd'\n";
    }

    # DF
    my @DfCmd = qw( df -P . );
    $Cmd = join( " ", @DfCmd );
    my $DfPipe;
    if ( ! open( $DfPipe, "-|", @DfCmd ) )
    {
	print STDERR "Can't run '$Cmd'\n";
	return;
    }
    while ( my $Line = <$DfPipe> )
    {
	# Skip the header line
	next if ( $Line =~ /^Filesystem/ );
	chomp $Line;

	# At most six fields, so a mount point containing blanks stays
	# in one piece; lines with fewer fields are ignored.
	my ( $Dev, $Blocks, $Used, $Avail, $UsePct, $Mount ) =
	    split( ' ', $Line, 6 );
	next if ( ! defined $Mount );

	# Strip the trailing '%' from the capacity column
	$UsePct =~ s/(\d+)\%/$1/;
	$Hash->Add( "Disk_FileSystem", HawkeyePublish::TypeString, $Dev );
	$Hash->Add( "Disk_1k_Blocks", HawkeyePublish::TypeNumber, $Blocks );
	$Hash->Add( "Disk_Used", HawkeyePublish::TypeNumber, $Used );
	$Hash->Add( "Disk_Avail", HawkeyePublish::TypeNumber, $Avail );
	$Hash->Add( "Disk_UsePct", HawkeyePublish::TypeNumber, $UsePct );
	$Hash->Add( "Disk_MountPt", HawkeyePublish::TypeString, $Mount );
    }
    close( $DfPipe ) or WarnExit( $Cmd );
}

# Check file ages
#
# Looks at the global data files only.  A file's age is the current time
# minus its modification time (or its change time when the modification
# time is zero); a file is "old" when its age exceeds $Config{MaxAge}.
#
# Publishes the mean age of all global data files, plus the count,
# orphan count, mean age, total / mean bytes, total / mean blocks and
# percentage for the old ones, and the threshold that was applied.
# Means are reported with one decimal place.
sub CheckAge(  )
{
    my $CurTime = time();

    my $TotalAge = 0;

    my $OldCount = 0;
    my $OldTotalBytes = 0;
    my $OldTotalBlocks = 0;
    my $OldTotalAge = 0;
    my $OldOrphanCount = 0;

    my $Count = 0;
    foreach my $File ( @DataFiles )
    {
	# Both the count and the age total cover global files only, so
	# that the mean below is taken over a single set of files.
	next if ( ! $File->{IsGlobal} );

	my $FileTime = $File->{Modify};
	$FileTime = $File->{Change} if ( 0 == $FileTime );
	my $Age = $CurTime - $FileTime;

	$Count++;
	$TotalAge += $Age;

	# If it's an old file, note it (a link count of one marks an
	# orphan)..
	if ( $Age > $Config{MaxAge} )
	{
	    $OldCount++;
	    $OldTotalAge += $Age;
	    $OldTotalBytes += $File->{Bytes};
	    $OldTotalBlocks += $File->{Blocks};
	    $OldOrphanCount++ if ( 1 == $File->{Links} );
	}
    }

    # Avoid divide by zeros: the true counts are published, but the
    # divisors are never allowed to be zero.
    my $CountDiv = $Count || 1;
    my $OldDiv = $OldCount || 1;

    my $OldPct = 100.0 * $OldCount / $CountDiv;

    $Hash->Add( "OldCount", HawkeyePublish::TypeNumber, $OldCount );
    $Hash->Add( "OldOrphanCount", HawkeyePublish::TypeNumber, $OldOrphanCount );

    # Store the results...
    $Hash->Add( "GlobalMeanAge", HawkeyePublish::TypeNumber,
		sprintf( "%.1f", $TotalAge / $CountDiv ) );
    $Hash->Add( "OldMeanAge", HawkeyePublish::TypeNumber,
		sprintf( "%.1f", $OldTotalAge / $OldDiv ) );
    $Hash->Add( "OldTotalBytes", HawkeyePublish::TypeNumber, $OldTotalBytes );
    $Hash->Add( "OldMeanBytes", HawkeyePublish::TypeNumber,
		sprintf( "%.1f", $OldTotalBytes / $OldDiv ) );
    $Hash->Add( "OldTotalBlocks", HawkeyePublish::TypeNumber, $OldTotalBlocks );
    $Hash->Add( "OldMeanBlocks", HawkeyePublish::TypeNumber,
		sprintf( "%.1f", $OldTotalBlocks / $OldDiv ) );
    $Hash->Add( "OldPct", HawkeyePublish::TypeNumber,
		sprintf( "%.1f", $OldPct ) );

    # And, the old age threshold that we used...
    $Hash->Add( "OldThreshold", HawkeyePublish::TypeNumber, $Config{MaxAge} );
}

# Check file sizes
#
# A global data file is "big" when its size in bytes exceeds
# $Config{MaxSize}.  Publishes the number of big files, their total and
# mean bytes and blocks, the percentage of global data files that are
# big, and the size threshold that was applied.
sub CheckSize(  )
{
    my $Limit = $Config{MaxSize};

    # Narrow the data files down to the global ones, then to the big ones
    my @Global = grep { $_->{IsGlobal} } @DataFiles;
    my @Big = grep { $_->{Bytes} > $Limit } @Global;

    my ( $BigBytes, $BigBlocks ) = ( 0, 0 );
    foreach my $Entry ( @Big )
    {
	$BigBytes += $Entry->{Bytes};
	$BigBlocks += $Entry->{Blocks};
    }

    # The true counts are published; the divisors are forced to at least
    # one so that an empty set cannot cause a division by zero.
    my $NumBig = scalar @Big;
    my $GlobalDiv = scalar( @Global ) || 1;
    my $BigDiv = $NumBig || 1;

    my $Pct = 100.0 * $NumBig / $GlobalDiv;

    # Store the results...
    $Hash->Add( "BigCount", HawkeyePublish::TypeNumber, $NumBig );
    $Hash->Add( "BigTotalBytes", HawkeyePublish::TypeNumber, $BigBytes );
    $Hash->Add( "BigMeanBytes", HawkeyePublish::TypeNumber,
		sprintf( "%.1f", $BigBytes / $BigDiv ) );
    $Hash->Add( "BigTotalBlocks", HawkeyePublish::TypeNumber, $BigBlocks );
    $Hash->Add( "BigMeanBlocks", HawkeyePublish::TypeNumber,
		sprintf( "%.1f", $BigBlocks / $BigDiv ) );
    $Hash->Add( "BigPct", HawkeyePublish::TypeNumber,
		sprintf( "%.1f", $Pct ) );

    # And, the size threshold that we used...
    $Hash->Add( "BigThreshold", HawkeyePublish::TypeNumber, $Limit );

}
