#! /usr/bin/perl -w
##**************************************************************
##
## Copyright (C) 1990-2007, Condor Team, Computer Sciences Department,
## University of Wisconsin-Madison, WI.
## 
## Licensed under the Apache License, Version 2.0 (the "License"); you
## may not use this file except in compliance with the License.  You may
## obtain a copy of the License at
## 
##    http://www.apache.org/licenses/LICENSE-2.0
## 
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
##**************************************************************

use strict;

=pod

=head1 downlist

A module for the Hawkeye monitoring and management tool for distributed
systems (http://www.cs.wisc.edu/condor/hawkeye/)

downlist notes which machines have reported in to Hawkeye.  It compares
this list to a list of machines which are expected and reports back
to Hawkeye which machines are missing and which machines appeared
unexpectedly.


=head2 Results

machines_expected - A space separated list of the machines that downlist
expects to find actively reporting to Hawkeye.

machines_active - A space separated list of all machines actively reporting
to Hawkeye.

machines_unexpected - A space separated list of machines in machines_active,
but not in machines_expected.  This represents machines reporting to
Hawkeye, but that downlist is not configured to monitor.

machines_missing - A space separated list of machines in machines_expected,
but not in machines_active.  This represents machines which should be
present, but are not reporting to Hawkeye.


=head2 Arguments

These can be placed in the hawkeye_config file.

_machines - Whitespace separated list of machines to watch.  Defaults
to not watching any machines.  Similar to _machines_file.

_machines_file - Absolute path to file containing whitespace separated list
of machines to watch.  Defaults to not watching any machines.  Similar to
_machines.

_hawkeye_status_cmd - Program to run to pull list of active machines.  Defaults
to "hawkeye_status -format '%s\n' Machine".

=cut


# Extend the module search path at compile time: when the path this script
# was started as ($0) contains a directory part, append that directory to
# @INC so the "use" statements below can find modules stored next to it.
BEGIN
{
	# List-context match: $script_dir stays undefined when $0 has no "/".
	my ($script_dir) = $0 =~ m{(.*)/.*};
	push @INC, $script_dir if defined $script_dir;
}
use HawkeyePublish;
use HawkeyeLib;

# Setup the hawkeye stuff
# File-scoped publisher object shared by the subs below: Init() creates it
# and RunIt() publishes through it.  It is declared here, ahead of both sub
# definitions, so they can refer to it under "use strict".
my $hawkeye;


# Do it
# Order matters: Init() assigns $hawkeye, which RunIt() then uses.
Init();
RunIt();

# Init - one-time setup performed before RunIt().
#
# Calls HawkeyeLib::DoConfig() (presumably this loads the settings that
# HawkeyeLib::ReadConfig() is later asked for -- confirm against HawkeyeLib),
# then creates the file-scoped $hawkeye publisher and switches on its
# Quiet and AutoIndexSet settings.  Takes no arguments.
sub Init {
	HawkeyeLib::DoConfig();

	$hawkeye = HawkeyePublish->new();

	# Both settings are simple "enable" toggles taking the value 1.
	foreach my $setting (qw(Quiet AutoIndexSet)) {
		$hawkeye->$setting(1);
	}
}

# RunIt - compare the machines reporting to Hawkeye with the expected set
# and publish the result.
#
# The expected machine names come from two settings, which are combined:
#   _machines       whitespace separated names given directly
#   _machines_file  path of a file holding whitespace separated names
# The active machine names are the non-blank output lines of the command in
# the _hawkeye_status_cmd setting.
#
# Four space separated, sorted, duplicate-free lists are added to the hash
# and published: machines_expected, machines_unexpected, machines_active and
# machines_missing.
#
# Takes no arguments.  Dies when the machines file cannot be opened or the
# status command cannot be started; every other problem (nothing configured,
# no active machines, status command failing) is only reported on STDERR.
sub RunIt {
	my $hash = HawkeyeHash->new( \$hawkeye, "" );

	my $hawkeye_status_cmd = HawkeyeLib::ReadConfig("_hawkeye_status_cmd",
		"hawkeye_status -format '%s\\n' Machine");

	# Collect the expected names.  split ' ' skips leading whitespace and
	# splits on runs of whitespace, so no separate trimming is required.
	my @expected_names;
	{
		my $machine_list = HawkeyeLib::ReadConfig("_machines", "");
		push @expected_names, split(' ', $machine_list);
	}

	{
		my $machine_file = HawkeyeLib::ReadConfig("_machines_file", "");
		if(length $machine_file) {
			# Three-argument open: characters such as ">" or "|" in the
			# configured path can never be taken as an open mode.
			open my $in, '<', $machine_file
				or die "Hawkeye: $0: Failed to open $machine_file for reading".
					" because of $!.\n";
			# Slurp the whole file; $/ is only undefined inside the do block.
			my $machine_list = do { local $/; <$in> };
			close $in;
			push @expected_names, split(' ', $machine_list)
				if defined $machine_list;
		}
	}

	if(scalar @expected_names == 0 ) {
		print STDERR "Not configured to probe any machines. All ".
			"machines found will be listed as 'unexpected'.\n";
	}

	# Hashes are used as sets; only the keys matter.
	my %machines_expected;
	foreach my $machine (@expected_names) {
		$machines_expected{$machine} = 0;
	}

	# A single command string containing shell metacharacters is still run
	# through the shell, exactly like the "command |" form.
	open my $status, '-|', $hawkeye_status_cmd
		or die "Unable to call $hawkeye_status_cmd ($!).\n";

	my %machines_active;
	while(defined (my $line = <$status>)) {
		# Drop the line ending and any surrounding whitespace (including a
		# stray carriage return) so names compare equal to the expected ones.
		$line =~ s/^\s+|\s+$//g;
		next if $line eq ''; # Skip blank lines
		$machines_active{$line} = 0;
	}

	# Closing a pipe waits for the command.  A false result means either a
	# system error ($! set) or a non-zero exit from the command ($? set).
	if(not close $status) {
		my $reason = $! ? "error closing pipe: $!" : "wait status $?";
		print STDERR "Problem running $hawkeye_status_cmd ($reason).\n";
	}

	if(scalar keys %machines_active == 0) {
		print STDERR "No active machines found.  There may have been an ".
			"error calling $hawkeye_status_cmd\n";
	}

	# Both differences are taken from the de-duplicated sets, so a machine
	# reported on several output lines is listed only once.
	my @machines_unexpected =
		grep { not exists $machines_expected{$_} } keys %machines_active;
	my @machines_missing =
		grep { not exists $machines_active{$_} } keys %machines_expected;

	my $machines_expected = join ' ', sort keys %machines_expected;
	my $machines_unexpected = join ' ', sort @machines_unexpected;
	my $machines_active = join ' ', sort keys %machines_active;
	my $machines_missing = join ' ', sort @machines_missing;

	$hash->Add('machines_expected', 's', $machines_expected);
	$hash->Add('machines_unexpected', 's', $machines_unexpected);
	$hash->Add('machines_active', 's', $machines_active);
	$hash->Add('machines_missing', 's', $machines_missing);

	# Ok, summary is done...
	$hash->Store( );
	$hawkeye->Publish( );
}
