#!/usr/bin/perl
# gen-indices generates bug index files, and is released
# under the terms of the GPL version 2, or any later version, at your
# option. See the file README and COPYING for more information.

# Copyright (c) 2005/08/03 Anthony Towns
# Copyright 2007, 2008 by Don Armstrong <don@donarmstrong.com>.

use warnings;
use strict;

use DB_File;
use MLDBM qw(DB_FILE Storable);
use Fcntl qw/O_RDWR O_CREAT O_TRUNC/;
use File::Copy;

use Getopt::Long;
use Pod::Usage;

use File::stat;
use List::AllUtils qw(min);

use Debbugs::Common qw(make_list);

=head1 NAME

gen-indices - Generates index files for the cgi scripts

=head1 SYNOPSIS

 gen-indices [options]

 Options:
  --index-path path to index location
  --quick update changed bugs
  --debug, -d debugging level (Default 0)
  --help, -h display this help
  --man, -m display manual

=head1 OPTIONS

=over

=item B<--quick>

Only update changed bugs

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief useage information.

=item B<--man, -m>

Display this manual.

=back

=head1 EXAMPLES


=cut

# Use portable Storable images
$MLDBM::DumpMeth=q(portable);


my %options = (debug           => 0,
	       help            => 0,
	       man             => 0,
	       quick           => 0,
	       index_path      => undef,
	       );

GetOptions(\%options,'quick!','index_path|index-path=s','debug|d+','help|h|?','man|m') or pod2usage(2);
pod2usage(1) if $options{help};
pod2usage(-verbose=>2) if $options{man};

use Debbugs::Config qw(:config);
use Debbugs::Common qw(getparsedaddrs getbugcomponent lockpid);
use Debbugs::Status qw(readbug split_status_fields);
use Debbugs::Log;
use Debbugs::UTF8 qw(encode_utf8_structure);

chdir($config{spool_dir}) or die "chdir $config{spool_dir} failed: $!";

my $verbose = $options{debug};
my $indexdest = $options{index_path} || $config{spool_dir};

my $initialdir = "db-h";
my $suffix = "";

if (defined $ARGV[0] and $ARGV[0] eq "archive") {
    $initialdir = "archive";
    $suffix = "-arc";
}

if (not lockpid($config{spool_dir}.'/lock/gen-indices')) {
     if ($options{quick}) {
	  # If this is a quick run, just exit
	  print STDERR "Another gen-indices is running; stopping\n" if $verbose;
	  exit 0;
     }
     print STDERR "Another gen-indices is running; stopping\n";
     exit 1;
}

# NB: The reverse index is special; it's used to clean up during updates to bugs
my @indexes = ('package', 'tag', 'severity','owner','submitter-email','status','correspondent','affects','reverse');
my $indexes;
my %slow_index = ();
my %fast_index = ();
if (not $options{quick}) {
     # We'll trade memory for speed here if we're not doing a quick rebuild
     for my $indexes (@indexes) {
	  $fast_index{$indexes} = {};
     }
     $indexes = \%fast_index;
}
else {
     $indexes = \%slow_index;
}
my $time = undef;
my $start_time = time;
for my $i (@indexes) {
	$slow_index{$i} = {};
	if ($options{quick}) {
	     if (-e "$indexdest/by-$i${suffix}.idx") {
		  system('cp','-a',"$indexdest/by-$i${suffix}.idx","$indexdest/by-$i${suffix}.idx.new") == 0
		       or die "Error creating the new index";
		  my $stat = stat("$indexdest/by-$i${suffix}.idx") or die "Unable to stat $indexdest/by-$i${suffix}.idx";
		  $time = defined $time ? min($time,$stat->mtime) : $stat->mtime;
	     }
	     tie %{$slow_index{$i}}, MLDBM => "$indexdest/by-$i$suffix.idx.new",
		  O_RDWR|O_CREAT, 0666
		       or die "$0: can't create by-$i$suffix-idx.new: $!";
	}
	else {
	     tie %{$slow_index{$i}}, MLDBM => "$indexdest/by-$i$suffix.idx.new",
		  O_RDWR|O_CREAT|O_TRUNC, 0666
		       or die "$0: can't create by-$i$suffix-idx.new: $!";

	}
	$time = 0 if not defined $time;
}

sub addbugtoindex {
     my ($index, $bug, @values) = @_;

     if (exists $indexes->{reverse}{"$index $bug"}) {
	  # We do this insanity to work around a "feature" in MLDBM
	  for my $key (@{$indexes->{reverse}{"$index $bug"}}) {
	       my $temp = $indexes->{$index}{$key};
	       delete $temp->{$bug};
	       $indexes->{$index}{$key} = $temp;
	       $indexes->{$index}{"count $key"}--;
	  }
	  delete $indexes->{reverse}{"$index $bug"};
     }
     for my $key (@values) {
	  $indexes->{$index}->{"count $key"}++;
	  # We do this insanity to work around a "feature" in MLDBM
	  my $temp = $indexes->{$index}->{$key};
	  $temp->{$bug} = 1;
	  $indexes->{$index}->{$key} = $temp;
     }
     $indexes->{reverse}{"$index $bug"} = [@values];
}

sub emailfromrfc822 {
	my $email = shift;
	$email =~ s/\s*\(.*\)\s*//;
	$email = $1 if ($email =~ m/<(.*)>/);
	return $email;
}

my $modification_made = 0;
my $cnt = 0;

my @dirs = ($initialdir);
while (my $dir = shift @dirs) {
	printf "Doing dir %s ...\n", $dir if $verbose;

	opendir(DIR, "$dir/.") or die "opendir $dir: $!";
	my @subdirs = readdir(DIR);
	closedir(DIR);

	my @list = map { m/^(\d+)\.summary$/?($1):() } @subdirs;
	push @dirs, map { m/^(\d+)$/ && -d "$dir/$1"?("$dir/$1"):() } @subdirs;

	for my $bug (@list) {
		print "Up to $cnt bugs...\n" if (++$cnt % 100 == 0 && $verbose);
		my $stat = stat(getbugcomponent($bug,'summary',$initialdir));
		if (not defined $stat) {
		     print STDERR "Unable to stat $bug $!\n";
		     next;
		}
		next if $stat->mtime < $time;
		my ($fdata) = encode_utf8_structure(split_status_fields(readbug($bug, $initialdir)));
		$modification_made = 1;
		addbugtoindex("package", $bug, make_list($fdata->{package}));
		addbugtoindex("tag", $bug, make_list($fdata->{keywords}));
		addbugtoindex("affects", $bug, make_list($fdata->{"affects"}));
		addbugtoindex('submitter-email', $bug,
			      map {lc($_->address)} getparsedaddrs($fdata->{originator}));
		addbugtoindex("severity", $bug, $fdata->{"severity"});
		addbugtoindex("owner", $bug,
			      map {lc($_->address)} getparsedaddrs($fdata->{"owner"}));
		# handle log entries
		# do this in eval to avoid exploding on jacked logs
		eval {
		    my $log = Debbugs::Log->new(bug_num => $bug);
		    my @correspondents;
		    while (my $record = $log->read_record()) {
			next unless $record->{type} eq 'incoming-recv';
			# we use a regex here, because a full mime parse will be slow.
			my ($from) = $record->{text} =~ /^From:\s+(.+?)^\S/ism;
			push @correspondents, map {lc($_->address)} getparsedaddrs($from);
		    }
		    addbugtoindex('correspondent',$bug,@correspondents) if @correspondents;
		};
		if ($@) {
		     print STDERR "Problem dealing with log of $bug: $@";
		}
	   }
}

if (not $options{quick}) {
     # put the fast index into the slow index
     for my $key1 (keys %fast_index) {
	  for my $key2 (keys %{$fast_index{$key1}}) {
	       $slow_index{$key1}{$key2} = $fast_index{$key1}{$key2};
	  }
	  print "Dealt with index $key1\n" if $verbose;
     }
}

for my $i (@indexes) {
    untie %{$slow_index{$i}};
    # Only move if we've made changes, otherwise unlink
    if ($modification_made) {
	move("$indexdest/by-$i$suffix.idx.new", "$indexdest/by-$i$suffix.idx");
	# We do this, because old versions of touch don't support -d '@epoch'
	system('touch','-d',"1/1/1970 UTC + ${start_time}secs","$indexdest/by-$i$suffix.idx");
    }
    else {
	unlink("$indexdest/by-$i$suffix.idx.new");
    }
}

unlink($config{spool_dir}.'/lock/gen-indices')
