#!/usr/bin/perl -w

=head1 NAME

dbinfo 0.8.0

=head1 SYNOPSIS

Scans a PennMUSH database for validity, and reports some information
about the database.

 % zcat outdb.Z | ./dbinfo.pl [-a] [-q]
 % gzcat outdb.gz | ./dbinfo.pl [-a] [-q]
 % bzcat outdb.bz2 | ./dbinfo.pl [-a] [-q]
 % ./dbinfo.pl [-a] [-q]  --panic < PANIC.db
 % ./dbinfo.pl [-a] [-q] [--panic] filename

=head2 OPTIONS

       -a: Display extra information about attributes.
       -q: Only report corrupt databases and warnings.
  --panic: Treat the database as a panic dump even if
           it doesn't look like one.

=head1 DESCRIPTION

Scans a PennMUSH database, and reports some information about the
db. If the file appears corrupt (A string where a number is expected,
etc.)  it will stop, reporting what is bad in which object.  If a
database file passes this script, it can almost certainly be read by
Penn itself. The script does no internal consistency checks like the
mush's dbcks, though.

If you give a filename on the command line, compressed databases are
detected and uncompressed automatically for scanning based on file
extension.

=head1 TO DO

=over 4

=item *

Mail and Chat database checking. Right now they're just ignored.

=item *

Old-style lock checking. Right now it just skips over these.

=back


=head1 AUTHOR

Raevnos E<lt>raevnos@pennmush.orgE<gt>

=cut

#require 5.6;
#$VERSION = v0.8.0;
use strict;
#use warnings "all";
use vars qw/$a $q $panic $flags $mail $chat/;
use Getopt::Long;
use subs qw/check_database check_mail check_chat/;
use IO::Handle;
use IO::File;

GetOptions("panic" => \$panic, "a" => \$a, "q" => \$q, "mail=s" => \$mail,
           "chat=s" => \$chat);

my %dbflags = ("NO_CHAT_SYSTEM" => 0x1,
               "WARNINGS" => 0x2,
               "CREATION_TIMES" => 0x4,
               "NO_POWERS" => 0x8,
               "NEW_LOCKS" => 0x10,
               "NEW_STRINGS" => 0x20,
               "TYPE_GARBAGE" => 0x40,
               "SPLIT_IMMORTAL" => 0x80,
               "NO_TEMPLE" => 0x100,
               "LESS_GARBAGE" => 0x200,
               "AF_VISUAL" => 0x400,
               "VALUE_IS_COST" => 0x800,
               "LINK_ANYWHERE" => 0x1000,
               "NO_STARTUP_FLAG" => 0x2000,
               "PANIC" => 0x4000,
               "AF_NODUMP" => 0x8000,
               "SPIFFY_LOCKS" => 0x10000);
$flags = 0;

my $IN = undef;

my $db = shift;

if ($db && $db ne "-") {
  if ($db =~ /\.(Z|gz|bz2)$/o) {
    my $type = $1;
    my $command;
    print "Compressed database detected. Using external pipe.\n" unless $q;
    if ($type eq "Z") {
      $command = "uncompress -c $db";
    } elsif ($type eq "gz") { # gzip
      $command = "gunzip -c $db";
    } elsif ($type eq "bz2") {
      $command = "bunzip2 -c $db";
    }
    print "Reading database with $command\n" unless $q;
    $IN = new IO::File "$command |";
  } else {
    $IN = new IO::File $db, "r";
  }
} else {
  my $io = new IO::Handle;
  $IN = $io->fdopen(fileno STDIN, "r");
}

die "Couldn't open database: $!\n" if !$IN;

check_database;
if (! $IN->eof) {
  if (!$panic) {
    print <<EOW;
Data present past end of database marker.
Did you forget --panic to force checking a panic dump?
EOW
  }
} elsif ($panic) {
  print <<'EOW';
--panic switch given or panic database auto-detected, but no data past the
end-of-database marker. Is this really a panic db, or do you have @mail and
@chat disabled?
EOW
}


sub get_string {
  my $char = $IN->getc;
  if (($flags & $dbflags{"NEW_STRINGS"}) && $char eq '"') {
    while (defined ($char = $IN->getc)) {
      last if $char eq '"';
      $IN->getc if $char eq '\\'; # Skip the next character.
    }
    return 0 if !defined $char;
    $char = $IN->getc;
    return 0 if !defined $char;
    if ($char ne "\n") {
      # This is stupid, but getstring_noalloc allows it...
      print "Quoted string not ending in a newline: $char\n";
      $IN->ungetc(ord $char);
    }
    return 1;
  } else { # To-do: Handle un-quoted old strings.
    $char = <$IN>;
    return 1;
  }
}

sub get_literal {
  my @lit = split //o, shift;
  
  foreach my $char (@lit) {
    my $read = $IN->getc;
    return 0 if !defined $read || $read ne $char;
  }
  my $read = $IN->getc;
  if (!defined $read || $read ne " ") {
    return 0;
  } else {
    return 1;
  }
}

sub is_labeled_num {
  return $_[0] =~ /^$_[1] \d+$/;
}

sub get_labeled_string {
  if (get_literal $_[0]) {
    return get_string;
  } else {
    return 0;
  }
}

sub is_pos_int {
  return $_[0] =! /^\d+$/o;
}

sub is_integer {
  return $_[0] =~ /^-?\d+$/o;
}

sub check_database {
  my $line = <$IN>;
  chomp $line;

  die "Unrecognized database format!\n" unless $line =~ /^\+V(\d+)$/;

  # Database flags. Current for 1.7.5p0
  $flags = (($1 - 2) / 256) - 5;
  {
    my @setflags = ();
    while (my ($name, $bit) = each %dbflags) {
      push @setflags, $name if ($flags & $bit);
      if ($name eq "PANIC") {
        if ($panic and !($flags & $bit)) {
          print
            "--panic given, but database doesn't have PANIC flag set.\n";
        }        
      }
    }
    print "Database Flags: ",
    join(" ", sort { $dbflags{$a} <=> $dbflags{$b} } @setflags),
    "\n" unless $q;
  }

  $line = <$IN>;
  chomp $line;
  die "Unrecognized database format!\n" unless $line =~ /^~(\d+)$/;

  print "Objects in database: $1\n" unless $q;
  
  my @types = (0, 0, 0, 0, 0);
  my @attrlens;
  my $attributes = 0;
  my $obj;
  
  
 OBJECT:             
  while ($line) {
    # Line should start with the dbref line
    $line = <$IN>;
    chomp $line;
    last OBJECT if $line eq "***END OF DUMP***";
    die "Corrupt database after object #$obj\n" unless $line =~ /^!(\d+)$/o;
    
    $obj = $1;
    
    # Skip stuff we don't care about
    die "Corrupt database: Name of object #$obj\n" unless get_string;
    
    foreach ("location", "contents", "exits", "next", "parent") {
      $line = <$IN>;
      chomp $line;
      die "Corrupt database. $_ of object #$obj\n" unless is_integer $line;
    }
    
    # Locks
    if ($flags & $dbflags{"SPIFFY_LOCKS"}) {
      $line = <$IN>;
      chomp $line;
      die "Corrupt database. Lockcount of object #$obj\nn"
        unless $line =~ /^lockcount (\d+)$/o;
      foreach my $lock (1..$1) {
        $line = <$IN>;
        chomp $line;
        die "Corrupt database. name of lock $lock on object #$obj\n"
          unless $line =~ /^type "(.*)"$/o;
        my $lockname = $1;
        $line = <$IN>;
        chomp $line;
        die "Corrupt database. creator of lock $lockname (Number $lock) on object #$obj\n"
          unless is_labeled_num $line, "creator";
        $line = <$IN>;
        chomp $line;
        die "Corrupt database. flags of lock $lockname (Number $lock) on object #$obj\n"
          unless is_labeled_num $line, "flags";
        die "Corrupt database. key of lock $lockname (Number $lock) on object #$obj\n"
          unless get_labeled_string "key"
        }
      $line = <$IN>;
      chomp $line;
    } else {
    OLDLOCK:
      # We don't do anything special yet to check pre SPIFFY_LOCKS locks
      while ($line = <$IN>) {
        chomp $line;
        last OLDLOCK if $line =~ /^\d+$/o;
      }
    }
    
    # $line is now owner 
    die "Corrupt database. owner of object #$obj\n" unless is_integer $line;
    
    foreach (("zone", "pennies")) {
      $line = <$IN>;
      chomp $line;
      die "Corrupt database. $_ of object #$obj\n" unless is_integer $line;
    }
    
    # Type
    $line = <$IN>;
    chomp $line;
    die "Corrupt database. flags of object #$obj\n" unless is_integer $line;
    
    $types[$line & 0x7]++;
    
    $line = <$IN>;
    chomp $line;
    die "Corrupt database. toggles of object #$obj\n" unless is_integer $line;
    
    if (!($flags & $dbflags{"NO_POWERS"})) {
      $line = <$IN>;
      chomp $line;
      die "Corrupt database. powers of object #$obj\n" unless is_integer $line;
    }    
    
    if ($flags & $dbflags{"WARNINGS"}) {
      $line = <$IN>;
      chomp $line;
      die "Corrupt database. warnings of object #$obj\n" unless is_integer $line;
    }
    
    if ($flags & $dbflags{"CREATION_TIMES"}) {
      $line = <$IN>;
      chomp $line;
      die "Corrupt database. creation time of object #$obj\n" unless is_integer $line;
      $line = <$IN>;
      chomp $line;
      die "Corrupt database. modification time of object #$obj\n" unless is_integer $line;
    }
    
  ATTRIBUTE:
    while ($line = <$IN>) {
      chomp $line;
      # ]ATTRNAME^OWNER^FLAGS
      if ($line =~ /^](.*?)\^-?\d+\^-?\d+$/o) {
      $attributes++;
      $attrlens[length $1]++;
      # Eat up the attribute value 
      die "Corrupt database. Value of attribute $1 in object #$obj\n" unless get_string;
    } elsif ($line =~ /^<$/o) {
      last ATTRIBUTE;
    } else {
      die "Corrupt database in attribute list of object #$obj: $line\n";
    }      
  }
}

print "Breakdown by type: $types[0] rooms. $types[1] things. $types[2] exits. $types[3] players. $types[4] garbage.\n" unless $q;
print "Number of attributes: $attributes\n" unless $q;

if ($a and !$q) {
  for (my $n = 0; $n < scalar @attrlens; $n++) {
    print "Attribute names of length $n: $attrlens[$n]\n" if $attrlens[$n];
  }
}

}

sub check_mail {
  print "Checking mail databases is currently unimplemented.\n";
}

sub check_chat {
  print "Checking chat databases is currently unimpleneted.\n";
}

# Eat up the rest of the incoming DB to keep zcat and bash happy
END {
  $IN->close if $IN;
}

