#!/usr/local/bin/perl
# Meow/Data.pm



=head1 NAME

Meow::Data  - data base class

=head1 DESCRIPTION

Defines common variables and methods for Meow data.
Most variables are set by subclasses of this package/class.

=cut

package Meow::Data;

use POSIX;
use BerkeleyDB ; ## require fails !

BEGIN {
	# some data class flags
$kFromdata= 1; ## starcode, cvdoc, other primary data
$kFromacode= 2;
$kFromsybase= 4;
$kDumpxml= 8;
$kFromprog= 16;
}


sub isMeowData() { return 1; }

sub getSubclasses {
		## some perl tricks here
	# %packhash= %main::;   
	%packhash= %Meow::;   
  foreach $symname (sort keys %packhash ) { 
  	local *sym= $packhash{$symname};
  	if (defined(%sym) && $symname =~ /::$/) {
  		# my $classname= "$symname";
  		my $classname= "Meow::$symname";
  		$classname =~ s/::$//;
  		next if ($classname eq 'Meow::Data'); ## skip this class
			my $ismyclass= $classname->can('isMeowData');
			if ($ismyclass) {
	  		## print "new  $classname \n" if $main::debug;
  			my $ob= new $classname;
  			push( @dataclasslist, $ob); 
  			}
  		}
  	}
}

sub elements() { 
	getSubclasses() unless(defined @dataclasslist);
	return @dataclasslist; 
}

sub new {
	my $that= shift;
	my $class= ref($that) || $that;
	my %fields = @_;  # convert into associative array
	my $self = \%fields;
	bless $self, $class;
	$self->init();
	return $self;
	## return $self, $class; ##  bad
}


sub init {
	my $self= shift;
	## print STDERR "Meow::Data init";
	$self->{tag}= 'MEOWData' unless (exists $self->{tag} );
	$self->{name}= 'Meow data' unless (exists $self->{name} );
	$self->{symkey}= 'SYM' unless (exists $self->{symkey} );
	$self->{views}= [ 'text/html', 'text/plain', 'text/acode', 'text/acode-pretty', 'text/xml' ]
		unless (exists $self->{views} );
	$self->{makeflags}= '' unless(exists $self->{makeflags} );
	
	$self->{jflags}= '-ms20m -mx90m' unless( exists $self->{jflags} ); # was mx50m
	# $self->{japp}= 'meow.report.Report' unless (exists $self->{japp} );
	# $self->{jar}= '$jpath/meow.jar' unless (exists $self->{jar} );
}

sub isMadeFromData() {
	my $self= shift;
	return (($self->makeflags() & $Meow::Data::kFromdata) != 0);
}

=head1 Meow::Data::toMeow($fromdir,$todir)

convert source data to Meow format.

 $todir - output path

=cut

sub toMeow {
	my $self= shift;
	my($fromdir, $todir)= @_;
	
	$species= $self->species;
	warn "For $species, method Meow::Data::toMeow() is abstract."; return 1;
	
	$sourcedb= $self->sourcedb; 
	my $outfile= $todir . $self->targetdata;

	my $fh= $self->openSource(0); return 1 unless( $fh );
	while (<$fh>) {
	 
		}
	close($fh);
	
	return 0;
}


sub checksum {  
	my $self= shift;
	my( $data) = @_;
	return length($data);  ## fix later
}
	
sub putId {
	my $self= shift;
	my( $nid, $did, $cksum, $date)= @_;
	$did =~ s/^\w*://; ## fix for $sourcedb:id mixup
	my $idrec= "$did\t$date\t$cksum";
	print STDERR "Bad record for $nid,$did=$idrec \n" unless ($did && $nid>0);
	
	$err = $idb->db_put($nid, $idrec);
	$err |= $didb->db_put($did, $nid);
	return $err;
}

sub hasDID {
	my $self= shift;
	my( $did)= @_;
	$did =~ s/^\w*://; ## fix for $sourcedb:id mixup
	my $nid= undef;
	$err = $didb->db_get($did, $nid);
	return $self->tag . sprintf("%07d", $nid) if ($nid>=1);
	return undef;
}

sub hasNID {
	my $self= shift;
	my( $nid)= @_;
	my $idrec= undef;
  if ($idb->db_get($nid, $idrec) == 0) { 
		return $self->tag . sprintf("%07d", $nid) if ($nid>=1);
		}
	return undef;
}
	
sub getId {
	my $self= shift;
	my( $did, $date, $cksum)= @_;
	# my $sdb= $self->sourcedb;
	$did =~ s/^\w*://; ## fix for $sourcedb:id mixup
	
	my $nid= undef;
	$err = $didb->db_get($did, $nid);
	return $self->tag . sprintf("%07d", $nid) if ($nid>=1);

	my $idrec= undef;
	my $cursor = $idb->db_cursor(); ##? need get new here to update?
  $err = $cursor->c_get($nid, $idrec, BerkeleyDB::DB_LAST);
  $cursor->c_close();
  print STDERR "Bad last id! $nid=$value \n" if ($err || $nid <= 0);
  $nid++;
  if ($idb->db_get($nid, $idrec) == 0) { 
  	print STDERR "Next id $nid is taken by $idrec !\n";
  	}
	$self->putId( $nid, $did, $cksum, $date);
	## $idb->db_sync(); ##??
	## $didb->db_sync(); ##??
	print STDERR "New id $nid for $did\n" if $Meow::debug;
  return $self->tag . sprintf("%07d", $nid);
}


sub closeIdDb {
	my $self= shift;
	## $err = $idcursor->c_close() ;
	$err = $idb->db_close() ;
	$err = $didb->db_close() ;
	## undef $idcursor ;
	undef $idb ; # untie %idh ;   
	undef $didb ; # untie %didh ; 
}

sub nidcompare {
   my ($nid1, $nid2) = @_ ;
   return ($nid1 <=> $nid2);  ## cmp for string compare
}

sub openIdDb {
	my $self= shift;
	my($afile, $openflag)= @_;
	
	$afile =~ s/\.\S*$//;
	$idfile= $afile . '-id.bdb';
	$didfile= $afile . '-did.bdb';
 	my $flags= 0;
 	if ($openflag eq 'c') {
		unlink $idfile;
		unlink $didfile;
 		$flags= BerkeleyDB::DB_CREATE;
 		}
 	elsif ($openflag eq 'r') {
 		$flags= BerkeleyDB::DB_RDONLY;
		}
	$idb = new BerkeleyDB::Btree( -Filename => $idfile, -Flags => $flags,
			-Compare => \&nidcompare)
			 		or die "Cannot open $idfile: [flags=$flags] $!\n" ;
	## $idcursor = $idb->db_cursor();
	$didb = new BerkeleyDB::Hash( -Filename => $didfile, -Flags => $flags) 
          or die "Cannot open $didfile: [flags=$flags] $!\n" ;
}

sub idDbStatus {
	my $self= shift;
	## don't need -- db_stat works better

  print STDERR "------------------------\n";
  print STDERR "ID db [$idfile] status\n";
  my $ref= $idb->db_stat();
  if ($ref) {
  	my %stath= %{$ref};
  	foreach my $k (sort keys %stath) { print STDERR "$k =\t $stath{$k}\n"; }
  	}
	 
  print STDERR "------------------------\n";
  print STDERR "DID db [$didfile] status\n";
  my $ref= $didb->db_stat();
  if ($ref) {
  	my %stath= %{$ref};
  	foreach my $k (sort keys %stath) { print STDERR "$k =\t $stath{$k}\n"; }
  	}
  print STDERR "------------------------\n";
}


##
## ?? do this w/o berkdb. using IDs from RETE .list  - build hash at runtime?
##

sub makeIdDb {
	my $self= shift;
	my($todir)= @_;

	$total= 0;
	$species= $self->species;
	## my $afile= $self->getTarget($todir); ## gets original not link !
 	my $afile= $todir . $self->targetdata;	
	$datadate= $self->getFileDate($afile);
	
	$self->openIdDb($afile, 'c');
	local(*ALIB);
	if (open(ALIB, $afile)) {
	  $/= "# EOR\n";
	  my $data;
		while ($data= <ALIB>) {
			my $nid= $1 if ($data =~ m/\nID\|\D*(\d+)/); ## only numeric portion!
			$nid =~ s/^0+//;  #?? fail unless($nid); 
			my $did= $1 if ($data =~ m/\nDID\|([^\n]+)/);
			my $cksum= $self->checksum($data);
			my $date= $1 if ($data =~ m/\nDT\|([^\n]+)/);
			$date= $datadate unless($date);
			
			$err= $self->putId($nid, $did, $cksum, $date);
			$total++ unless($err);
	  	}
	  $/= "\n";
		close(ALIB);
		}
	$self->closeIdDb();
	
	print STDERR "makeIdDb for: $species\n";  
	print STDERR "total gene count: $total\n\n" ; 
	return 0;
}

sub printIdDb {
	my $self= shift;
	my($todir)= @_;

	$total= 0;
	$species= $self->species;
	## my $afile= $self->getTarget($todir); ## gets original not link !
 	my $afile= $todir . $self->targetdata;	
	print  "# ID data for: $species [$afile]\n";  

	$self->openIdDb($afile, 'r');
  my ($nid, $idrec) = ("", "") ;
  my	$cursor = $idb->db_cursor();
	$cursor->c_get($nid, $idrec, BerkeleyDB::DB_LAST);
	print "#Last rec: $nid -> $idrec\n" ; 
	$cursor->c_close();
	
	print  "#ID -> DID \t DATE \t CKSUM\n";  
 	$cursor = $idb->db_cursor();
  while ($cursor->c_get( $nid, $idrec, BerkeleyDB::DB_NEXT) == 0) 
  	{ print "$nid -> $idrec\n"; $total++; }
	$cursor->c_close();
	## $self->idDbStatus() if ($Meow::debug);
	$self->closeIdDb();
	print "\n# Total count: $total\n\n" ; 
	return 0;
}

sub getFileDate {
	my $self= shift;
	my($afile)= @_;
	$afile= Meow::getLinkOriginal($afile) if ( -l $afile );
	my @tm= localtime( $^T + (-M $afile) );
	return POSIX::strftime("%d-%b-%Y",@tm);
}


sub getSource {
	my $self= shift;
	my $srcindex = shift;
	
	my $data;
	if ($srcindex =~ /^\d+$/) {
		$data= ${$self->sourcedata}[$srcindex];
		}
	else { # check by name - handier call
		my @data= @{$self->sourcedata};
		foreach (@data) { if (/$srcindex/) { $data= $_; last; } }
		}
	if (!$data) { warn "Can't locate data from $srcindex"; return undef; }
	$data= $self->sourcepath . $data if ( $self->sourcepath && ($data !~ m|^[\$]|) );
	return Meow::replaceVars($data);
}

sub openSource {
	my $self= shift;
	my $srcindex = shift;
	
	my $data= $self->getSource($srcindex);	
	my $okay;
	local(*FH);
	if ($data =~ /\.(gz|Z)$/) { $okay= open( FH, "zcat $data|");  }
	else { $okay= open( FH, $data); }
	if ($okay) { return *FH; }
	else {  warn "Can't open $data"; return undef; }
}


## hgtable format:
# source        homolog         organ-  gene    blast   blast   pct     homolog
#  id             id             ism    sym.    score   prob    ident   refseq
## acode format:
## HG|species == Saccharomyces cerevisiae; gene == AUT7; MEOW:SGgn0000174 [Score=140, E=4e-34]
## may'00 added ref.db.id at end of line - for other users, don't need here

sub readHgtable {
	my $self= shift;
	my $orgpath = '$SERVER_PATH/' . $self->orgpath;
	$orgpath= Meow::replaceVars($orgpath);
	my %hgtable= ();
	%hgrefprot= (); ## add global hash
	my $hg;
	local(*H);
	if (open(H,"$orgpath/hgtable")) {
		while(<H>) {
			next unless (/^\w/); ## skip blanks, comments
			## old flds:  my($id,$hgid,$org,$sym,$score,$prob,$hgref)= split(/\t/);
			## as of 29feb2000:
			my($id,$hgid,$org,$sym,$score,$prob,$pct,$hgref)= split(/\t/);
			next unless ($id && $hgid);
			if ($id eq $hgid) {
				 ## use this to document id
				 $hgrefprot{$id} = $hgref; 
				 next;
				 }
			$org= 'Fruitfly' if ($org eq 'Ffly');## fixup
			## $hg= "species == $org; gene == $sym; MEOW:$hgid [Score=$score, E=$prob]\n";
			$hg= "species == $org; gene == $sym; MEOW:$hgid ($pct%)\n";
			$hgtable{$id} .= $hg; 
			}
		close(H);
		}
	return \%hgtable;
}

		
sub isOldTarget {
	my $self= shift;
	my ( $forceupdate, $workpath, $publicpath) = @_;
	my $data= ${$self->sourcedata}[0];
	$data= $self->sourcepath . $data if ( $self->sourcepath && ($data !~ m|^[\$]|) );

	$data= Meow::replaceVars($data);

	unless (-r $data) {
		##?  optionally fetch the data from somewhere ?
		warn "No source data $data";
		return 0;
		}
 	return $forceupdate if ($forceupdate);
 	my $targ= $self->targetdata;
	my $isold= Meow::isOldTarget( $data, "$workpath/$targ");
	$isold= Meow::isOldTarget( $data, "$publicpath/$targ") if ($isold);
	return $isold;
}

sub getTargetDate {
	my $self= shift;
	my ($publicpath) = @_;
 	my $targ= $self->targetdata;	
	$targ= "$publicpath/$targ";
	$targ= Meow::getLinkOriginal($targ) if (-l $targ);
	return undef unless(-r $targ);
	my @st= stat($targ);
	return $st[9] if ($#st>9);
	return undef;
}

sub getTarget  {
	my $self= shift;
	my ($publicpath) = @_;
 	my $targ= $self->targetdata;	
	$targ= "$publicpath/$targ";
	$targ= Meow::getLinkOriginal($targ) if (-l $targ);
	return $targ;
}


##  AUTOLOAD -- supply set/get methods for fields 
sub AUTOLOAD {
	my $self= shift;
	my $type= ref($self);
	my $name= $AUTOLOAD;
	$name =~ s/.*://;  ## drop package prefix
	## unless (exists $self->{$name} ) { warn "Can't access $name field in class $type"; }
	if (@_) { return $self->{$name}= shift; }
	else { return $self->{$name}; }
}

sub print {
	my $self= shift;
	foreach (sort keys %$self) { 
		print "$_ => ";
		my $val= $self->{$_};
		if (ref($val) =~ /HASH/) {
			foreach $k (sort keys %$val) { print "$k=>$$val{$k}\n"; } 
			}
		elsif (ref($val) =~ /ARRAY/ ) { print join(', ',@$val) ."\n"; }
		else { print "$val\n";  }
		}
}

sub toString {  ## java favorite
	my $self= shift;
	my $s= '%' . ref($self) . '= { ';
	foreach (sort keys %$self) { 
		$s .= "$_ => ";
		my $val= $self->{$_};
		if (ref($val) =~ /HASH/) {
			$s .=  '{'; 
			foreach $k (sort keys %$val) { $s .=  "$k=>\'$$val{$k}\', "; } 
			$s .=  '}, '; 
			}
		elsif (ref($val) =~ /ARRAY/ ) { $s .=  'qw[' . join(', ',@$val) .'], '; }
		else { $s .= "\'$val\', ";  }
		}
	$s .= ' }; ';
	return $s;
}

sub DESTROY {
	my $self= shift;
	##warn "DESTROY $self";
}

#-------------


##################
##?? keep any of these housekeeping data packs ?
##################

package  Meow::moHelp0;
 @ISA = qw( Meow::Data );  

sub new {
	my $class= shift;
	my $self = $class->SUPER::new(  
		tag => 'moHelp0',
		name => 'MEOW Data field list',
		srcprog => '$jpath/meow.jar',
		japp => 'meow.report.Report',
		makeflags =>  $Meow::Data::kFromprog,
		makecmd	=> '\'mime=text/html;refman;short-report\'',
		outfiles=> ['refman-fld-list.html'], 
		@_ );
	return $self;
}
#-------------

package  Meow::moHelp1;
 @ISA = qw( Meow::moHelp0 );  

sub new {
	my $class= shift;
	my $self = $class->SUPER::new(  
		tag => 'moHelp1',
		name => 'MEOW Data field detail',
		makecmd	=> '\'mime=text/html;refman;full-report\'',
		outfiles=> ['refman-fld-detail.html'], 
		@_ );
	return $self;
}
#-------------

package  Meow::moXmldtd;
 @ISA = qw( Meow::moHelp0 );  

sub new {
	my $class= shift;
	my $self = $class->SUPER::new(  
		tag => 'moXmldtd',
		name => 'Meow XML DTD',
		makecmd	=> 'mime=text/xml-dtd',
		outfiles=> ['fbacode.dtd'], #?
		@_ );
	return $self;
}
#-------------


package  Meow::moSrscodes;
 @ISA = qw( Meow::moHelp0 );  

sub new {
	my $class= shift;
	my $self = $class->SUPER::new(  
		tag => 'moSrscodes',
		name => 'MEOW SRS field codes',
		japp => 'meow.report.SRScodes',
		makecmd	=> '', 
		outfiles=> ['mosrs-codes.is'], 
		@_ );
	return $self;
}
#-------------


1; # perly
.
