#!/usr/local/bin/perl 
# Acodes.pl/.pm
# - perl module for manipulating flybase acode data
# - from fbacodelib.pl, see also java flybase libs
# - should be Acodes.pm but stupid Solaris mailer insists that is an image file
# dgg, feb'99

package Acodes;
require 5.001;

BEGIN {
$debug = 0;  
 
$RETE_ID = 3;
$RETE_VALUE = 2;
$RETE_COUNT = 1;

$fbobs= "fbobs"; # acode data path
$fbidtag = "FBgn"; # test, do for each data class
$endOfRecord= "# EOR\n"; # for non-acode readers (srs) to simplify 
$bigsize= 300000;
$asuffix= ".acode";
$create= 0;
$doidx= 0;

##? $OutFile= "STDOUT";
}


sub new {
	my($class, $filename, $create) = @_;  
	# print STDERR "new Acodes( " . join(',',@_) . " )\n" if $debug;
  my $self = {};
  bless $self;
  %db2a = ();   $self->{'db2a'}= \%db2a;
	%retef= ();   $self->{'retef'}= \%retef;
	%fldkeys= (); $self->{'fldkeys'}= \%fldkeys;
	%subkeys= (); $self->{'subkeys'}= \%subkeys;

	open(OUTF,">&STDOUT"); ##?

	## $self->open($filename,$create) if ($filename);
  return $self;
}

sub open {
	my($self,$alib,$create)= @_;
	return if (!$alib);
	my $aidx= $alib . ".idx";
	
	if ($create && $create !~ /^[ro]/) {
		local(*ALIB, *AIDX);
	  CORE::open(ALIB,">$alib") || die "Can't create $alib";
		CORE::open(AIDX,">$aidx") || die "Can't create $aidx";
		$self->{'alib'}= *ALIB;
		$self->{'aidx'}= *AIDX;
		}
	else {
		local(*INLIB, *INIDX);
		CORE::open(INLIB,$alib) || die "Can't open $alib";
		CORE::open(INIDX,$aidx) || die "Can't open $aidx";
		$self->{'inlib'}= *INLIB;
		$self->{'inidx'}= *INIDX;
		}
	return ($alib, $aidx);
}

sub create {
	my($self,$alib)= @_;
	return $self->open($alib,"w");
}

sub close  {
	my($self) = @_;
	CORE::close($self->{'alib'}) if $self->{'alib'};
	CORE::close($self->{'aidx'}) if $self->{'aidx'};
	CORE::close($self->{'inlib'}) if $self->{'inlib'};
	CORE::close($self->{'inidx'}) if $self->{'inidx'};
}

sub openout {
	my($self,$outname)= @_;
	$outname= '&STDOUT' if (!$outname);
	CORE::open(OUTF,">$outname") || die "Can't write to $outname";
	$self->{'outname'}= $outname;
	return ($outname);
}

sub closeout {
	my($self) = @_;
	## need to deal with filehandle variables!
	CORE::close(OUTF) if $self->{'outname'};
	$self->{'outname'}= undef;
}

## registerDbKeys
sub registerKeys {
	local($self, $redb2acode) = @_;
	%db2a= %{$self->{'db2a'}};
	foreach (keys(%{$redb2acode})) {
		my $val= $db2acode{$_};
		$db2a{$_}= $val if ($val);
		}
}

## removeDbKeys
sub removeKeys {
	local($self, $redb2acode) = @_;
	%db2a= %{$self->{'db2a'}};
	foreach (keys(%{$redb2acode})) { delete $db2a{$_}; }
}

sub fldkey {
	my($self,$dbkey)= @_;
	%db2a= %{$self->{'db2a'}};
	my $akey= $db2a{$dbkey};
	if (!$akey) { 
		print STDERR "dbkey with no acode equivalent: $dbkey\n";
		$db2a{$dbkey}= $dbkey;
		$akey= $dbkey; 
		}
	return $akey;
}

sub setId {
	my($self, $id, $rec) = @_;
	$self->{'id'}= $id;
	$rec->{'id'}= $id if ($rec);
	## add 'id' to record %rech ?
}

sub newRecord {
	my($self, $reckey) = @_;
	my @vec= (); ##  ! this doesn't make array ref !????
	my %fhash= ();
	my %rech= ( 'key' => $reckey, 'vec' => \@vec, 'fhash' => \%fhash );
	# $self->{'atrec'}= \%rech;
	## need to distinguish mainrec, subrecs, subsubrecs...
	return \%rech;
}

sub addSubrec {
	my($self, $mainr, $subr) = @_; 
	## get mainr from $self ?

	my @mainr= @{$mainr->{'vec'}};
	my $subkey= $subr->{'key'} ;
	my @subr= @{$subr->{'vec'}};
	push(@mainr,$subkey);
	push(@mainr,"{");
	push(@mainr, @subr);
	push(@mainr,"}");
	$mainr->{'vec'}= \@mainr; ##?? make sure its there?
}

sub addFieldsFrom {
	my($self, $mainr, $vecr, $keepFlds) = @_; 
	my @mainr= @{$mainr->{'vec'}};
	my %kos= %{$keepFlds};
	
	my @vecr= @{$vecr->{'vec'}};
	if (scalar(%kos)) { 
		foreach my $fld (@vecr) {
			my ($key)= $fld =~ /([^|]+)/;
			push(@mainr, $fld) if ($kos{$key});
			}
		}
	else { push(@mainr, @vecr); }
	$mainr->{'vec'}= \@mainr; ##?? make sure its there?
}

sub addField {
	my($self, $rec, $key, $val) = @_;
	## get rec from $self
	my @rec= @{$rec->{'vec'}};

	my $fld = $key if ($key);
	my $lastfld= $rec[$#rec];
	if ($val) {
		my($v1, $vs);
		if ($lastfld && $lastfld =~ /^$key\|/) {
			$fld= $self->appendField( $rec[$#rec], $val); ## isn't changing original $fld!
			$rec->{'vec'}= \@rec; ##?? make sure its there?
			return $fld;   
			}
		if ($val =~ /\n/) {
			($v1, $vs)= split(/\n/, $val, 2);
			$val= $v1;
			}
		$fld .= '|' . $val;
		$self->appendField( $fld, $vs) if ($vs); 
		}
	push( @rec, $fld);
	$rec->{'vec'}= \@rec; ##?? make sure its there?
	return $fld;  ##!? return ref of fld for appending?
}

sub addFieldHash {
	my($self, $rec, $key, $val) = @_;
	return $self->addField($rec,$key,$val);
	
	## not working -- apparently fhash becomes new, local copy !???
	## get rec from $self
	# my @rec= @{$rec->{'vec'}};
	# my %fhash= %{$rec->{'fhash'}};
	
	my $rvec= $rec->{'vec'};
	my $rhash= $rec->{'fhash'};

	if ($rhash->{$key}) {
		## return $self->appendField( $rhash->{$key}, $val);   ## doesn't work
		}
		
	my $fld = $key if ($key);
	if ($val) {
		my($v1, $vs);
		if ($val =~ /\n/) {
			($v1, $vs)= split(/\n/, $val, 2);
			$val= $v1;
			}
		$fld .= '|' . $val;
		$self->appendField( $fld, $vs) if ($vs); 
		}
	push( @$rvec, $fld);
	# $rec->{'vec'}= \@rec; ##?? make sure its there?
	$rhash->{$key}= $rvec->[$#$rvec]; ## $fld;
	# $rec->{'fhash'}= \%fhash; ##?? make sure its there?
	return $fld;  ##!? return ref of fld for appending?
}

sub appendField  {
	my($self, $fld, $val) = @_;
	## !?! must use $_[1] instead of $fld to change original !
	if ($val) {
		if ($val =~ /\n/) {
			my @vs= split(/\n/,$val);
			foreach $val (@vs) { $_[1] .= "\n|" . $val; }
			}
		else { $_[1] .= "\n|" . $val; }
		}
	return $_[1];
}


sub toString {
	my($self, $mainr) = @_;  
	my $reckey= $mainr->{'key'};
	my @mainr= @{$mainr->{'vec'}};
	my $doc = "$reckey\n{\n";
	foreach (@mainr) { $doc .= $_ . "\n" if ($_); }
	$doc .= "}\n";  
	return $doc;
}

sub packFields {
	my($self, $rec) = @_;  
	my @rec= @{$rec->{'vec'}};
	my %vals= ();
	foreach my $i (0..$#rec) {
		my($k,$v) = split(/\|/,$rec[$i],2);		
		my $ki= $vals{$k};
		if ($ki) { $rec[$ki] .= "\n|" . $v; $rec[$i]= ''; }
		else { $vals{$k} = $i; }
		}
	$rec->{'vec'}= \@rec; ##?? make sure its there?
}

sub putRec {
	my($self, $mainr, $id, $retefld) = @_;  
	## get $reckey, mainr from $self

	$reckey= $mainr->{'key'};
	my @mainr= @{$mainr->{'vec'}};
	my $alib= $self->{alib};
	my $aidx= $self->{aidx};
	
	$id= $self->{'id'} if !defined($id); ## or $mainr->{'id'} ??
	$retefld= $self->getRecordTableEntry($mainr) if (!$retefld);
		
  my $at= tell($alib);
	print $alib "$reckey\n{\n";
	print $alib $retefld . "\n" if ($retefld); 
	foreach (@mainr) { print $alib $_ . "\n" if ($_); 	}
	print $alib "}\n# EOR\n"; ## EOR comment isn't required

	## my $nf= scalar(@mainr);
	## print STDERR "Acodes.putRec( $reckey, $id ) = $nf\n" if $debug;
	## print STDERR "  $retefld\n" if $debug;
	
	if ($id =~ /(\d+)/) { $id= $1; }
	if ($id && $id !~ /\D/) {
	  my $size= tell($alib) - $at;
	  my $record= pack("LL", $at, $size);  # store as unsigned long, unsigned long
	  my $idloc = $id * length($record);
	  seek($aidx, $idloc, 0);
	  print $aidx $record;
	  }
}

sub addTableEntryKey {
	my($self, $key, $val) = @_;  
	%retef= %{$self->{'retef'}};
	$retef{$key}= $val;
}

sub addTableEntryKeys {
	local($self, $retehash) = @_;  
	my($key,$val);
	%retef= %{$self->{'retef'}};
	while (($key,$val) = each (%{$retehash})) {
		$retef{$key}= $val;
		## print STDERR "add RETE $key=$val\n" if $debug;
		}
}

sub getRecordTableEntry {
	my($self, $rec) = @_;  

	my	$retefld= $rec->{'retefld'};
	return $retefld if ($retefld);
	
	%retef= %{$self->{'retef'}};
	## print STDERR "get RETE hash ".join(',',keys %retef)."\n" if ($debug && $firsth<5); $firsth++;
	return undef unless(scalar(%retef));
	
	$retefld= "RETE|";

	my($k,$v);
	my %nc= ();
	my %nv= ();
	my %av= ();
	my $nf= 0;
	
	my @rec= @{$rec->{'vec'}};
	foreach (@rec) {
		($k,$v) = split(/\|/,$_,2);		
		my $iste= $retef{$k};
		next unless ($iste);
		if ($iste == $Acodes::RETE_COUNT) {
			$nc{$k}++;
			}
		elsif ($iste == $Acodes::RETE_ID) {
			$retefld .= "\t" if ($nf>0);
			$retefld .= "$k 1 $v"; $nf++;
			}
		elsif ($iste == $Acodes::RETE_VALUE) {
			$v =~ s/\n.*//; ## drop continuation lines
			$nv{$k}++;
			$av{$k}= $v if ($nv{$k} < 2);
			}
		}
	foreach $k (sort keys(%nv)) {
		$retefld .= "\t" if ($nf>0);
		$retefld .= "$k $nv{$k} $av{$k}"; $nf++;
		}
	foreach $k (sort keys(%nc)) {
		$retefld .= "\t" if ($nf>0);
		$retefld .= "$k $nc{$k}"; $nf++;
		}
		
	## print STDERR "got RETE = $retefld\n" if ($debug && $firstr<5); $firstr++;
	return undef if ($nf==0);
	$rec->{'retefld'}= $retefld;
	return $retefld;
}


sub fieldKeyProcessing {
	my($self, $key, $val) = @_;  
	%fldkeys= %{$self->{'fldkeys'}};
	$fldkeys{$key}= $val; ##? $val or 1 ?? this is on/off flag
}

sub fieldKeysProcessing {
	local($self, $rekvhash) = @_;  
	my($key,$val);
	%fldkeys= %{$self->{'fldkeys'}};
	##if ( !scalar(%{$rekvhash}) ) { $self->{'fldkeys'}= undef; } else 
  while (($key,$val) = each (%{$rekvhash})) { $fldkeys{$key}= $val; } 
	$self->{'fldkeys'}= \%fldkeys; ##? need to reset it?
}

sub subrecKeysProcessing {
	local($self, $rekvhash) = @_;  
	my($key,$val);
	%subkeys= %{$self->{'subkeys'}};
	##if ( !scalar(%{$rekvhash}) ) { $self->{'subkeys'}= undef; } else 
  while (($key,$val) = each (%{$rekvhash})) { $subkeys{$key}= $val; } 
	$self->{'subkeys'}= \%subkeys; ##? need to reset it?
}

sub dataProcessing {
	local($self, $refproc) = @_;  
	$self->{'testdata'}= $refproc; 
}

sub printrec {
	my($self, $data, $size) = @_;
	my($lastkey,$isendrec);
	my($lev)= 0;
	my($putout)= 1;
	my($infld) = 1;
	my($subkeylev) = -1;

	return if ($size<=0);
		## HACK! drop non-Dmelanogaster, and lethals?		
	return if ( $data =~ /GSYM\|[A-z]+\\/ ); 	## GSYM|Dsim\&bgr
	return if ( $data =~ /GSYM\|l\(/ ); 	    ## GSYM|l( 

	%fldkeys= %{$self->{'fldkeys'}};
	%subkeys= %{$self->{'subkeys'}};
	my(@d) = split(/\n/,$data);
	foreach (@d) {
		
		if (/^\s*\#/) {
			## comment line 
	 		print OUTF "$_\n" if ($putout>0);
			if ($lev!=0 && /# EOR/) {
				print STDERR "Error nesting records: EOR , lev=$lev\n";
				$lev= 0;
				}
			}
		elsif ( /^\{/ ) { $lev++; 
	  	print OUTF "$_\n" if ($putout>0);
			}
		elsif ( /^\}/ ) { $lev--; 
			$isendrec=1 if ($lev==0);
	  	print OUTF "$_\n" if ($putout>0);
	  	if ($lev == $subkeylev && $putout<0) { $putout= 1; $subkeylev= -1; }
			}
		
		##elsif (/^\S/ && $lev == 0) {
			## start of new main record !
			## print STDERR "Start of new main record\n";
		##	}
		
		elsif (!/\S/) { }

	  elsif (/^\s*\|/) {
	  	## fld continuation line
	  	print OUTF "$_\n" if ($putout>0 && $infld);
	  	}
	  
		else {
			my($key)= $_;
			$key =~ s=\|.*==;   
	 		$lastkey= $key;
		  my $ok= 0;
			$infld= 1; ##??
			if ( scalar(%subkeys) ) {
			 	if ($subkeys{$key} > 0) { 
				 	## print STDERR "$key = ".$subkey{$key}."\n" if $debug;
			 		$putout= 1; $ok= 1; $infld= 1; 
			 		}
				elsif ($putout > 0 && $subkeys{$key} < 0) {
				 	## print STDERR "$key = ".$subkey{$key}."\n" if $debug;
				 	$putout= -1; $ok= -1; $subkeylev= $lev; 
				 	}
				}
			if ($ok==0 && scalar(%fldkeys) ) {
			 	if ($fldkeys{$key} > 0) { $ok= 1; $infld= 1; }
			 	elsif ($fldkeys{$key} < 0) { $ok= -1; $infld= 0; }
		 		}
	  	print OUTF "$_\n" if ($ok>=0 && $putout>0);
			}

		}
}


sub buildrec {
	my($self, $data, $size) = @_;
	my($lastkey,$lastfld,$isendrec,$subr,$fldmeth);
	my($lev)= 0;
	my($putout)= 1;
	my($infld) = 1;
	my($subkeylev) = -1;
	my $mainrec= undef;
	
	return $mainrec if ($size<=0);
	
		## HACK! drop non-Dmelanogaster, and lethals?		
	## return $mainrec if ( $data =~ /GSYM\|[A-z]+\\/ ); 	## GSYM|Dsim\&bgr
	## return $mainrec if ( $data =~ /GSYM\|l\(/ ); 	    ## GSYM|l( 
	my $testproc= $self->{'testdata'};
	if (ref($testproc) =~ /CODE/) {
		my $result= &{$testproc}($data);
		return $mainrec if ($result<0);
		}
		
	my @recs= ();
	%fldkeys= %{$self->{'fldkeys'}};
	%subkeys= %{$self->{'subkeys'}};
	my(@d) = split(/\n/,$data);
	foreach (@d) {
		
		if (/^\s*\#/) {
			## comment line 
			if ($lev!=0 && /# EOR/) {
				print STDERR "Error nesting records: EOR , lev=$lev\n";
				$lev= 0;
				}
			}
		elsif ( /^\{/ ) { $lev++; 

			my $subflag= $subkeys{$lastkey};
			if ($subflag) {
				if ($subflag>0) { $putout= 1; }
				elsif ($putout>0) { $putout= -1;  $subkeylev= $lev-1;  }
				}
			elsif ($putout>0) { $putout= -1; $subkeylev= $lev-1;  }

			if ($putout>0) {
				my $newr= $self->newRecord($lastkey);  
				if ($subr) {
					$self->addSubrec($subr, $newr);
					push(@recs,$subr);
					$subr= $newr; 
					}
				else { $mainrec= $newr; $subr= $mainrec;  push(@recs,$subr); }
				}
			}
		elsif ( /^\}/ ) { $lev--; 
			$isendrec=1 if ($lev==0);
			if ($putout>0) {
				$subr= pop(@recs) if (scalar(@recs));
				}
	  	if ($lev == $subkeylev && $putout<0) { $putout= 1; $subkeylev= -1; }
			if ($lev == 1) { $putout= 1; } # bug fix !?
			}
		
		##elsif (/^\S/ && $lev == 0) {
			## start of new main record !
			## print STDERR "Start of new main record\n";
		##	}
		
		elsif (!/\S/) { }

	  elsif (/^\s*\|/) {
	  	## fld continuation line
	 		if ($infld && $putout>0) {
		 		my $key= $lastkey;
		 		my $val= $_; $val =~ s/^\s*\|//;
				if (ref( $fldmeth) =~ m/CODE/) {
					($key,$val)= &{$fldmeth}($lastkey,$val,2);
					$ok= 1 if ($key);
					}	
				if ($key) {
				my @rec= @{$subr->{'vec'}}; ## $lastfld is local var !!
				$self->appendField( $rec[$#rec], $val);
			  ## $rec[$#rec] .= "\n|" . $val; 
				$subr->{'vec'}= \@rec; ##?? make sure its there?
				}
				}
	  	}
	  
		else {
	  	my($key,$val) = split(/\|/,$_,2);
		  my $ok= 0;
			$infld= 0; ## 1; ##??
			
			$fldmeth= $fldkeys{$key};
			if ($fldmeth) {
				if (ref( $fldmeth) =~ m/CODE/) {
					($key,$val)= &{$fldmeth}($key,$val);
					$ok= 1 if ($key);
					}	
				elsif ($fldmeth<0) { $ok= -1; }
				else { $ok= 1; }
				}
			else {
				my $subflag= $subkeys{$key};
				if ($subflag) {
					if ($subflag>0) { $putout= 1; $ok= 1; }
					## elsif ($subflag<0) { $putout= -1; $ok= -1; $subkeylev= $lev;  }
					else { $putout= -1; $ok= -1; $subkeylev= $lev;  }
					}
				}
			
			# if ($key eq 'URL' && $debug) {
			#	print STDERR "putout=$putout lev=$lev ok=$ok subr=$subr $key=$val\n";
			#	}			 		
				
	 		if ($ok>0 && $putout>0) {
				$lastfld= $self->addField( $subr, $key, $val);
				$self->setId( $val, $subr) if ($key eq 'ID');
				$infld= 1;
				}
	 		$lastkey= $key;
			}

		}
	return $mainrec;
}

# read thru acode lib for random ID's
# and process them
 
sub randomrecs {
	my($self, $processproc, $maxn, $maxid, $minid) = @_;
	$maxn= 100 unless defined($maxn);
	$minid= 1 unless defined($minid);
	$maxid= 90000  unless defined($maxid); ## ? is FBgn max id ~ 92395 
	my(%gotid);

	my $alib= $self->{inlib};
	my $aidx= $self->{inidx};
	my $serr= seek($aidx,0,2);
	my $aidxlen= tell($aidx);
  ## print STDERR "Acodes::randomrecs: aidxlen=$aidxlen aidx=$aidx seekerr=$serr processrec=".ref($processproc)."\n"
	##	if ($debug);
	return unless ($aidxlen>0);
	
	my $recsize = length(pack("LL", 1, 500)); # store as unsigned long, unsigned long
	my ($data, $record, $nred); 
	my($at, $size)= (0,0); 
	for ($i= 0; $i < $maxn; ) {
		$id= int(rand($maxid)) + $minid;
		## $id= sprintf("%07d",$id);
	  next if ($gotid{$id});
		$gotid{$id}= 1;
		
		$record= ''; 
		$idloc = $id * $recsize;
		next if ($idloc>$aidxlen);
		seek($aidx, $idloc, 0);
	  $nred= read($aidx, $record, $recsize);
	  if ($nred==0) {
			## print STDERR "error reading index of $id is $at: $size >> seek $idloc\n";
	  	$nerr++; return if ($nerr>100);
	  	next;
	  	}
	  ($at, $size) = unpack("LL", $record);
	  next if ($size <= 0 || $at < 0);
	  
		## print STDERR "index of $id is $at: $size >> seek $idloc\n" if $debug;
		seek($alib, $at, 0);
		read($alib, $data, $size);
				
		if ($processproc) { &{$processproc}($self,$data,$size); }
		else { $self->printrec($data,$size); }
		
		$i++; $nerr= 0;
		}
		
}


 
sub allrecs {
	my($self, $processproc) = @_;
	my ($data, $record, $size); 
	my $alib= $self->{inlib};
	my $aidx= $self->{inidx};
  my($at, $i)= (0,0); 
	
  $/= "# EOR\n";
  while ($data= <$alib>) {
  	$size= length($data);
		if ($processproc) { &{$processproc}($self,$data,$size); }
		else { $self->printrec($data,$size); }
		$i++;
  	}
  $/= "\n";
  
	## this re-orders records according to ID# - change? to read recs in data file order?
	
#	my $recsize = length(pack("LL", 1, 500)); # store as unsigned long, unsigned long
#	while (read($aidx, $record, $recsize)) {
#	  ($at, $size) = unpack("LL", $record);
#	  next if ($size <= 0 || $at < 0);
#		seek($alib, $at, 0);
#		read($alib, $data, $size);
#		if ($processproc) { &{$processproc}($self,$data,$size); }
#		else { $self->printrec($data,$size); }
#		$i++;
#		}

}



sub indexLib {
	my($alib,$fbidtag) = @_;
	my $aidx= $alib . '.idx';
  print STDERR "index acode library $alib\n";
	local(*ALIB,*AIDX);
	CORE::open(ALIB,"<$alib") || die "Can't read $alib";
	CORE::open(AIDX,">$aidx") || die "Can't write $aidx";

	my $recid= 0;
	my $lev= 0;
	my $recstart= 0;
	my $curbyteindex= 0;
  $recsize= length(pack("LL", 1, 1)); ## 8 * 2 = 16
	 
	while (<ALIB>) {
	  chomp(); ## newline
		$isendrec= 0;
		##my($key, $val);
		my($key,$val) = split(/\|/,$_,2);
		
		if (/^\s*\#/) {
			## comment line 
			if ($lev!=0 && /# EOR/) {
				print STDERR "Error nesting records: EOR $fbid, lev=$lev\n";
				$lev= 0;
				}
			}
	  elsif ($key =~ /^[A-Z]*ID$/ && $val =~ /^[FP][BF][a-zA-Z]{2}(\d+)/ ) {
			if (($lev==1) && index($val,$fbidtag)==0) {
				$fbid= $val; 
				$recid= $1;
				## got my id
				}
			}
		elsif ( /^\{/) { $lev++; }
		elsif ( /^\}/) { $lev--; $isendrec=1 if ($lev==0); }
		elsif (/^\S/ && $lev == 0) {
			## start of new main record ?
			$recstart= $curbyteindex;
			}
		
		if ($isendrec) {
	    $size= tell(ALIB) - $recstart;
	    $record= pack("LL", $recstart, $size);  # store as unsigned long, unsigned long
	    $idloc = $recid * $recsize;
	  	print "index of $fbid/$recid is $recstart: $size >> seek $idloc\n" if $debug;
	    seek(AIDX, $idloc, 0);
	    print AIDX $record;
			}
		$lastkey= $key;
		$curbyteindex= tell(ALIB);
		}

	CORE::close(ALIB);
	CORE::close(AIDX);
  print STDERR "done indexing data\n";
}

############ old fbacodelib.pl ###############
### NOT READY TO USE FROM PACKAGE YET
###
 
 ## see above BEGIN
# $fbobs= "fbobs"; # acode data path
# $fbidtag = "FBgn"; # test, do for each data class
# $endOfRecord= "# EOR\n"; # for non-acode readers (srs) to simplify 
# $bigsize= 300000;
# $asuffix= ".acode";
# $create= 0;
# $doidx= 0;

## require "getopts.pl";

sub main {
 CORE::open(OUTF,">&STDOUT");

 eval("require(\"getopts.pl\")"); ## suck in local env if present
 &Getopts('dhrcps:t:a:R:f:o:O:IC');

$usage= <<TEOF;
$0  [opts] [ids] [< id.list] > output
-d 	-- debug
-h	-- help 
-r  -- read lib
-c	-- create lib
-p	-- comPress lib
-t FBxx	-- FB id tag ($fbidtag)
-a .acode  -- lib suffix
-f 'RETE|'	-- extract fields
-o outname	-- output to file
-O fbobs path	-- path to FBxx.acode data ($fbobs)
-I 		-- make index for  lib (-r or -t)
-s size	-- scan for big records (size) or extract fields max size
-R num	-- number of records to randomly extract
TEOF
;

&usage() if ($opt_h);

$randpick= $opt_R; ## number to pick
$debug = 1 if ($opt_d);
$create= 1 if ($opt_c);
$press= 1 if ($opt_p);
$scan= 1 if ($opt_s);
$read= 1 if ($opt_r);
if ($opt_f) {
	$bigsize= 10000;
	$exfields= $opt_f;
	}
$bigsize= $opt_s if ($opt_s && $opt_s =~ /[0-9]+/);
if ($opt_o) {
	$outname= $opt_o; 
	CORE::open(OUTF,">$outname") || die "Can't write to $outname";
	}
$doidx = 1 if ($opt_I && $outname);
$indexlib = 1 if ($opt_I && $opt_t);
	
$fbobs= $opt_O if ($opt_O);
	
$fbidtag= $opt_t if ($opt_t);
$asuffix= $opt_a if ($opt_a);

# one acode text library file and one index file per class?

$alib= $fbidtag . $asuffix;
$aidx= $fbidtag . $asuffix . ".idx";

$record= pack("LL", 1, 5000);  # dummy to get fixed rec size
$recsize= length($record); ## 8 * 2 = 16

if ($randpick) { &randomlib($randpick); }
elsif ($exfields) { &extractfields($exfields,$bigsize); }
elsif ($create) {	&makelib(); }
elsif ($read) { &readlib(); } 
elsif ($indexlib) { &indexLib(); }
else { &usage(); }

CORE::close(OUTF) if ($outname);

}

#---------------------

sub usage {
	print STDERR $usage;
	exit();
}




# read thru acode lib for given ID's
# and print them out
#
sub readlib {
	my($alib,$fbobs,$fbidtag) = @_;
	my $aidx= $alib . '.idx';
	my( $tstart,$tend,$id,$idnum,$idloc,$record,$size,$at);

	## check for FBxx prefix in id's, open proper alib, aidx
	CORE::open(ALIB,"<$fbobs/$alib") || die "Can't open $alib";
	CORE::open(AIDX,"<$fbobs/$aidx") || die "Can't open $aidx";
	if ($doidx) {
		CORE::open(AIDX2,">$outname.idx") || die "Can't create $outname.idx";
		$nid= 0;
		}
	
	if ($#ARGV>=0) {
		foreach $id (@ARGV) { &readId($id); }
		}
	else {
		foreach $id (<STDIN>) { &readId($id); }
		}

	CORE::close(ALIB); 
	CORE::close(AIDX);
	CORE::close(AIDX2) if ($doidx);
}


sub readId {
	my($id) = @_;
	my( $tstart,$tend,$idnum,$idloc,$record,$size,$at);

	print "reading $id\n" if $debug;
	$idnum= $id;
	$idnum =~ s/[FP][BF][a-zA-Z]{2}//;
	$idloc = $idnum * $recsize;
	seek(AIDX, $idloc, 0);
  read(AIDX, $record, $recsize);
  ($at, $size) = unpack("LL", $record);
	print "index of $id is $at: $size >> seek $idloc\n" if $debug;
	seek(ALIB, $at, 0);
	read(ALIB, $data, $size);
	
	$at2= tell(OUTF) if ($doidx);
	print OUTF $data;
	
	if ($doidx) {
    $e2= tell(OUTF);
    $size2= $e2 - $at2;  ## test size == size2 !?
    ##print ALIB2 $endOfRecord; ##< is this now part of rec size?
    
    $record= pack("LL", $at2, $size2);  # store as unsigned long, unsigned long
    $idloc = $idnum * $recsize;
		print "new index of $id is $at2: $size2 >> seek $idloc\n" if $debug;
    seek(AIDX2, $idloc, 0);  
    print AIDX2 $record;
		$nid++;
		}
}






#
# extract field lines for given ID's
# and print them out
#
sub extractfields {
	## want array of fldcodes ?
	my( $fldcode, $maxbytes) = @_;
	my( $tstart,$tend,$id,$idnum,$idloc,$record,$size,$at);
	$tstart= (times)[0];

	$maxbytes= 10000 if ($maxbytes==0);
	$fldcode= 'RETE|' if (!$fldcode);

	CORE::open(ALIB,"<$fbobs/$alib") || die "Can't open $alib";
	CORE::open(AIDX,"<$fbobs/$aidx") || die "Can't open $aidx";
	foreach $id (<>) {
		print "reading $id\n" if $debug;
		$idnum= $id;
		$idnum =~ s/[FP][BF][a-zA-Z]{2}//;
		$idloc = $idnum * $recsize;
		seek(AIDX, $idloc, 0);
	  read(AIDX, $record, $recsize);
	  ($at, $size) = unpack("LL", $record);
		print "index of $id is $at: $size >> seek $idloc\n" if $debug;
		$size= $maxbytes if ($size > $maxbytes);
		seek(ALIB, $at, 0);
		read(ALIB, $data, $size);
		##print $data;
		my(@d) = split(/\n/,$data);
		foreach (@d) {
		  if (index($_,$fldcode) == 0) {
		  	 print OUTF "$_\n";
		  	 ##? instead/also sort by part of RETE ?
		  	 }
			}
		}
	CORE::close(ALIB); 
	CORE::close(AIDX);
	$tend= (times)[0];
	printf STDERR "elapsed time= %.2f seconds\n", $tend - $tstart;
}





1;
.
