#!/usr/local/bin/perl
# sfsectionlib.pl
#
# generate  subsection libs for use w/ fasta
#

###  Server specific vars
$tmpdir= "/tmp/";  # this temp must lie in Gopher kingdom (below $gopath)

$SRSROOT	= "/b4/srs";   ## path to SRS software
#$SRSROOT	= "/b6/tmp/srst";
$subpath	= "$SRSROOT/tmp/";  # path to where .indices & .names files are
$subsetlib	= "srslib$$";     # temp name

#
# SRS Databanks to use
#

@nadatabanks=('genbank','gbnew', 'gbest');  
$nadatalabel= "GenBank, Genbank EST & GenBank updates";
 
#@nadatabanks=('genbank');  ## add 'gbest'
#$nadatalabel= "GenBank";

@aadatabanks=('pir','swissprot', 'swissnew'); 
$aadatalabel= "PIR, SwissProt & SwissProt updates";

#
# SRS queries to generate predefined subset libraries
#
# format is 'data-field=srs-query=index-file-name=description'

@queries = ();
push(@queries, 'org=arabidopsis=arabidopsis');
push(@queries, 'org=caenorhabditis=caenorhabditis');
push(@queries, 'org=drosophila=drosophila=fruit fly');
#push(@queries, 'org=homo sapiens=human=human'); ## too big! ~2/3 of genbank
push(@queries, 'org=oryza=oryza=rice');  
push(@queries, 'org=poaceae=poaceae=wheat,corn,rice and related'); 
push(@queries, 'org=rattus=rattus=rat');
push(@queries, 'org=murinae=murinae=rat,mouse and related');
push(@queries, 'org=saccharomyces=saccharomyces=yeast');


### external subroutines

#require "getopts.pl";
require "ctime.pl";

###  static vars
$p = "'";
$q = '"';
$doqueries = 0;
$isamino= 0;
$progname= 'sfsectionlib.pl';
$wc = 'wc'; ## local wordcount function

# write counts for html dialog in this form:
#    6415  drosophila  <input type="radio" NAME="sublib" VALUE="drosophila-na">

$outfmt= "%7d %-15s <input type=radio NAME=sublib VALUE=%s> %s\n";
$tabletop = "<pre>\nentries  section from $datalabel\n";
$tableend = "</pre>\n";

##
## MAIN
##
 
### read command-line
##&Getopts('ah:q');  --DAMN THIS IS BOMBING w/ Perl5 ! ! !

$nocommand= 1;
foreach $i ( 0 .. $#ARGV) {
  $_= $ARGV[$i];
  if (/^-a/) { $opt_a= 1; $nocommand= 0; }
  elsif (/^-h/) { $opt_h= $ARGV[$i+1]; $i++; $nocommand= 0; }
  elsif (/^-q/) { $opt_q= 1; $nocommand= 0; }
}

if ($opt_q) { $doqueries = 1; }
if ($opt_a) { $isamino= 1; }
&usage() if $nocommand;

#######

#
# Do analysis
#

if ($isamino) {
	@databanks= @aadatabanks;
	$datalabel = $aadatalabel;
	$tabsec= "aa";  
	}
else {
	@databanks= @nadatabanks;
	$datalabel = $nadatalabel; 
  $tabsec= "na";  
	}

if ($doqueries) {
	# ! 1st remove old indices, as getz -fse will APPEND to any current indices
	system( "/bin/rm $subpath/*-$tabsec.indices");
	system( "/bin/rm $subpath/*-$tabsec.names");
	}

	
	
@htable= ();
foreach $qry (@queries) {

 	$srsq= ''; $desc=''; $filen=''; $query=''; $field='';
	($field,$query,$filen, $desc) = split(/=/,$qry);
	$srsq = "-l $p" . join(' ',@databanks) . "$p [SQ-$field:$query*]";
	$subsetlib= $filen . '-' . $tabsec;  
  
  if ($doqueries) { &SRSQuery( $srsq, $subsetlib, "/dev/null"); }
  
  $subfile= "$subpath$subsetlib.indices";
  $indexcount = `$wc $subfile`;
  if ($indexcount > 0) {
  	push(@htable, sprintf( $outfmt, $indexcount, $filen, $subsetlib, $desc));
  	}
}
  
($def,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
    $atime,$mtime,$ctime,$blksize,$blocks) = stat($subfile);
$subfiletime= &ctime($mtime);


if ($opt_h) {
	#print STDERR "edit file $opt_h\n";
	$htemp = "$tmpdir/hf$$";
	open(HF, $opt_h) || die "Can't open $opt_h";
	open(OUT, "> $htemp") || die "Can't create $htemp";
	$doout= 1;
	while (<HF>) {
		#print STDERR $_; #debug
		if ($doout) { print OUT $_; }
		if (/<\!$tabsec-table-start>/) { 
		$doout= 0; 
		##print OUT $tabletop, @htable; 
		print OUT "<pre>\nentries  section from $datalabel \n";
		print OUT @htable;
		}

		elsif (/<\!$tabsec-table-end>/) { 
		$doout= 1; 
		print OUT "Subset indices updated on $subfiletime";
	  	print OUT $tableend, $_; 
	  	}
		}
	close(OUT);
	close(HF);
	system("/bin/mv $htemp $opt_h");
	}
else {
	print @htable;
	}


exit(0);

#-------------------------------

sub usage
{
	print <<TEOF
$progname [-a] [-q] [-h sectionlib.html]
     -a   generate Amino Acid library sections (Nucleic is default)
     -q   perform the SRS queries
     -h sectionlib.html   store descriptions of the sections into
           the section library html document
TEOF
;
	exit(0);
}

sub SRSQuery {

	local($params, $subsetlib, $outfile) = @_;
	$srsapp= "getz"; # or "wgetz" ??
	
	$ENV{'SRSROOT'} = $SRSROOT;
	
	# find out operating system and define a symbolic name
	$OS = `uname`;
	if ($OS == 'SunOS' && `uname -r` =~ /^[56].*/) { $OS = 'Solaris'; }
	
	$_= $OS;
	OSCASE: {
		if (/AIX/)    { $OS_SPECIFIC = "aix"; last OSCASE; }
		if (/HP-UX/)  { $OS_SPECIFIC = "hpux"; last OSCASE; }
		if (/IRIX/)   { $OS_SPECIFIC = "irix"; last OSCASE; }
		if (/IRIX64/) { $OS_SPECIFIC = "irix64"; last OSCASE; }
		if (/IRIX/)   { $OS_SPECIFIC = "irix"; last OSCASE; }
		if (/SunOS/)  { $OS_SPECIFIC = "sunos"; last OSCASE; }
		if (/Solaris/){ $OS_SPECIFIC = "solaris"; last OSCASE; }
		if (/OSF1/)   { $OS_SPECIFIC = "osf"; last OSCASE; }
		if (/ULTRIX/) { $OS_SPECIFIC = "ultrix"; last OSCASE; }
		die "Unsupported operating system: $OS";
	}
	
	# SRS internal directories
	
	$ENV{'SRSDAT'} = "$SRSROOT/etc";
	$ENV{'SRSSDL'} = "$SRSROOT/odd";
	$ENV{'SRSSOU'} = "$SRSROOT/src";
	$ENV{'SRSCOM'} = "$SRSROOT/etc";
	$ENV{'SRSETC'} = "$SRSROOT/etc";
	$ENV{'SRSWWW'} = "$SRSROOT/www";
	$ENV{'SRSWWWTMP'} = "$SRSROOT/www/tmp";
	$ENV{'SRSINX'} = "$SRSROOT/index";
	$ENV{'SRSSEC'} = "$SRSROOT/etc/$OS_SPECIFIC";
	$SRSEXE = "$SRSROOT/bin/$OS_SPECIFIC";
	$ENV{'SRSEXE'} = "$SRSEXE";
	$ENV{'SRSTEMP'} = $subpath;

	# run getz 
	#set noglob = turn off file name expansion...[]!
	$cmd= "set noglob; nice $SRSEXE/$srsapp -fse $subsetlib $params > $outfile";
	print("$cmd\n");
	system("$cmd");
}


# try wgetz instead of getz...  this is input form for wgetz -
# libList=GENBANK&retrieveFieldName1=Organism&retrieveStr1=drosophilidae
# &retrieveFieldName2=AllText&retrieveStr2=esterase&retrieveFieldName3=AllText
# &retrieveStr3=
# &retrieveFieldName4=AllText&retrieveStr4=&makeWild=on&operator=AND
# &userId%7C%7CdoQuery=810692647_129.79.18.11%7C%7Cyes&seqFormat=PIR
# &entryType=entries&listEntriesChunkSize=50&viewEntriesChunkSize=10

.
