modified_ariaoverview.v1.2.pl
modified_ariaoverview.v1.2.pl
—
text/x-perl,
74 KB (76071 bytes)
File contents
#!/usr/local/bin/perl
# Set tab spacing to 4 when viewing this file!
######################
# AUTHOR INFORMATION #
######################
# Problems, questions or suggestions? Contact the author:
#
# Wim Vranken (wim@bri.nrc.ca)
# Biomolecular NMR
# Biotechnology Research Institute
# Montreal
# Canada
##########
# USAGE: #
##########
# aria.overview.pl <options>
#
# Start this script in your <ARIAproject>/run?? directory.
#
# Use aria.overview.pl -h for list of <options>
##########
# OUTPUT #
##########
# Description of output files (default for <output> is 'overview')
#
# <output>.info Info on unambiguous restraints long-range restraints that initially
# had an ambiguous short-range possibility
# Info on number of unambiguous long-range restraints between residues
# Info on not well supported unambiguous long-range restraints.
# Info on number of intra/seq/medium/long range contacts per spectrum
#
# <output).newunambig New unambiguous assignments made by ARIA
#
# <output).newambig New ambiguous assignments made by ARIA
#
# <output>.info.detail Detailed info on long-range possibilities in ambiguous restraints.
#
# <output>.aria Overview of aria output (pdb, violations, ...)
#
# <output>.ps Postscript file displaying condensed restraint information
#
# <output>.restr (optional) Info on long/med/seq/intra contacts per spectrum & per residue
#
# <output>.pdb.xmgr XMGR (now GRACE) graph file displaying information on PDB files
# <output>.restraints.xmgr XMGR (now GRACE) graph file displaying information on restraints
#################
# MODIFICATIONS #
#################
# Kevin Gardner 26/10/2001 - two modifications for more flexible .tbl reading.
# Version 1.1 (06/02/2001)
# Includes modifications to 1.0 by A. Bonvin (18/01/2001)
# (can be set with -nw (print new unamb/amb assignments) option)
# Xmgr graphs more universal format
# Prints number of intra/seq/medium/long range contacts per spectrum to .info
#
# Version 1.2 (13/06/2001)
# Added short violations list sorted by ppm value to .info file
# Added -pr option to print file with number of restraints per residue
# Fixed 'Odd number of elements in hash list' warning on SGI machines.
# Patched count number of intra/seq/med/long based on unambig/ambig,
# program now also distinguishes between these types.
# Contact map now generated (as default) from unambig.tbl and ambig.tbl.
# Can use old way (using spectrum data) with -sp on flag.
# Fixed additional ',[,]' printing problem in .info file
#############################################################
# FOLLOWING ARE OPTIONS FOR PROGRAM #
#############################################################
$helptext .="
-h print help
-f <name> output file name (DEFAULT 'overview')
-n xx number of iterations to check (DEFAULT 8)
-ns xx start number for iterations to check (DEFAULT 0)
-d on/off print details on long-range ambiguous assignments (DEFAULT OFF)
-c on/off print calibration info
-nw on/off print list new unambiguous/unambiguous assignments
-vl on/off print consistent violations for LAST iteration
-va on/off print consistent violations for ALL iterations
(DEFAULT OFF & overrides -vl)
-vc on/off use 'compressed' strong violations format
-r on/off print restraint overviews
-rc on/off print spectra restraint overviews
-p on/off print pdb file energy overview
-pr on/off print number of restraints per residues to .restr file (DEFAULT OFF)
-np xx number of lowest overall energy pdb files for overview
-sp on/off use spectra data instead of ambig/unambig.tbl for
generation of contact map (DEFAULT OFF)
Default is 'on' unless specified.
";
#############################################################
# Main routines
# -------------
# Initialize program
&initialize;
# Get command flag data
&getflags;
# Open output files
&openoutput;
# Get directory names, specify number of iterations.
&getiterationdirs($it_total,$it_start);
# Get spectrum names.
&getspecinfo;
# Get info for restraint set in data directory
&getorigrestr;
# Get information for each iteration
&getiterationinfo;
# Print violation and calibration info per spectrum.
&printspectruminfo;
# Print restraint overview (conditional)
&printrestraintoverview;
# Print pdb file information overview (conditional)
&printpdboverview;
# Print xmgr graphs (conditional)
&printxmgrgraphs;
# Detailed reports
&printdetails;
# Calculate matrices
&calculatematrices;
# Report new unambiguous assignments (conditional)
&reportunambig;
# Report new ambiguous assignments (conditional)
&reportambig;
# Report 'suspect' contacts based on contact map
&reportsuspects;
# Print ps graph.
&printps;
# The end
print "\n\nDone!\n";
################################
# Main routines in above order #
################################
sub initialize {
# Initialize some program variables
# Elements found ARIA output pdb file headers (in order of appearance)...
@PDBinfolist=('E(overall)','E(bonds)','E(angles)','E(improper)','E(vdw)','E(noe)',
'E(cdih)','E(coup)','E(sani)','','rms(bonds)','rms(angles)',
'rms(improper)','rms(noe)','rms(cdih)','rms(coup)','rms(sani)',
'','viol(noe)','viol(cdih)','viol(coup)','viol(sani)');
# XMGR (now GRACE) graph elements that are displayed
@Egraphlist=('E(overall)','E(noe)','E(vdw)','E(cdih)');
@rmsgraphlist=('rms(angles)','rms(improper)','rms(noe)','rms(cdih)');
@violgraphlist=('viol(noe)','viol(cdih)');
# Methyl names for atom name compression.
$methyls="ALA HB|ILE HG2|ILE HD1|LEU HD1|LEU HD2|MET HE|THR HG2|VAL HG1|VAL HG2";
# For title printing
@itname=('Initial iteration','First iteration','Second iteration','Third iteration','Fourth iteration','Fifth iteration','Sixth iteration','Seventh iteration','Eight iteration','Ninth iteration','Tenth iteration');
# Defaults for program run
$outputfile="overview";
$it_start=0;
$it_total=8;
$pdbbest=10;
$violprintall = 'off';
$detailprint = "off";
$newasnprint = 'on';
$printrestperres = 'off';
$graph='on';
$specmatrix='off';
$matrixtype='restraints';
# Identify script name, run date and user
($scriptname,$scriptname_short)=&getscriptname;
($date_time,$date)=&gettime;
($user)=&iduser;
# Get project name and run number
($projectname,$run)=&getprojectinfo;
}
sub getscriptname {
my($sn,$sns);
# Get scriptname with and without path name
$sn=$sns=$0;
$sn=~ s/\/\//\//g;
$sns=~ s/\/.+\///g;
return $sn,$sns;
}
sub getflags {
for ($a=0;$a<=$#ARGV;$a+=2) {
if ($ARGV[$a] eq '-h') {
&print_help;
exit;
}
elsif ($ARGV[$a] eq '-f') {
$outputfile=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-d' && $ARGV[$a+1]=~ /on|off/) {
$detailprint=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-ns' && $ARGV[$a+1]=~ /\d+/) {
$it_start=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-n' && $ARGV[$a+1]=~ /\d+/) {
$it_total=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-c' && $ARGV[$a+1]=~ /on|off/) {
$calibprint=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-nw' && $ARGV[$a+1]=~ /on|off/) {
$newasnprint=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-vl' && $ARGV[$a+1]=~ /on|off/) {
$violprintlast=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-va' && $ARGV[$a+1]=~ /on|off/) {
$violprintall=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-vc' && $ARGV[$a+1]=~ /on|off/) {
$violcompress=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-r' && $ARGV[$a+1]=~ /on|off/) {
$restrover=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-rc' && $ARGV[$a+1]=~ /on|off/) {
$restrover_spec=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-p' && $ARGV[$a+1]=~ /on|off/) {
$pdbover=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-np' && $ARGV[$a+1]=~ /\d+/) {
$pdbbest=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-pr' && $ARGV[$a+1]=~ /on|off/) {
$printrestperres=$ARGV[$a+1];
}
elsif ($ARGV[$a] eq '-sp' && $ARGV[$a+1]=~ /on|off/) {
$specmatrix=$ARGV[$a+1];
if ($specmatrix eq 'on') {
$matrixtype="spectra";
}
}
else {
&print_help;
print "Unrecognized option: $ARGV[$a] with $ARGV[$a+1]. Please check above options...\n";
exit;
}
}
}
sub openoutput {
# Open output file(s)
&openoutputfile("$outputfile.aria",OUTPUT,"$scriptname called by $user on $date.\n\n");
&openoutputfile("$outputfile.info",REPORT,"$scriptname called by $user on $date.\n\n");
if ($newasnprint eq 'on') {
&openoutputfile("$outputfile.newunambig",NEWUNAMBIG,"$scriptname called by $user on $date.\n\n");
&openoutputfile("$outputfile.newambig",NEWAMBIG,"$scriptname called by $user on $date.\n\n");
}
if ($detailprint eq 'on') {
&openoutputfile("$outputfile.info.detail",DETAIL,"$scriptname called by $user on $date.\n\n");
}
if ($printrestperres eq 'on') {
&openoutputfile("$outputfile.restr",RESTPERRES,"$scriptname called by $user on $date.\n\n");
}
}
sub getiterationdirs {
# Gets the existing iteration directories
# so that program still works if some already deleted.
my ($it_nums,$it_start) = @_;
my ($it,$it_hit) = 0;
for ($it=$it_start;$it<=($it_start+$it_nums);$it++) {
$status=opendir (DIR, "structures/it$it");
if ($status == 0) {&send_warning("Not using iteration $it: $!.\n",STDOUT);next;}
@dirlist = readdir DIR ;
closedir DIR;
if ($#dirlist <= 2) {
&send_warning("Directory for iteration $it empty.\n",STDOUT);
next;
}
else {
# Check if ambig and unambig.tbl files generated - if not, skip directory
@ambig = grep(/ambig\.tbl/, @dirlist);
if ($#ambig < 1) {
&send_warning("No ambig.tbl and unambig.tbl file for iteration $it.\n",STDOUT);
next;
}
}
$it_dir[$it_hit]=$it;$it_hit++;
}
# Check if final_data directory in last iteration
$status=opendir (DIR, "structures/it".$it_dir[$#it_dir]."/finaldata");
if ($status == 0) {
&send_warning("No finaldata directory found for iteration ".$it_dir[$#it_dir].".\n",STDOUT);
$it_final=$it_dir[$#it_dir];
}
else {
@dirlist = readdir DIR ;
closedir DIR;
if ($#dirlist < 5) {
&send_warning("Finaldata directory iteration ".$it_dir[$#it_dir]." empty.\n\n",STDOUT);
$it_final=$it_dir[$#it_dir];
}
else {
$it_final=$it_dir[$#it_dir]+1;
$it_dir[$#it_dir+1]=$it_final;
$itname[$it_final]="Final data";
}
}
if (!defined @it_dir) {&send_fatal("No iterations found. Make sure you are in an aria 'run' directory!\n");}
}
sub getspecinfo {
my ($spectot,$i,$j,$it)=0;
my (@dirlist,@spec_list_temp) ="";
print ("Getting spectrum names...\n\n");
$status=open (RUNCNS,"run.cns");
if ($status == 1) {
while (defined ($_=<RUNCNS>)) {
if (/^\{===>\} aspectrum_/) {
s/^\{===>\} aspectrum_\d//;
s/_pro//;
s/=|\"|\;\s*//g;chomp;
if ($_ ne "") {
push(@spec_list_temp,$_);
}
}
elsif (/{===>} assignstruct_/) {
s/{===>} assignstruct_//;s/_pro//;s/ //g;chomp;
$it=$_;$it =~ s/=.*//;
s/\d+=//g;chop;
$strongviol_structurestotal[$it]=$_;
$strongviol_structurestotal[$it+1]=$_;
}
}
}
else {
&send_warning("run.cns file not found: $!.\n",STDOUT);
# What follows is not a very quick routine, but does make sure no
# spectrum is missed (if already deleted somewhere).
for ($i=0;$i<=$#it_dir;$i++) {
$it=$it_dir[$i];
if ($itname[$it] !~ /Final/) {
$filetext="it$it";
}
else {
$filetext="it".($it-1)."/finaldata";
}
opendir (DIR, "structures/$filetext");
@dirlist = grep /^calib_\w+_it\d+.out$|\.tbl$/, readdir DIR ;
closedir DIR;
while ($j <= $#dirlist) {
if ($dirlist[$j] =~ /ambig.tbl$/) {
splice(@dirlist,$j,1);next;
}
elsif ($dirlist[$j] =~ /.tbl$/) {
$dirlist[$j] =~ s/.tbl$//;
}
elsif ($dirlist[$j] =~ /^calib_\w+_it\d+.out$/) {
$dirlist[$j] =~ s/^calib\_//;
$dirlist[$j] =~ s/_it$it.out$//;
}
if ((grep(/^$dirlist[$j]$/,@spec_list_temp))==0) {
push(@spec_list_temp,$dirlist[$j]);
}
$j++;
}
}
}
close RUNCNS;
@spec_list = sort @spec_list_temp;
}
sub getorigrestr {
# Gets the information of the original restraint list(s) from the data directory
# Note that %assignment is not used in rest of program for now.
my ($status,$origres_num) =0;
my (@peakinfo) = "";
print ("\n");
foreach $spectrum (@spec_list) {
$origres_num = 0;
$status=open (ORIGRES,"data/$spectrum/$spectrum.tbl");
if ($status == 0) {&send_warning("No original .tbl file for spectrum $spectrum: $!.\n",STDOUT);next;}
print ("Reading data/$spectrum/$spectrum.tbl...\n");
while (defined ($_=<ORIGRES>)) {
# Skip comment lines
next if (/\s*\!/);
chomp;
# Modification by Kevin Gardner 26/10/2001.
if (/^\s*assi/ || /^\s*ASSI/) {
$origres_num++;$peakbox3=0;$ppm3=0;
$assignment{$spectrum}[$origres_num]=$_."\n"; # read original info
if (/resid/ || /\{/) {
$peakbox1=0;
next;
}
s/assi \( attr store1 \<//;
s/and attr store1 \>//;
($high_ppm,$low_ppm,@void)=split(' ');
$peakbox1=($high_ppm-$low_ppm);
next;
}
elsif (/^\s*OR/) {
$assignment{$spectrum}[$origres_num].=$_."\n";
}
elsif (/^\s*\(/) {
$assignment{$spectrum}[$origres_num].=$_."\n";
if (/resid/ || /\{/) {
$peakbox2=0;
next;
}
s/^\s+\( attr store1 \<//;
s/and attr store1 \>//;
($high_ppm,$low_ppm,@void)=split(' ');
$peakbox2=($high_ppm-$low_ppm);
next;
}
elsif (/^\s+and/) {
$assignment{$spectrum}[$origres_num].=$_."\n";
s/^\s+and bondedto\s+\( attr store1 \<//;
s/and attr store1 \>//;
($high_ppm,$low_ppm,@void)=split(' ');
$peakbox3=($high_ppm-$low_ppm);
$ppm3=($high_ppm+$low_ppm)/2;
next;
}
elsif (/ peak /) {
$peakinfo{$spectrum}[$origres_num]=$_;
if (/weight/) {
( /peak\s+(\S+)/ ) && ( $peaknum=$1 );
( /volume\s+(\S+)/ ) && ( $vol=$1 );
( /ppm1\s+(\S+)/ ) && ( $ppm1=$1 );
( /ppm2\s+(\S+)/ ) && ( $ppm2=$1 );
#This block Copyright 2002 Christian Gunning and Ryan Richter
#Does the following line still work? Above handles change better.
($v0,$v1,$v2,$v3,$peaknum,$v5,$v6,$v7,$vol,$v9,$ppm1,$v11,$ppm2,@void)=split(' ');
}
else {
( /peak\s+(\S+)/ ) && ( $peaknum=$1 );
( /volume\s+(\S+)/ ) && ( $vol=$1 );
( /ppm1\s+(\S+)/ ) && ( $ppm1=$1 );
( /ppm2\s+(\S+)/ ) && ( $ppm2=$1 );
#This block Copyright 2002 Christian Gunning and Ryan Richter
#The following line doesn't handle an nmrview setup:
#($v0,$v1,$v2,$v3,$peaknum,$v5,$vol,$v7,$ppm1,$v9,$ppm2)=split(' ');
}
$origres_ppm1{$spectrum}[$peaknum]=$ppm1;
$origres_ppm2{$spectrum}[$peaknum]=$ppm2;
$origres_ppm3{$spectrum}[$peaknum]=$ppm3;
$origlistid{$spectrum}[$origres_num]=$peaknum;
$origres_vol{$spectrum}[$peaknum]=$vol;
$origres_pb1{$spectrum}[$peaknum]=$peakbox1;
$origres_pb2{$spectrum}[$peaknum]=$peakbox2;
$origres_pb3{$spectrum}[$peaknum]=$peakbox3;
$origres_listid{$spectrum}[$peaknum]=$origres_num;
#$peakinspec{$peaknum.','.$ppm1.','.$ppm2}=$spectrum;
next;
}
}
}
}
sub getiterationinfo {
# Go through the 'normal' iterations. Print errors if files not found.
for ($it=0;$it<=$#it_dir;$it++) {
$it_cur=$it_dir[$it];
if ($itname[$it_cur] !~ /Final/ && $pdbover ne 'off') {
$pdbread[$it_cur]=&getpdbinfo($it_cur);
}
&readitfiles($it_cur);
}
}
sub printspectruminfo {
print "\nWriting spectrum overviews...\n";
foreach $spectrum (@spec_list) {
&printcalibinfo($spectrum) if ($calibprint ne 'off');
&printviolationinfo($spectrum) if ($violprintall ne 'off' || $violprintlast ne 'off');
}
}
sub printrestraintoverview {
if ($restrover ne 'off') {
print "\nWriting restraint overviews...\n";
&print_title("Restraint information overview",OUTPUT);
print OUTPUT ("\n * n/u = restraints not used compared to iteration 0");
print OUTPUT ("\n * ratio = ambiguous restraints/unambiguous restraints");
print OUTPUT ("\n * avg_amb = ambiguous possibilities/ambiguous restraints\n");
for ($it=0;$it<=$#it_dir;$it++) {
$it_cur=$it_dir[$it];
if ($itname[$it_cur] =~ /Final/) {
$it_text="Final data";
}
else {$it_text="Iteration $it_cur";}
&printrestrinfo($it_cur,$it_text);
&printrestrinfo_spec($it_cur) if ($restrover_spec ne 'off');
}
}
}
sub printpdboverview {
if ($pdbover ne 'off') {
print "\nWriting PDB overviews...\n";
&print_title("PDB file energies overview",OUTPUT);
print OUTPUT ("\n * format: minimum ... <average> ... maximum\n");
for ($it=0;$it<=$#it_dir;$it++) {
$it_cur=$it_dir[$it];
next if ($pdbread[$it_cur] == 0 || $itname[$it_cur] =~ /Final/);
&printpdbinfo($it_cur);
}
}
}
sub printxmgrgraphs {
if ($graph eq 'on') {
print "\nWriting XMGR graphs...\n";
&makegraph('restraints');
&makegraph('pdb');
}
}
sub printdetails {
print "\nMaking initial matrices...\n";
# Note that the reportdetails subs are necessary for matrix generation!
&print_title("Overview of type of restraints .",REPORT);
printf REPORT ("\n(ambiguous restraint possibilities divided by total number possibilities for restraint)\n");
foreach $spectrum (@spec_list) {
&reportdetails($spectrum,$it_dir[0],'off');
&reportdetails($spectrum,$it_final,$detailprint);
}
printf REPORT ("\n");
if ($printrestperres eq 'on') {
&printrestperrestotal;
}
}
sub calculatematrices {
print "\nMaking dependent matrices...\n";
# Make and print 'dependent' contact map.
$contact_avg[$it_final]=&makedependentmatrix ($it_final,$contact_all_sum[$it_final],$contact_all_max[$it_final],@contact_all);
if ($it_final != $it_dir[0]) {
$contact_avg[$it_dir[0]]=&makedependentmatrix ($it_dir[0],$contact_all_sum[$it_dir[0]],$contact_all_max[$it_dir[0]],@contact_all);
}
}
sub reportsuspects {
print "\nChecking for suspicious assignments...\n";
# Prints suspected contacts.
foreach $spectrum (@spec_list) {
&print_title("Spectrum: $spectrum",REPORT);
&reportbadass($spectrum,$it_final);
}
}
sub reportunambig{
if ($newasnprint eq 'on') {
print "\nChecking for unambiguous assignments...\n";
# Prints unambiguous contacts.
foreach $spectrum (@spec_list) {
&print_title("Spectrum: $spectrum",NEWUNAMBIG);
&reportunambass($spectrum,$it_final);
}
}
}
sub reportambig{
if ($newasnprint eq 'on') {
print "\nChecking for ambiguous assignments...\n";
# Prints ambiguous contacts.
foreach $spectrum (@spec_list) {
&print_title("Spectrum: $spectrum",NEWAMBIG);
&reportambass($spectrum,$it_final);
}
}
}
sub printps {
print "\nWriting postscript file...\n";
&print_pspage ($outputfile.".ps","Helvetica",$it_dir[0],$itname[$it_dir[0]],$it_final,$itname[$it_final]);
}
####################################
# Subroutines for printing data... #
####################################
sub printrestperrestotal {
my (@type_list) = ('long','med','seq','intra');
print RESTPERRES "All spectra combined\n\n ";
foreach $type (@type_list) {
printf RESTPERRES ("%8s",$type);
}
for $res (1...$num_residues) {
$line = sprintf ("\n%3d",$res);
foreach $type (@type_list) {
$line.=sprintf(" %5.1f ",$contact_info_all{$type}[$res]);
}
print RESTPERRES $line;
}
print RESTPERRES"\n\n";
}
sub printpdbinfo {
my ($it)=@_;
print OUTPUT ("\nIteration $it (on $pdbcount[$it]/$pdball[$it] structures): \n\n");
foreach $name (@PDBinfolist) {
if ($name eq '') {print OUTPUT "\n";next;}
($min,$avg,$max)=&minavgmax($pdb{$name}[$it]);
if ($avg == 0) {
print OUTPUT (" *All values zero for $name.\n");
next;
}
($sd)=&sd($avg,$pdb{$name}[$it]);
if ($name=~ /^E/) {
printf OUTPUT (" *%-15s: %.2f ... <%.2f> ... %.2f [+/- %.2f]\n",
$name,$min,$avg,$max,$sd);
}
elsif ($name=~ /^rms/) {
printf OUTPUT (" *%-13s: %.3f ... <%.3f> ... %.3f [+/- %.3f]\n",
$name,$min,$avg,$max,$sd);
}
elsif ($name=~ /^viol/) {
printf OUTPUT (" *%-12s: %d ... <%.1f> ... %d\n",
$name,$min,$avg,$max);
}
}
print OUTPUT ("\n");
}
sub printrestrinfo {
my ($it,$ittext)=@_;
my ($ratio,$avg_amb)=0;
print OUTPUT ("\n$ittext: \n\n");
if ($restr[$it] == 0) {print OUTPUT " No general info available.\n";return;}
if ($unambig_restr[$it] != 0) {$ratio=$ambig_restr[$it]/$unambig_restr[$it];}
else {$ratio=0;}
if ($ambig_restr[$it] != 0) {$avg_amb=$ambig_restr_poss[$it]/$ambig_restr[$it];}
else {$avg_amb=0;}
if ($restr_tot[0] != 0) {$del=$restr_tot[0]-$ambig_restr[$it]-$unambig_restr[$it];}
else {$del='n/a'};
print OUTPUT (" Unambig ambig n/u ratio ambig_poss avg_amb\n\n");
printf OUTPUT (" %5d %5d %4s) %3.2f %5d %3.1f (%-25s\n\n",
$unambig_restr[$it],$ambig_restr[$it],"(".$del,
$ratio,$ambig_restr_poss[$it],$avg_amb,"Final)");
printf OUTPUT (" (unamb intra %.1f, seq %.1f, med %.1f, long %.1f)\n",$restrcnt{"intraunamb"}[$it],
$restrcnt{"sequnamb"}[$it],$restrcnt{"medunamb"}[$it],$restrcnt{"longunamb"}[$it]);
printf OUTPUT (" (ambig intra %.1f, seq %.1f, med %.1f, long %.1f)\n",$restrcnt{"intraamb"}[$it],
$restrcnt{"seqamb"}[$it],$restrcnt{"medamb"}[$it],$restrcnt{"longamb"}[$it]);
printf OUTPUT (" (all intra %.1f, seq %.1f, med %.1f, long %.1f)\n",$restrcnt{"intra"}[$it],
$restrcnt{"seq"}[$it],$restrcnt{"med"}[$it],$restrcnt{"long"}[$it]);
if ($graph eq 'on') {
$unambig_rest_list.=$it." $unambig_restr[$it]\n";
$ambig_rest_list.=$it." $ambig_restr[$it]\n";
$all_rest_list.=$it." ".($ambig_restr[$it]+$unambig_restr[$it])."\n";
$avg_amb_list.=$it." $avg_amb\n";
$ratio_list.=$it." $ratio\n";
}
}
sub printrestrinfo_spec {
my ($it)=@_;
my ($ratio,$avg_amb)=0;
print OUTPUT ("\n");
foreach $spectrum (@spec_list) {
if ($restr{$spectrum}[$it] == 0) {print OUTPUT (" No info available ($spectrum)\n");next;}
if ($unambig_restr{$spectrum}[$it] != 0) {$ratio=$ambig_restr{$spectrum}[$it]/$unambig_restr{$spectrum}[$it];}
else {$ratio=0;}
if ($ambig_restr{$spectrum}[$it] != 0) {$avg_amb=$ambig_restr_poss{$spectrum}[$it]/$ambig_restr{$spectrum}[$it];}
else {$avg_amb=0;}
if ($restr_tot{$spectrum}[0] != 0) {$del=$restr_tot{$spectrum}[0]-$ambig_restr{$spectrum}[$it]-$unambig_restr{$spectrum}[$it];}
else {$del='n/a'};
printf OUTPUT (" %5d %5d %4s) %3.2f %5d %3.1f (%-25s\n",
$unambig_restr{$spectrum}[$it],
$ambig_restr{$spectrum}[$it],"(".$del,
$ratio,$ambig_restr_poss{$spectrum}[$it],
$avg_amb,$spectrum.")");
if ($graph eq 'on') {
$unambig_rest_list{$spectrum}.=$it." $unambig_restr{$spectrum}[$it]\n";
$ambig_rest_list{$spectrum}.=$it." $ambig_restr{$spectrum}[$it]\n";
$all_rest_list{$spectrum}.=$it." ".($ambig_restr{$spectrum}[$it]+$unambig_restr{$spectrum}[$it])."\n";
$avg_amb_list{$spectrum}.=$it." $avg_amb\n";
$ratio_list{$spectrum}.=$it." $ratio\n";
}
}
print OUTPUT ("\n");
}
sub printcalibinfo {
my ($spectrum)=@_;
&print_title("Calibration analysis for spectrum $spectrum",OUTPUT);
for ($it=0;$it<=$#it_dir;$it++) {
$it_cur=$it_dir[$it];
if ($itname[$it_cur] =~ /Final/) {
$it_text="Final data";
}
else {$it_text="Iteration $it";}
printf OUTPUT ("\n$it_text: %3e %3e.",
$calibinfo{$spectrum}[$it_cur][0],$calibinfo{$spectrum}[$it_cur][1]);
}
print OUTPUT "\n";
}
sub printviolationinfo {
my ($spectrum)=@_;
my ($it,$i,$it_start) =0;
my ($violtot)=0;
my (@violated);
# Prints the violations from the calib file.
# Note that the actual number of structures used for violation analysis
# is defined by 'assignstruct_?_pro=...'! In run.cns.
&print_title("Violation analysis for spectrum $spectrum",OUTPUT);
if ($violprintall eq 'off') {$it_start=$#it_dir;}
else {$it_start = 0;}
for ($i=$it_start;$i<=$#it_dir;$i++) {
undef @viol;
$it=$it_dir[$i];
if ($itname[$it] =~ /Final/) {
$ittext="Final Data";
}
else {
$ittext="Iteration $it";
}
$strongviol_num=0;
if (defined $strongviol{$spectrum}[$it][$strongviol_num]) {
$violtot=$#{$strongviol{$spectrum}[$it]}+1;
print OUTPUT "\n*$ittext: Removed $violtot violated restraints for $spectrum.\n";
print OUTPUT "\nList of consistent violations not used in calculation....\n";
while (defined $strongviol{$spectrum}[$it][$strongviol_num]) {
# Uncomment to print short list of violated peaks to screen.
#print $strongviol{$spectrum}[$it][$strongviol_num].",";
printf OUTPUT ("\n *Peak %-5d [%.2f-%.2f] +++ %.2f (in %d/%d)\n",
$strongviol{$spectrum}[$it][$strongviol_num],
$strongviol_lol{$spectrum}[$it][$strongviol_num],
$strongviol_upl{$spectrum}[$it][$strongviol_num],
$strongviol_rms{$spectrum}[$it][$strongviol_num],
$strongviol_thres{$spectrum}[$it][$strongviol_num],
$strongviol_structurestotal[$it]);
$peak_num=$strongviol{$spectrum}[$it][$strongviol_num];
push (@{$viol[0]},$peak_num);
push (@{$viol[1]},$origres_ppm1{$spectrum}[$peak_num]);
push (@{$viol[2]},$origres_ppm2{$spectrum}[$peak_num]);
foreach $el ('i','j') {
@{$atname{$el}}=split(',',$strongviol{$el}{$spectrum}[$it][$strongviol_num]);
if ($violcompress ne 'off') {
@{$atname{$el}}=&mergeatomnames(@{$atname{$el}});
}
}
for ($line=0;($line<=$#{$atname{'i'}} || $line<=$#{$atname{'j'}});$line++) {
@atcomp_i=split(' ',$atname{'i'}[$line]);
@atcomp_j=split(' ',$atname{'j'}[$line]);
printf OUTPUT (" %3s %-3s %-5s %3s %-3s %-5s\n",
$atcomp_i[1],$atcomp_i[0],$atcomp_i[2],
$atcomp_j[1],$atcomp_j[0],$atcomp_j[2]);
}
$strongviol_num++;
}
# Uncomment to print short list of violated peaks to screen.
#print "\n";
# Routine to print chemical shift values with peak nums (sorted & short format)
for ($x=0;$x<=$#{$viol[0]};$x++) {
for ($y=($x+1);$y<=$#{$viol[0]};$y++) {
if ($viol[1][$x] < $viol[1][$y]) {
for $k (0...2) {
($viol[$k][$x],$viol[$k][$y])=&swap($viol[$k][$x],$viol[$k][$y])
}
}
}
}
&print_title ("Short violation list for spectrum $spectrum",REPORT);
for ($x=0;$x<=$#{$viol[0]};$x++) {
printf REPORT (" *Peak %4d ppm1 %7.2f ppm2 %7.2f\n",$viol[0][$x],$viol[1][$x],$viol[2][$x]);
}
print REPORT "\n";
}
elsif ($calib{$spectrum}[$it]==0) {
print OUTPUT "\n$ittext: No data available.\n";
}
else {print OUTPUT "\n$ittext: No consistent strong violations.\n";}
}
}
sub reportunambass {
my ($spectrum,$it) = @_;
my ($rsdnum1,$rsdnum2);
my ($unamblist,$ambiglist,$longlist_ok,$longlist_check);
# Loop over all peaks (restraints)
for $restr (1...$#{$restr{$spectrum}[$it]}) {
# only look at unambiguous restraints.
next if ($restr{$spectrum}[$it][$restr] != 1);
if ($#{$restr_rsdnum{$spectrum}[$it][$restr][0]} = 1) {
$rsdnum1=$restr_rsdnum{$spectrum}[$it][$restr][0][0];
$rsdnum2=$restr_rsdnum{$spectrum}[$it][$restr][1][0];
$peakid=$restr_topeak{$spectrum}[$it][$restr];
$restr_orig=$peak_torestr{$spectrum}[$it[0]][$peakid];
($res1,$res2)=sort {$a <=> $b} ($rsdnum1,$rsdnum2);
$ppm1=$origres_ppm1{$spectrum}[$peakid];
$ppm2=$origres_ppm2{$spectrum}[$peakid];
$ppm3=$origres_ppm3{$spectrum}[$peakid];
# Report only if it was originally not assigned
if ($restr{$spectrum}[$it[0]][$restr_orig] != 1) {
if ($ppm3 != 0)
{ $unamblist.=sprintf "*Peak $peakid $spectrum (ppm1 $ppm1, ppm2 $ppm2, ppm3 $ppm3):";}
else
{ $unamblist.=sprintf "*Peak $peakid $spectrum (ppm1 $ppm1, ppm2 $ppm2):";};
$unamblist.=sprintf ("%3d %-5s %3s %-5s\n",
$restr_rsdnum{$spectrum}[$it][$restr][0][0],
$restr_atname{$spectrum}[$it][$restr][0][0],
$restr_rsdnum{$spectrum}[$it][$restr][1][0],
$restr_atname{$spectrum}[$it][$restr][1][0]);
}
}
}
&print_title("New unambiguous assignments",NEWUNAMBIG);
print NEWUNAMBIG $unamblist;
}
sub reportambass {
my ($spectrum,$it) = @_;
my ($rsdnum1,$rsdnum2);
my ($unamblist,$amblist,$longlist_ok,$longlist_check);
# Loop over all peaks (restraints)
for $restr (1...$#{$restr{$spectrum}[$it]}) {
# only look at ambiguous restraints.
if ($#{$restr_rsdnum{$spectrum}[$it][$restr][0]} > 1) {
$peakid=$restr_topeak{$spectrum}[$it][$restr];
$restr_orig=$peak_torestr{$spectrum}[$it[0]][$peakid];
($res1,$res2)=sort {$a <=> $b} ($rsdnum1,$rsdnum2);
$ppm1=$origres_ppm1{$spectrum}[$peakid];
$ppm2=$origres_ppm2{$spectrum}[$peakid];
$ppm3=$origres_ppm3{$spectrum}[$peakid];
# Report only if it was originally not assigned
if ($restr{$spectrum}[$it[0]][$restr_orig] != 1) {
$stop=0;
for $k (0...$#{$restr_rsdnum{$spectrum}[$it][$restr][0]}) {
if ($ppm3 != 0)
{ $amblist.=sprintf "*Peak $peakid $spectrum (ppm1 $ppm1, ppm2 $ppm2, ppm3 $ppm3):";}
else
{ $amblist.=sprintf "*Peak $peakid $spectrum (ppm1 $ppm1, ppm2 $ppm2):";};
$amblist.=sprintf ("%3d %-5s %3s %-5s\n",
$restr_rsdnum{$spectrum}[$it][$restr][0][$k],
$restr_atname{$spectrum}[$it][$restr][0][$k],
$restr_rsdnum{$spectrum}[$it][$restr][1][$k],
$restr_atname{$spectrum}[$it][$restr][1][$k]);
}
$amblist.=sprintf ("\n");
}
}
}
&print_title("New ambiguous assignments",NEWAMBIG);
print NEWAMBIG $amblist;
}
sub reportbadass {
my ($spectrum,$it) = @_;
my ($rsdnum1,$rsdnum2);
my ($unamblist,$longlist_ok,$longlist_check);
# Loop over all peaks (restraints)
# In this loop unambiguous long range contacts that had
# an intraresidue or sequential ambiguous possibility are
# detected
for $restr (1...$#{$restr{$spectrum}[$it]}) {
# only look at unambiguous restraints.
next if ($restr{$spectrum}[$it][$restr] != 1);
$rsdnum1=$restr_rsdnum{$spectrum}[$it][$restr][0][0];
$rsdnum2=$restr_rsdnum{$spectrum}[$it][$restr][1][0];
# Detect an unambiguous medium or long range NOE...
if ( (($rsdnum1-$rsdnum2)**2) > 1) {
$peakid=$restr_topeak{$spectrum}[$it][$restr];
$restr_orig=$peak_torestr{$spectrum}[$it[0]][$peakid];
($res1,$res2)=sort {$a <=> $b} ($rsdnum1,$rsdnum2);
# If it was originally ambiguously assigned, check if there
# was a sequential or intraresidue possibility.
if ($restr{$spectrum}[$it[0]][$restr_orig] != 1) {
$stop=0;
for $k (0...$#{$restr_rsdnum{$spectrum}[$it[0]][$restr_orig][0]}) {
$rsdnumt1=$restr_rsdnum{$spectrum}[$it[0]][$restr_orig][0][$k];
$rsdnumt2=$restr_rsdnum{$spectrum}[$it[0]][$restr_orig][1][$k];
if ((($rsdnumt1-$rsdnumt2)**2) <= 1) {
$ppm1=$origres_ppm1{$spectrum}[$peakid];
$ppm2=$origres_ppm2{$spectrum}[$peakid];
$unamblist.=sprintf "\n*Peak $peakid in spectrum $spectrum (ppm1 $ppm1, ppm2 $ppm2)\n";
$unamblist.=sprintf (" Final unambiguous assignment: %3d %-5s %3s %-5s\n",
$restr_rsdnum{$spectrum}[$it][$restr][0][0],
$restr_atname{$spectrum}[$it][$restr][0][0],
$restr_rsdnum{$spectrum}[$it][$restr][1][0],
$restr_atname{$spectrum}[$it][$restr][1][0]);
$unamblist.=sprintf (" Original ambiguous possibility: %3d %-5s %3s %-5s\n",
$rsdnumt1,
$restr_atname{$spectrum}[$it[0]][$restr_orig][0][$k],
$rsdnumt2,
$restr_atname{$spectrum}[$it[0]][$restr_orig][1][$k]);
$stop=1;last;
}
last if ($stop==1);
}
}
}
}
# Here detect restraints that only have assignment possibilities corresponding to
# the more unlikely areas of the contact map
for $restr (1...$#{$restr{$spectrum}[$it]}) {
$contact_maphit=0;$reslist="";
for $k (0...$#{$restr_rsdnum{$spectrum}[$it][$restr][0]}) {
$rsdnum1=$restr_rsdnum{$spectrum}[$it][$restr][0][$k];
$rsdnum2=$restr_rsdnum{$spectrum}[$it][$restr][1][$k];
($res1,$res2) = sort {$a <=> $b} ($rsdnum1,$rsdnum2);
# Sometimes additional line for unambiguous ones - quick fix
# for that here
next if ($res1 eq '' || $res2 eq '');
# Keep track of residue combinations that are one. If so,
# skip this restraint
if ($contact_dep[$it][$res1][$res2] > $contact_avg[$it]) {
$contact_maphit=1;
last;
}
else {
if ($reslist !~ /\[$rsdnum1\,$rsdnum2\]\,/) {
$reslist.="[$rsdnum1,$rsdnum2],";
}
}
}
if ($contact_maphit!=1) {
$peakid=$restr_topeak{$spectrum}[$it][$restr];
$ppm1=$origres_ppm1{$spectrum}[$peakid];
$ppm2=$origres_ppm2{$spectrum}[$peakid];
chop $reslist;
$longlist_check .= sprintf (" * Peak %5d %s (ppm1 %.3f, ppm2 %.3f).\n",$peakid,$reslist,$ppm1,$ppm2);
}
}
&print_title("Unambiguous long range with possible intra or sequential assignment",REPORT);
print REPORT $unamblist;
&print_title("Long range contacts - check",REPORT);
print REPORT "\n".$longlist_check;
}
sub reportdetails {
my ($spectrum,$it,$printing) = @_;
my ($rsdnum1,$rsdnum2,$double_it);
my (@longinfo,%info_count,$count,$res1,$res2)=0;
my (%contact,%contact_info,%info);
my (@contact_res1,@contact_res2,$detaillist,@contact,$type)="";
my (@type_list)=('long','med','seq','intra');
undef @contact;
$distvolmax = $restr_distvolmax{$spectrum}[$it];
#print $distvolmax."\n";
# Loop over all peaks (restraints)
for $restr (1...$#{$restr{$spectrum}[$it]}) {
undef @longinfo;
undef @contact_res1;
undef @contact_res2;
undef %contact;
undef %info_count;
$total=0;
$info_count{'long'}=0;
$addtext="";
if ($it_dir[0] == $it_final) {$double_it=2;}
else {$double_it=1;}
# Loop over all possibilities for spectrum.tbl list!
for $k (0...$#{$restr_rsdnum{$spectrum}[$it][$restr][0]}) {
$rsdnum1=$restr_rsdnum{$spectrum}[$it][$restr][0][$k];
$rsdnum2=$restr_rsdnum{$spectrum}[$it][$restr][1][$k];
$atname2=$restr_atname{$spectrum}[$it][$restr][1][$k];
# Make a multiplication factor for this particular contact (see below where $contact[$it][][] is assigned)
$atname2=~ s/\%//;
chop $atname2;
($res1,$res2) = sort {$a <=> $b} ($rsdnum1,$rsdnum2);
push (@contact_res1,$res1);
push (@contact_res2,$res2);
if ($res2 > $num_residues) {$num_residues=$res2;}
# Long range combinations (more than 3 residues apart)
if ((($rsdnum1-$rsdnum2)**2) > 9) {
$type='long';
$longinfo[$info_count{$type}]=$k;
}
# Sequential
elsif ((($rsdnum1-$rsdnum2)**2) == 1) {
$type='seq';
}
# Intraresidue
elsif (($rsdnum1-$rsdnum2) == 0) {
$type='intra';
}
# Medium range
else {
$type='med';
}
$info_count{$type}++;
$contact{$type}[$rsdnum1]++;
$contact{$type}[$rsdnum2]++;
}
# Calculate results for the matrix. Weighs the contacts on the number of total
# assignment possibilities for a restraint!
$total=$#{$restr_rsdnum{$spectrum}[$it][$restr][0]}+1;
foreach $type (@type_list) {
$info{$type}{$spectrum}[$it]+=$info_count{$type}/$total;
}
# Only run if -sp flag is set to on - default is off. This is an addition
# on 13/06/2001 - now uses unambig/ambig.tbl as default to calculate
# contact matrix
if ($specmatrix eq 'on') {
$int_f=($restr_distvol{$spectrum}[$it][$restr])*5/$distvolmax;
for $l (0...$#contact_res1) {
$res1=$contact_res1[$l];
$res2=$contact_res2[$l];
$contact[$it][$res1][$res2]+=($int_f/$total);
}
}
# Count types of restraint per residue if necessary (only for last iteration read)
if ($printrestperres eq 'on' && $it==$it_final) {
foreach $type (@type_list) {
# Is OK - only have to go to maximal res num found
for $res (1...$num_residues) {
$contact_info{$type}[$res]+=$contact{$type}[$res]/$total;
}
}
}
# Don't look at unambiguous restraints and non-long range in detailed overview.
next if ($restr{$spectrum}[$it][$restr] == 1);
next if ($info_count{'long'} == 0);
# Print out info if long range detected.
$peakid=$restr_topeak{$spectrum}[$it][$restr];
if ($info_count{'long'} != $total) {
$addtext=sprintf("(other possibilities: %d intrares, %d sequential, %d medium range)",
$info_count{'intra'},$info_count{'seq'},$info_count{'med'});
}
$detaillist.=sprintf ("*Peak %d $addtext\n",$peakid);
for $count (0...($info_count{'long'}-1)) {
$rsdnum1=$restr_rsdnum{$spectrum}[$it][$restr][0][$longinfo[$count]];
$atname1=$restr_atname{$spectrum}[$it][$restr][0][$longinfo[$count]];
$rsdnum2=$restr_rsdnum{$spectrum}[$it][$restr][1][$longinfo[$count]];
$atname2=$restr_atname{$spectrum}[$it][$restr][1][$longinfo[$count]];
($res1,$res2) = sort {$a <=> $b} ($rsdnum1,$rsdnum2);
$detaillist.= sprintf (" * Atom %3d %-4s to %3d %-4s (%3d\-%-3d)\n",$rsdnum1,$atname1,$rsdnum2,$atname2,$res1,$res2);
}
$detaillist .= sprintf "\n";
}
printf REPORT "\nDistribution contacts iteration $it, spectrum $spectrum:\n";
printf REPORT "\n Intraresidue: %6.1f",$info{'intra'}{$spectrum}[$it];
printf REPORT "\n Sequential: %6.1f",$info{'seq'}{$spectrum}[$it];
printf REPORT "\n Medium range: %6.1f",$info{'med'}{$spectrum}[$it];
printf REPORT "\n Long range: %6.1f\n",$info{'long'}{$spectrum}[$it];
# Put matrix data in global matrix variables (if -sp flag set to on)
if ($specmatrix eq 'on') {
$k_max = $#{$contact[$it]};
for $k (1...$k_max) {
for $l (1...$#{$contact[$it][$k_max]}) {
next if ($k>$l);
$contact_all[$it][$k][$l]+=$contact[$it][$k][$l]; # for the full overview (all spectra)
$contact_all_sum[$it]+=$contact[$it][$k][$l]; # to get an 'average'
$contact_all_max[$it]=$contact_all[$it][$k][$l] if ($contact_all_max[$it]<$contact_all[$it][$k][$l]);
}
}
}
if ($printrestperres eq 'on' && $it==$it_final) {
print RESTPERRES "Spectrum $spectrum\n\n ";
foreach $type (@type_list) {
printf RESTPERRES ("%8s",$type);
}
for $res (1...$num_residues) {
$line = sprintf ("\n%3d",$res);
foreach $type (@type_list) {
$line.=sprintf(" %5.1f ",$contact_info{$type}[$res]);
#Keep track of total over all spectra
$contact_info_all{$type}[$res]+=$contact_info{$type}[$res];
}
print RESTPERRES $line;
}
print RESTPERRES"\n\n";
}
if ($printing eq 'on') {
# Print the list of peaks
&print_title("Spectrum $spectrum: Long range possibilities in ambiguous restraints.",DETAIL);
print DETAIL "\n";
print DETAIL $detaillist;
}
}
####################################
# Subroutines for reading files... #
####################################
sub getpdbinfo {
my ($it) = @_;
my (@pdblist,$gone,@eflds) = "";
my ($pdbnum,$pdbcount,$pdbfile,$max_value) = 0;
my (%E);
opendir (DIR, "structures/it$it");
@pdblist = grep /\d+.pdb$/, readdir DIR ;
closedir DIR;
$pdball[$it]=$#pdblist+1;
if ($#pdblist < 0) {
&send_warning("No .pdb files found for iteration $it.\n",STDOUT);
return 0;
}
$status=open(PDBFILELIST,"structures/it$it/file.nam");
if ($status==0 && $#pdblist >= $pdbbest) { # Get overall energy info from pdb files if no file.nam
print "\nGetting info from PDB files!\n";
foreach $pdbfile (@pdblist) {
$status=open (PDB,"structures/it$it/$pdbfile");
if ($status == 0) {next;}
while (defined ($_ = <PDB>)) {
if (/^REMARK\s+overall/) {
$_=<PDB>;
s/^REMARK energies://;
s/\s//g;
@eflds=split(',');
$E{$pdbfile}=$eflds[0];
$Ecount++;
if ($Ecount>$pdbbest) {
$max_value=$E{$pdbfile};
$gone=$pdbfile;
foreach $pdbkey (keys %E) {
if ($E{$pdbkey} > $max_value) {
$gone=$pdbkey;
$max_value=$E{$pdbkey};
}
}
delete $E{$gone};
}
last;
}
}
}
undef @pdblist;
@pdblist = keys %E;
}
elsif ($status==1 && $#pdblist >= $pdbbest) {
undef @pdblist;
while (defined ($_=<PDBFILELIST>)) {
next if (!/.pdb/);
chomp;
$pdbcount++;
push (@pdblist,$_) if ($pdbcount <= $pdbbest);
}
}
close PDBFILELIST;
$root_name = $pdblist[0];
$root_name =~ s/_\d+\.pdb$//g;
print ("\nReading .pdb files...\n");
foreach $pdbfile (@pdblist) {
$status=open (PDB,"structures/it$it/$pdbfile");
if ($status == 0) {&sendwarning("Could not open pdb file: $!.\n",STDOUT);next;}
$pdbcount[$it]++;
undef @pdb_infolist;
$pdbfile =~ s/^$root_name\_//;
$pdbfile =~ s/.pdb$//;
while (defined ($_ = <PDB>)) {
if (/^REMARK\s+[overall|bonds|noe]/) {
$_=<PDB>;
s/^REMARK\s+.+://;
s/\s//g;
push(@pdb_infolist,split(','));
#pop(@pdb_infolist);
next;
}
elsif (/^ATOM/) {last;}
}
for $i (0...$#PDBinfolist) {
$pdb{$PDBinfolist[$i]}[$it][$pdbnum]=$pdb_infolist[$i];
}
#uncomment next for list of energies for pdb files
#printf ("$pdbnum %8.2f %8.2f\n",$pdb_infolist[0],$pdb_infolist[5]);
$pdbnum++;
close PDB;
}
return 1;
}
sub getcalibinfo {
# Info here: the list of 'problem' restraints is the same for
# each calibration - can basically skip this once you've done
# it for an iteration... if not just display the info once!
# So reads in data/.../....tbl EACH time anew!!!
my ($it) = @_;
my (@caliblist,@caliblist_sort) = "";
my ($restr_na,$error_before,$noe_end) = 0;
if ($itname[$it] =~ /Final/) {
$final=($it-1).'/finaldata';
$it_text="it".($it-1)."/finaldata/";
$itr=$it-1;
}
else {$final=$it;$it_text="it$it/";$itr=$it;}
print ("\n");
foreach $calib (@spec_list) {
$status=open (CALIB,"structures/it$final/calib_$calib\_it$itr.out");
if ($status == 0) {&send_warning("No $calib\_it$itr.out file for iteration $it_text: $!.\n",STDOUT);return;}
$strongviol_num=0;
$restr_na=0;
print ("Reading it$final/calib_$calib\_it$itr.out...\n");
if ($restr_prob{$calib} != 1) {
&print_title("Spectrum $calib: errors while reading original restraint list",OUTPUT);
}
while (defined ($_ = <CALIB>)) {
if (/reading restraint/ && $restr_prob{$calib} != 1) {
s/\s*reading restraint\s+//g;
chomp;
$restraint=$_;
$_=<CALIB>;
if (!/restraint successfully/) {
$restr_na{$calib}[$restr_na]=$restraint;
$restr_na++;
$peakid=$origlistid{$calib}[$restraint];
$ppm_list=sprintf ("ppm1 %6.2f ppm2 %6.2f",$origres_ppm1{$calib}[$peakid],$origres_ppm2{$calib}[$peakid]);
if ($origres_ppm3{$calib}[$peakid] != 0) {$ppm_list.=sprintf (" ppm3 %6.2f",$origres_ppm3{$calib}[$peakid]);}
if (/NOESET\-ERR/) {
printf OUTPUT (" *Restraint %-5d (No assignment! Peak %-5d $ppm_list)\n",$restraint,$peakid);
}
elsif (/SELRPN\-ERR/) {
printf OUTPUT (" *Restraint %-5d (Selection error! Peak %-5d $ppm_list)\n",$restraint,$peakid);
}
else {
printf OUTPUT (" *Restraint %-5d (Unknown error! Peak %-5d $ppm_list)\n",$restraint,$peakid);
}
}
next;
}
elsif (/NOE\-ERR\: unrecognized command\:/ && $restraint != $error_before && $restr_prob{$calib} != 1) {
$error_before=$restraint;
$peakid=$origlistid{$calib}[$restraint];
printf OUTPUT (" *Error reading after restraint %-5d (Peak %-5d)\n",$restraint,$peakid);
next;
}
elsif (/========== restraint/) {
/\d+/;
$strongviol{$calib}[$it][$strongviol_num]=$&;
$continue=1;$type='';
while ($continue==1) {
$_=<CALIB>;
s/^\s+//;s/\s+$//;s/\s+/ /g; # strip and unify spaces
if (/set-i-atoms/) {
$type='i';
}
elsif (/set-j-atoms/) {
$type='j';
}
elsif (/Violations Above/) {
s/Violations?|Above|Threshold//g;
s/Rms|\,|\=|Target|distance|\(|\)|\-|\+|\///g;
chop $strongviol{'i'}{$calib}[$it][$strongviol_num];
chop $strongviol{'j'}{$calib}[$it][$strongviol_num];
@el=split(' ');
$strongviol_thres{$calib}[$it][$strongviol_num]=$el[0];
$strongviol_rms{$calib}[$it][$strongviol_num]=$el[1];
$strongviol_lol{$calib}[$it][$strongviol_num]=$el[2]-$el[3];
$strongviol_upl{$calib}[$it][$strongviol_num]=$el[2]+$el[4];
$strongviol_num++;
$continue=0;
}
elsif (/\s*\d+\s+/) {
$strongviol{$type}{$calib}[$it][$strongviol_num].=$_."," if ($type =~ /i|j/);
}
}
}
elsif (/CHPR: d\^-6, int/) {
s/CHPR: d\^-6, int\s+//;
@fld=split(' ');
$calibinfo{$calib}[$it][0]=$fld[0];
$calibinfo{$calib}[$it][1]=$fld[2];
next;
}
}
$restr_prob{$calib} = 1;
close CALIB;
}
}
sub getspectblinfo {
my ($it) = @_;
my ($restr,$restr_poss,$line)=0;
if ($itname[$it] =~ /Final/) {
$final=($it-1).'/finaldata';
$it_text="it".($it-1)."/finaldata/";
}
else {$final=$it;$it_text="it$it/";}
print ("\n");
foreach $spectrum (@spec_list) {
$status=open (SPECTBL,"structures/it$final/$spectrum.tbl");
if ($status == 0) {&send_warning("No $spectrum.tbl file for iteration $it: $!.\n",STDOUT);return;}
print ("Reading $it_text$spectrum.tbl...\n");
$unambiguous=0;$line=0;$restr=0;$restr_poss=0;
while (defined ($_ = <SPECTBL>)) {
chomp;
next if (/^\s*\!/);
next if (/^\s*$/);
if (/^ ASSI/) {
$restr++;
$ambig_restr_poss{$spectrum}[$it]+=$restr_poss;
$restr_poss=0;
$line=0;
$unambiguous=1;
$unambig_restr{$spectrum}[$it]++;
}
elsif (/^ OR/) {
$line=0;
$restr_poss++;
if ($restr_poss == 1) {
$ambig_restr_poss{$spectrum}[$it]++;
$unambig_restr{$spectrum}[$it]--;
$ambig_restr{$spectrum}[$it]++;
}
}
elsif (/^\s*\(/) {
# Modification by Kevin Gardner 26/10/2001.
($rsdnum,$atname) = /resid\s+(\d+).+name\s+([\w\%\#]+)[\s\)]/;
$restr_rsdnum{$spectrum}[$it][$restr][$line][$restr_poss]=$rsdnum;
$restr_atname{$spectrum}[$it][$restr][$line][$restr_poss]=$atname;
$atname[$line]=$atname;
$atname[$line]=~ s/\%//;
chop $atname[$line];
$rsdnum[$line]=$rsdnum;
# Each OR 'combination' is stored, not a compressed form where
# information is lost about the possible combinations.
# In this loop the unambiguous or ambiguous status of the
# restraint is checked IF $line==1 and for BOTH lines!
if ($line == 1 && $unambiguous==1) {
for $j (0...($restr_poss-1)) {
if ($restr_rsdnum{$spectrum}[$it][$restr][0][$j] != $rsdnum[0] ||
$restr_rsdnum{$spectrum}[$it][$restr][1][$j] != $rsdnum[1]) {
$unambiguous=0;last;
}
elsif ($restr_atname{$spectrum}[$it][$restr][0][$j] !~ /$atname[0]/ ||
$restr_atname{$spectrum}[$it][$restr][1][$j] !~ /$atname[1]/) {
$unambiguous=0;last;
}
}
$restr{$spectrum}[$it][$restr]=$unambiguous;
}
$line++;
}
elsif (/ peak /) {
# This gets volume of peak for calculation contact matrix. Other option is
# to use upper distance limit.
($v0,$v1,$v2,$v3,$peaknum,$v5,$v6,$v7,$vol,$v9,$ppm1,$v11,$ppm2)=split(' ');
$peak_torestr{$spectrum}[$it][$peaknum]=$restr;
$restr_topeak{$spectrum}[$it][$restr]=$peaknum;
if ($vol >=0) {$restr_distvol{$spectrum}[$it][$restr]=($vol**0.16667);}
else {$restr_distvol{$spectrum}[$it][$restr]=0};
#$restr_intvol{$spectrum}[$it][$restr]=($int**0.16667);
$restr_distvolmax{$spectrum}[$it]=$restr_distvol{$spectrum}[$it][$restr] if ($restr_distvol{$spectrum}[$it][$restr] > $restr_distvolmax{$spectrum}[$it]);
}
}
$restr_tot{$spectrum}[$it]=$unambig_restr{$spectrum}[$it]+$ambig_restr{$spectrum}[$it];
close SPECTBL;
}
}
sub getrestrinfo {
# BADLY WRITTEN ROUTINE! Lotsa duplicate code - should be smoother.
my ($it) = @_;
my ($res)="";
my ($dis)=0;
my (@type_list) = ('long','med','seq','intra');
my ($tempcnt)=-1;
my (@contact_res1,@contact_res2)=0;
my ($contact_weight)=0;
if ($itname[$it] =~ /Final/) {
$final=($it-1).'/finaldata';
$it_text="it".($it-1)."/finaldata/";
}
else {$final=$it;$it_text="it$it/";}
$restr[$it]=open (AMBIG,"structures/it$final/ambig.tbl");
if ($restr[$it] == 0) {&send_warning("No ambig.tbl file for iteration $it_text: $!.\n",STDOUT);return;}
print ("\nReading ".$it_text."ambig.tbl...\n");
while (defined ($_ = <AMBIG>)) {
if (/^ ASSI/) {
# This misses out on last restraint - added quick patch bottom
foreach $type (@type_list) {
$restrcnt{$type}[$it]+=$restrcnttemp{$type}/$tempcnt;
$restrcnt{$type."amb"}[$it]+=$restrcnttemp{$type}/$tempcnt;
$restrcnttemp{$type}=0;
}
# Add to restraint contact matrix, reset, if -sp to off
if ($specmatrix ne 'on') {
$contact_weight/=$tempcnt;
for $l (0...$#contact_res1) {
$res1=$contact_res1[$l];
$res2=$contact_res2[$l];
$contact_all[$it][$res1][$res2]+=$contact_weight;
$contact_all_sum[$it]+=$contact_weight; # to get an 'average'
$contact_all_max[$it]=$contact_all[$it][$res1][$res2] if ($contact_all_max[$it]<$contact_all[$it][$res1][$res2]);
}
}
undef @contact_res1;
undef @contact_res2;
# Keep track of numbers...
$ambig_restr_poss[$it]+=$tempcnt if ($tempcnt != -1);
$ambig_restr[$it]++;
$tempcnt=1;
$res="";
}
elsif (/^ OR/) {
$tempcnt++;
$res="";
}
elsif (/peak/) {
($med,$min,$plus,$v3,$v4,$v5,$wght,$v7,$vol,@void)=split(' ');
# By default, use ($med+($plus-$min)/2)^-6 value times $wght*1000 for 'weight' restraint.
# Can alternatively use upper limit, volume, ...
$contact_weight=(($med+($plus-$min)/2)**-6)*$wght*1000;
}
elsif (/segid/){
s/.+and resid\s+|\s+and name.+\n//g;
# Second line
if ($res ne "") {
# Count long, intra, ...
$restrcnttemp{&checkcontact($res,$_)}++;
# Keeps track of residues for contact map
($res1,$res2) = sort {$a <=> $b} ($res,$_);
push (@contact_res1,$res1);
push (@contact_res2,$res2);
}
# First line
else {
$res=$_;
}
}
}
# Quick patch for counting last restraint.
foreach $type (@type_list) {
$restrcnt{$type}[$it]+=$restrcnttemp{$type}/$tempcnt;
$restrcnt{$type."amb"}[$it]+=$restrcnttemp{$type}/$tempcnt;
$restrcnttemp{$type}=0;
}
# Add to restraint contact matrix, also quick patch.
if ($specmatrix ne 'on') {
$contact_weight/=$tempcnt;
for $l (0...$#contact_res1) {
$res1=$contact_res1[$l];
$res2=$contact_res2[$l];
$contact_all[$it][$res1][$res2]+=$contact_weight;
$contact_all_sum[$it]+=$contact_weight; # to get an 'average'
$contact_all_max[$it]=$contact_all[$it][$res1][$res2] if ($contact_all_max[$it]<$contact_all[$it][$res1][$res2]);
}
}
close AMBIG;
$restr[$it]=open (UNAMBIG,"structures/it$final/unambig.tbl");
if ($restr[$it] == 0) {&send_warning("No unambig.tbl file for iteration $it_text: $!.\n",STDOUT);return;}
print ("Reading ".$it_text."unambig.tbl...\n");
while (defined ($_ = <UNAMBIG>)) {
if (/^ ASSI/) {
$unambig_restr[$it]++;
$res="";
}
elsif (/^ OR/) {
$res="NO";
}
elsif (/segid/ && $res ne "NO"){
s/.+and resid\s+|\s+and name.+\n//g;
if ($res ne "") {
$restrcnttemp{&checkcontact($res,$_)}++;
($res1,$res2) = sort {$a <=> $b} ($res,$_);
}
$res=$_;
}
elsif (/peak/) {
foreach $type (@type_list) {
$restrcnt{$type}[$it]+=$restrcnttemp{$type};
$restrcnt{$type."unamb"}[$it]+=$restrcnttemp{$type};
$restrcnttemp{$type}=0;
}
if ($specmatrix ne 'on') {
($med,$min,$plus,$v3,$v4,$v5,$wght,$v7,$vol,@void)=split(' ');
# By default, use ($med+($plus-$min)/2)^-6 value times $wght*1000 for 'weight' restraint.
# Can alternatively use upper limit, volume, ...
$contact_weight=(($med+($plus-$min)/2)**-6)*$wght*1000;
$contact_all[$it][$res1][$res2]+=$contact_weight;
$contact_all_sum[$it]+=$contact_weight; # to get an 'average'
$contact_all_max[$it]=$contact_all[$it][$res1][$res2] if ($contact_all_max[$it]<$contact_all[$it][$res1][$res2]);
}
}
}
close UNAMBIG;
$restr_tot[$it]=$unambig_restr[$it]+$ambig_restr[$it];
}
sub checkcontact {
my ($res1,$res2)=@_;
$dis=sqrt (($res1-$res2)**2);
if ($dis==0) {
return 'intra';
}
elsif ($dis==1) {
return 'seq';
}
elsif ($dis<=9) {
return 'med';
}
else {
return 'long';
}
}
####################################
# Other scriptspecific subroutines #
####################################
sub readitfiles {
my ($it)=@_;
&print_title ("Reading \l$itname[$it]");
&getcalibinfo($it) if ($calibprint ne 'off' || $violprintall ne 'off' || $violprintlast ne 'off');
&getspectblinfo($it) if ($restrover_spec ne 'off');
&getrestrinfo($it) if ($restrover ne 'off');
}
sub makedependentmatrix {
my ($it,$all,$all_max,@pointer) = @_;
my ($k_max)=$#{$pointer[$it]};
my ($l_max)=$#{$pointer[$it]->[$k_max]};
my ($dep_max,$avg,$factor)=0;
$avg=$all*2/($num_residues**2);
$factor=($all_max/$avg)*10/$num_residues;
$limit_min=$avg/$factor;
for $k (1...$num_residues) {
for $l ($k...$num_residues) {
$poss=9;
$poss-=3 if ($k==$l);
$poss-=3 if ($k==1);
$poss-=3 if ($l==$num_residues);
$poss-=1 if ($l-$k==1);
$poss+=1 if ($l==$num_residues && $k==1);
for $r (($k-1)...($k+1)) {
for $s (($l-1)...($l+1)) {
$contact_dep[$it][$k][$l]+=$pointer[$it]->[$r]->[$s];
}
}
$contact_dep[$it][$k][$l]=($contact_dep[$it][$k][$l])/$poss;
$dep_max = $contact_dep[$it][$k][$l] if ($dep_max < $contact_dep[$it][$k][$l]);
# Determine 'filtered' map on lower half display.
#
# 0 if dep. value smaller than $limit_min (depends on max & min value & num residues).
# 0 if dep. value smaller than $limit_min*2 AND actual value smaller than $limit_min*2
# 1 in all other cases.
if ($contact_dep[$it][$k][$l] >= $limit_min*2) {
$contact_map[$it][$k][$l]=1;
next;
}
else {
if ($contact_dep[$it][$k][$l] < $limit_min) {
$contact_map[$it][$k][$l]=0;
}
elsif ($contact_dep[$it][$k][$l] > $limit_min && $pointer[$it]->[$r]->[$s] < $limit_min*2) {
$contact_map[$it][$k][$l]=0;
}
else {$contact_map[$it][$k][$l]=1;}
}
}
}
$contact_dep_max[$it] = $dep_max;
return $avg;
}
sub print_help {
print "\n ".$scriptname_short." recognizes the following options:\n";
print $helptext;
}
sub mergeatomnames {
my (@atomlist) = @_;
my ($metlist,$metsep,@atfound,$atom,$atfoundlist,$atfoundsep)="";
my ($atcount)=0;
my $sep="";
while (defined ($atomlist[$atcount])) {
$atom = $atomlist[$atcount];
if ($atom =~ /$methyls/) {
# For methyls
if ($atom !~ /$metlist/ || $metlist eq "") {
if ($metlist eq "") {$sep="";}
else {$sep="|";}
$metlist.=$sep.$atom;
chop $metlist;
$atomlist[$atcount] =~ s/\d$/\#/;
}
else {
splice(@atomlist,$atcount,1);
$atcount--;
}
}
else {
# For recurring same name
@atfound=grep (/$atom/,@atomlist);
if ($#atfound > 0) {
splice(@atomlist,$atcount,1);
$atcount--;
}
# For methylenes
elsif ($atom =~ /\d$/) {
chop $atom;
@atfound=grep (/$atom/,@atomlist);
if ($atom =~ /TRP/ && $atom !~ /HB/) {;}
elsif ($#atfound == 1) {
splice(@atomlist,$atcount,1);
$atcount--;
if ($atfoundlist eq "") {$sep="";}
else {$sep="|";}
$atfoundlist.=$sep.$atom;
}
elsif ($atom =~ /$atfoundlist/ && $atfoundlist ne "") {
$atomlist[$atcount] =~ s/\d$/\#/;
}
}
}
$atcount++;
if ($atcount < 0) {
print "Bad ".$peak_num." $el $atcount\n";
}
}
return @atomlist;
}
#######################
# General use subs... #
#######################
sub swap {
my ($swap1,$swap2)=@_;
return $swap2,$swap1;
}
sub print_title {
my ($title,$output) = @_;
if (!defined $output) {$output=STDOUT};
$title_cap=$title="# ".$title." #";
$title_cap =~ s/./#/g;
print $output "\n".$title_cap."\n".$title."\n".$title_cap."\n";
}
sub send_warning {
my ($warning,$output) = @_;
print $output $warning;
return;
}
sub send_fatal {
my ($warning) = @_;
print "Program exiting!\n";
die $warning;
return;
}
sub minavgmax {
my ($arraypointer) = @_;
my ($min,$avg,$max,$num,$tot);
return if ($#{$arraypointer} < 0);
$min = $arraypointer->[0];
for $num ( 0 .. $#{$arraypointer}) {
if ($max < $arraypointer->[$num]) {
$max = $arraypointer->[$num];
}
if ($min > $arraypointer->[$num]) {
$min = $arraypointer->[$num];
}
$tot += $arraypointer->[$num];
}
$avg = $tot / ($#{$arraypointer} + 1);
return ($min,$avg,$max);
}
sub sd {
my ($avg,$arraypointer) = @_;
my ($sd,$num,$sse);
for $num ( 0 .. $#{$arraypointer}) {
$sse += (($arraypointer->[$num])-$avg)**2;
}
if ($#{$arraypointer} !=0) {
$sd = sqrt ($sse/$#{$arraypointer});
}
else {
$sd = sqrt ($sse/($#{$arraypointer}+1));
}
return ($sd);
}
sub gettime {
my ($d,$dt);
# Get time and date
$dt = `date '+%d/%m/%Y at %H:%M:%S'`;
$d = `date '+%d.%m.%Y'`;
chomp $d;chomp $dt;
return $dt,$d;
}
sub iduser {
# ID person calling script...
$_ = `who am i`;
$_ = substr($_,0,index($_," "));
s/^.+\!//;
return $_;
}
sub getprojectinfo {
my ($cur_dir,@dir_comp);
# Get project name info...
$cur_dir=`pwd`;
chomp $cur_dir;
@dir_comp=split(/\//,$cur_dir);
return $dir_comp[($#dir_comp-1)],$dir_comp[$#dir_comp];
}
sub openoutputfile {
my ($file,$handle,$text)=@_;
# Open output file with header line
open ($handle,">$file") || die "Unable to open file $file: $!";
print $handle $text;
}
#######################################
# Subroutines for making xmgr file... #
#######################################
sub makegraph {
my ($type) =@_;
my (@coord,@graph_text,$ydiv,$yadd,$spectrum,$tot_spec,$min,$avg,$max) = 0;
my ($name) ="";
open(GRAPHFILE,">$outputfile.$type.xmgr")|| die "Unable to make xmgr file: $!";
&print_graph_initialize(GRAPHFILE);
if ($type eq 'restraints') {
# Print overall stats. Is fixed.
# Info in lists comes from print_restr subroutines.
chomp $unambig_rest_list;
chomp $ambig_rest_list;
chomp $all_rest_list;
@coord=(0.11,0.44,0.65,0.88);
@graph_text=("Restraint progression",'Iterations (overall)','Number of restraints');
&print_graph('xydydy',GRAPHFILE,0,@coord,@graph_text,'All',$all_rest_list,4,'Unambiguous',$unambig_rest_list,3,'Ambiguous',$ambig_rest_list,2);
@coord=(0.59,0.92,0.65,0.88);
@graph_text=("Relative progression",'Iterations (overall)','Ratio');
&print_graph('xydydy',GRAPHFILE,1,@coord,@graph_text,'Poss amb',$avg_amb_list,2,'Amb/unamb',$ratio_list,100);
# Print graph stats
$tot_spec = $#spec_list+1;
if ($tot_spec > 1 ) {$ydiv=(0.6/$tot_spec);$yadd=0.05;$ysub=0.07;}
else {$ydiv=0.28;$yadd=0.3;$ysub=0.04;}
for $i (0...$#spec_list) {
$spectrum=$spec_list[$i];
@coord=(0.11,0.44,($i*$ydiv)+$yadd,($i*$ydiv)+$ydiv+$yadd-$ysub);
@graph_text=("","Iterations (detail $spectrum)",'Number of restraints');
&print_graph('xydydy',GRAPHFILE,2+($i*2),@coord,@graph_text,'All',$all_rest_list{$spectrum},4,'Unambiguous',$unambig_rest_list{$spectrum},3,'Ambiguous',$ambig_rest_list{$spectrum},2);
@coord=(0.59,0.92,($i*$ydiv)+$yadd,($i*$ydiv)+$ydiv+$yadd-$ysub);
@graph_text=("","Iterations (detail $spectrum)",'Ratio');
&print_graph('xydydy',GRAPHFILE,3+($i*2),@coord,@graph_text,'Poss amb',$avg_amb_list{$spectrum},2,'Amb/unamb',$ratio_list{$spectrum},100);
}
}
if ($type eq 'pdb') {
# No new pdb files for final data - don't print that.
if ($it_dir[$#it_dir]+1==$it_final) {
$it_last=$it_final-1;
}
else {
$it_last=$it_final;
}
if ((($it_last)-3)<0) {$detail_start=0;}
else {$detail_start=$it_last-3;}
@rest=&makegraphpdbdata('xydydy',$it_dir[0],$it_last,@Egraphlist);
@coord=(0.11,0.44,0.65,0.88);
@graph_text=('Energy progression','','Energy');
&print_graph('xydydy',GRAPHFILE,0,@coord,@graph_text,@rest);
@rest=&makegraphpdbdata('xydydy',$detail_start,$it_last,@Egraphlist);
@coord=(0.59,0.92,0.65,0.88);
@graph_text=('Energy progression detail','','Energy');
&print_graph('xydydy',GRAPHFILE,1,@coord,@graph_text,@rest);
@rest=&makegraphpdbdata('xydydy',$it_dir[0],$it_last,@rmsgraphlist);
@coord=(0.11,0.44,0.37,0.55);
@graph_text=('Rms progression','','rms');
&print_graph('xydydy',GRAPHFILE,2,@coord,@graph_text,@rest);
@rest=&makegraphpdbdata('xydydy',$detail_start,$it_last,@rmsgraphlist);
@coord=(0.59,0.92,0.37,0.55);
@graph_text=('Rms progression detail','','rms');
&print_graph('xydydy',GRAPHFILE,3,@coord,@graph_text,@rest);
@rest=&makegraphpdbdata('xydydy',$it_dir[0],$it_last,@violgraphlist);
@coord=(0.11,0.44,0.07,0.25);
@graph_text=('Violations progression','Iterations','Number');
&print_graph('xydydy',GRAPHFILE,4,@coord,@graph_text,@rest);
@rest=&makegraphpdbdata('xydydy',$detail_start,$it_last,@violgraphlist);
@coord=(0.59,0.92,0.07,0.25);
@graph_text=('Violations progression detail','Iterations','Number');
&print_graph('xydydy',GRAPHFILE,5,@coord,@graph_text,@rest);
}
}
sub makegraphpdbdata {
my ($gtype,$start,$stop,@list) = @_;
my (%list);
my (@datalist);
for ($it=$start;$it<=$stop;$it++) {
foreach $name (@list) {
($min,$avg,$max)=&minavgmax($pdb{$name}[$it]);
next if (!defined $avg);
#($sd)=&sd($avg,$pdb{$name}[$it]);
if ($gtype eq 'xydydy') {
$list{$name}.="$it $avg ".($avg-$min)." ".($max-$avg)."\n";
}
elsif ($gtype eq 'xy') {
$list{$name}.="$it $avg \n";
}
}
}
$color=2;
foreach $name (@list) {
chomp $list{$name};
next if $list{$name} eq "";
$short=$name;
$short=~ s/\w+\(//;
$short=~ s/\)$//;
push (@datalist,$short,$list{$name},$color);
$color++;
}
return @datalist;
}
sub print_graph {
my ($graphtype,$graphfile,$graphnum,$xmin,$xmax,$ymin,$ymax,$title,$xlabel,$ylabel,@set) = @_;
my ($xaxislabel,$yaxislabel,$graphname)="";
my ($i)=0;
$graphname="g$graphnum";
$xaxislabel="\@ xaxis label \"$xlabel\"\n" if ($xlabel ne "");
$yaxislabel="\@ yaxis label \"$ylabel\"" if ($ylabel ne "");
$title="\@ title \"$title\"\n" if ($title ne "");
print $graphfile <<GRAPH_TEXT;
@ $graphname on
@ $graphname label off
@ $graphname hidden false
@ $graphname type xy
@ $graphname autoscale type AUTO
@ $graphname fixedpoint off
@ $graphname fixedpoint type 0
@ $graphname fixedpoint xy 0.000000, 0.000000
@ $graphname fixedpoint format general general
@ $graphname fixedpoint prec 2, 2
@ with $graphname
@ world xmin 0
@ world xmax 200
@ world ymin 0
@ world ymax 10000000
@ stack world 0, 0, 0, 0 tick 0, 0, 0, 0
@ view xmin $xmin
@ view xmax $xmax
@ view ymin $ymin
@ view ymax $ymax
@ title font 4
@ title size 0.9
@ title color 1
@ title linewidth 1
@ xaxis tick on
@ xaxis tick major 1
@ xaxis tick minor 0.5
@ xaxis label layout para
@ xaxis label place spec
@ xaxis label place 0.00, 0.03
@ xaxis label char size 0.850000
@ xaxis label font 4
@ xaxis label color 1
@ xaxis label linewidth 1
@ xaxis ticklabel on
@ xaxis ticklabel char size 0.790000
@ xaxis ticklabel font 4
@ xaxis tick major on
@ xaxis tick minor off
@ xaxis tick size 0.500000
@ xaxis tick minor size 0.250000
@ xaxis tick op bottom
@ yaxis tick on
@ yaxis tick major 100
@ yaxis tick minor 50
@ yaxis label layout para
@ yaxis label place auto
@ yaxis label char size 0.800000
@ yaxis label font 4
@ yaxis label color 1
@ yaxis label linewidth 1
@ yaxis ticklabel on
@ yaxis ticklabel char size 0.790000
@ yaxis ticklabel font 4
@ yaxis tick major on
@ yaxis tick minor on
@ yaxis tick size 0.500000
@ yaxis tick minor size 0.250000
@ yaxis tick op left
@ legend char size 0.600000
@ legend font 4
@ legend vgap 2
@ legend hgap 1
@ legend length 2
$title$subtitle$xaxislabel$yaxislabel
@ legend on
GRAPH_TEXT
for ($i=0;$i<=$#set;$i+=3) {
if ($graphtype =~ /xydydy/) {
print $graphfile "\@ s".($i/3)." errorbar length 0.30000\n";
}
print $graphfile "\@ legend x1 ".($xmax-0.15)."\n";
print $graphfile "\@ legend y1 ".($ymax-0.02)."\n";
print $graphfile "\@ legend string ".($i/3)." \"$set[$i]\"\n";
print $graphfile "\@ s".($i/3)." color $set[$i+2]\n";
print $graphfile "\@ s".($i/3)." symbol 2\n";
print $graphfile "\@ s".($i/3)." symbol color $set[$i+2]\n";
print $graphfile "\@ s".($i/3)." symbol fill 1\n";
print $graphfile "\@ s".($i/3)." symbol linewidth 1\n";
print $graphfile "\@ s".($i/3)." symbol size 0.480000\n";
print $graphfile "\@ s".($i/3)." symbol center false\n";
print $graphfile "\@ s".($i/3)." symbol char 0\n";
print $graphfile "\@ s".($i/3)." on\n";
print $graphfile "\@ s".($i/3)." type $graphtype\n";
if ($set[$i+1] ne '') {print $graphfile "$set[$i+1]\n\&\n\@ autoscale\n";}
}
}
sub print_graph_initialize {
my ($graphfile) = @_;
print $graphfile <<GRAPH_TEXT;
@ version 40100
@ page layout free
@ ps linewidth begin 1
@ ps linewidth increment 2
@ hardcopy device 2
@ page 5
@ page inout 5
@ page layout portrait
@ link page off
@ auto redraw on
@ default linestyle 1
@ default linewidth 1
@ default color 1
@ default char size 0.6000000
@ default font 3
@ default font source 0
@ default symbol size 0.0500000
@ with string
@ string on
@ string loctype view
@ string 0.5, 0.96
@ string font 4
@ string just 2
@ string char size 1.1
@ string def "Project: $projectname, $run ($pdbbest lowest E structs)"
GRAPH_TEXT
}
##############################
# Subs for printing PS graph #
##############################
sub print_pspage {
my ($psoutput,$font,$it_firstgraph,$firsttext,$it_secondgraph,$secondtext)=@_;
open (PSFILE,">$psoutput")|| die "Could not open $psoutput: $!";
&print_psheader($font);
&print_psgraphtext("Weighed ambiguous contact map for project: $projectname ($run)",18,60,400);
&print_psgraphtext("(matrices based on $matrixtype)",10,570,100);
&print_psgraph($firsttext,14,150,500,40,390,10,$it_firstgraph);
&print_psgraph($secondtext,14,150,500,420,770,10,$it_secondgraph);
&print_psgraphtext("By user $user on $date.",10,570,630);
&print_pstail();
close PSFILE;
}
sub print_psgraph {
my ($text,$text_font_point,$xmin,$xmax,$ymin,$ymax,$font_point,$it) = @_;
my ($max,$maxl,$minl) =0;
&print_psgraphtext($text,$text_font_point,$xmin-$text_font_point*2,$ymin+($ymax-$ymin)/2);
&print_psgraphbox($xmin,$xmax,$ymin,$ymax);
&print_psyaxis($xmin,$xmax,$ymin,$ymax,$font_point,@{$contact_dep[$it]});
&print_psxaxis($xmin,$xmax,$ymin,$ymax,$font_point,@{$contact_dep[$it]});
($minl,$maxl,$max)=&print_psboxes_upper($xmin,$xmax,$ymin,$ymax,$contact_dep_max[$it],$contact_avg[$it],@{$contact_dep[$it]});
&print_psboxes_lower($xmin,$xmax,$ymin,$ymax,@{$contact_map[$it]});
&print_psgraphtext("(max value $max, lower limit $minl, upper shading limit $maxl)",$font_point,$xmax+$font_point*1.5,$ymin+($ymax-$ymin)/2);
}
sub print_psyaxis {
# Attention! Landscape printing so y axis is x coordinates!
my ($xmin,$xmax,$ymin,$ymax,$font_point,@pointer) = @_;
my ($lineprint,$textprint) ="";
print PSFILE ("%% This part does the residues and x axis ticks\n\n");
$xbu = ($xmax-$xmin)/$num_residues;
$tick_length = $font_point/5;
for ($i=0;$i<$num_residues;$i++) {
$xtext=$xmin+$xbu*($i+0.5);
$xtick=$xmin+$xbu*($i+1);
if ((($i+1)/5) == int (($i+1)/5)) {
$lineprint.=sprintf("%3.2f %3.2f %3.2f %3.2f L\n",$xtick,($ymin-$tick_length*1.5),$xtick,$ymax);
}
else {$lineprint.=sprintf ("%3.2f %3.2f %3.2f %3.2f L\n",$xtick,($ymin-$tick_length),$xtick,$ymin);}
if (($i+1) ==1 || $num_residues <= 40 || ($num_residues > 40 && (($i+1)/5) == int (($i+1)/5))) {
$textprint.=sprintf ("\n%3.2f %3.2f moveto\n",$xtext,$ymin-$font_point*1.5);
$textprint.=sprintf ("(%d) %.1f CenterRot90 Rot90\n(%d) %.1f Print\ngrestore\n",($i+1),$font_point,($i+1),$font_point);
}
}
print PSFILE ("\n0.10 setlinewidth\nCol03\n$lineprint\n");
print PSFILE ("\nCol01\n$textprint\n");
}
sub print_psxaxis {
# Attention! Landscape printing so x axis is y coordinates!
my ($xmin,$xmax,$ymin,$ymax,$font_point,@pointer) = @_;
my ($lineprint,$textprint) ="";
print PSFILE ("%% This part does the residues and y axis ticks\n\n");
$ybu = ($ymax-$ymin)/$num_residues;
$tick_length = $font_point/5;
for ($i=0;$i<$num_residues;$i++) {
$ytext=$ymin+$ybu*($i+0.5);
$ytick=$ymin+$ybu*($i+1);
$text=($i+1)-(int(($i+1)/10)*10);
if ((($i+1)/5) == int (($i+1)/5)) {
$lineprint.=sprintf ("%3.2f %3.2f %3.2f %3.2f L\n",($xmin-$tick_length*1.5),$ytick,$xmax,$ytick);
}
else {$lineprint.=sprintf ("%3.2f %3.2f %3.2f %3.2f L\n",($xmin-$tick_length),$ytick,$xmin,$ytick);}
if ($text == 0) {$text=$i+1;}
if ($i == 0 || $num_residues <= 40 || ($num_residues > 40 && ($text/5) == int ($text/5))) {
$textprint.=sprintf ("\n%3.2f %3.2f moveto\n",$xmin-$font_point,$ytext);
$textprint.=sprintf ("(%d) %.1f CenterRot90 Rot90\n(%d) %.1f Print\ngrestore\n",$text,$font_point,$text,$font_point);
}
}
print PSFILE ("\n0.10 setlinewidth\nCol03\n$lineprint\n");
print PSFILE ("\nCol01\n$textprint\n");
}
sub print_psboxes_upper {
my ($xmin,$xmax,$ymin,$ymax,$max,$avg,@pointer) = @_;
my ($xbu,$ybu,$limit_max,$limit_min,$factor,$scale,$log_min)=0;
$xbu = ($xmax-$xmin)/($num_residues);
$ybu = ($ymax-$ymin)/($num_residues);
if ($max==0) {$max=0.5;}
$factor=($max/$avg)*10/$num_residues;
$limit_max=$max/$factor;
$limit_min=$avg/$factor;
$log_min=log($limit_min+1);
$scale=(log($limit_max+1)-$log_min)/9;
print PSFILE ("\n0.10 setlinewidth\n");
for $k (1...$num_residues) {
for $l ($k...$num_residues) {
$xlow=$xmin + ($xbu*($k-1));
$xhigh=$xmin + ($xbu*$k);
$ylow=$ymin + ($ybu*($l-1));
$yhigh=$ymin + ($ybu*$l);
if ($pointer[$k]->[$l] > $limit_max) {$color=9;}
elsif ($pointer[$k]->[$l] < $limit_min) {next;}
else {$color = sprintf ("%.0f",((log($pointer[$k]->[$l]+1)-$log_min)/$scale));}
print PSFILE ("Col1$color\n");
printf PSFILE ("%5.2f %5.2f %5.2f %5.2f ",$xlow,$ylow,$xlow,$yhigh);
printf PSFILE ("%5.2f %5.2f %5.2f %5.2f Pline4\n",$xhigh,$yhigh,$xhigh,$ylow);
}
}
return ((sprintf "%.2f",$limit_min),(sprintf "%.2f",$limit_max),(sprintf "%.1f",$max));
}
sub print_psboxes_lower {
my ($xmin,$xmax,$ymin,$ymax,@pointer) = @_;
$xbu = ($xmax-$xmin)/$num_residues;
$ybu = ($ymax-$ymin)/$num_residues;
print PSFILE ("\n0.10 setlinewidth\n");
for $k (1...$num_residues) {
for $l (1...($k-1)) {
$xlow=$xmin + ($xbu*($k-1));
$xhigh=$xmin + ($xbu*$k);
$ylow=$ymin + ($ybu*($l-1));
$yhigh=$ymin + ($ybu*($l));
if ($pointer[$l]->[$k] < 1) {next;}
else {$color = 5;}
print PSFILE ("Col0$color\n");
printf PSFILE ("%5.2f %5.2f %5.2f %5.2f ",$xlow,$ylow,$xlow,$yhigh);
printf PSFILE ("%5.2f %5.2f %5.2f %5.2f Pline4\n",$xhigh,$yhigh,$xhigh,$ylow);
}
}
print PSFILE ("\nCol01\n");
}
# Prints the ps header.
sub print_psheader {
my ($font)=@_;
print PSFILE <<END_PS;
%!PS-Adobe-3.0
%%Creator: $user
%%DocumentNeededResources: font $font Symbol
%%BoundingBox: (atend)
%%Pages: 1
%%Title: $scriptname_short graph
%%EndComments
%%BeginProlog
/L { moveto lineto stroke } bind def
/Col { setrgbcolor } bind def
/Col01 {gsave 0.0000 0.0000 0.0000 setrgbcolor } def
/Col02 {gsave 1.0000 1.0000 1.0000 setrgbcolor } def
/Col03 {gsave 0.2000 0.2000 0.2000 setrgbcolor } def
/Col05 {gsave 0.5000 0.5000 0.5000 setrgbcolor } def
/Col09 {gsave 0.9000 0.9000 0.9000 setrgbcolor } def
/Col10 {gsave 1.0000 1.0000 1.0000 setrgbcolor } def
/Col11 {gsave 1.0000 0.9000 0.9000 setrgbcolor } def
/Col12 {gsave 1.0000 0.8000 0.8000 setrgbcolor } def
/Col13 {gsave 1.0000 0.7000 0.7000 setrgbcolor } def
/Col14 {gsave 1.0000 0.6000 0.6000 setrgbcolor } def
/Col15 {gsave 1.0000 0.5000 0.5000 setrgbcolor } def
/Col16 {gsave 1.0000 0.4000 0.4000 setrgbcolor } def
/Col17 {gsave 1.0000 0.3000 0.3000 setrgbcolor } def
/Col18 {gsave 1.0000 0.2000 0.2000 setrgbcolor } def
/Col19 {gsave 1.0000 0.1000 0.1000 setrgbcolor } def
/Poly4 { moveto lineto lineto lineto fill grestore } bind def
/Pline4 { 8 copy Poly4 moveto lineto lineto lineto closepath stroke } bind def
/Print { /$font findfont exch scalefont setfont show } bind def
/Gprint { /Symbol findfont exch scalefont setfont show } bind def
/Center {
dup /$font findfont exch scalefont setfont
exch stringwidth pop -2 div exch -3 div rmoveto
} bind def
/CenterRot90 {
dup /$font findfont exch scalefont setfont
exch stringwidth pop -2 div exch 3 div exch rmoveto
} bind def
/UncenterRot90 {
dup /$font findfont exch scalefont setfont
exch stringwidth } bind def
/Rot90 { gsave currentpoint translate 90 rotate } bind def
%%EndProlog
%%BeginSetup
1 setlinecap 1 setlinejoin 1 setlinewidth 0 setgray [ ] 0 setdash newpath
%%EndSetup
%%Page: 1 1
1 1 scale
0 0 translate
/psoverview save def
closepath
gsave 1.0000 setgray fill grestore
stroke gsave
0.10 setlinewidth
END_PS
}
sub print_psgraphbox {
my ($xmin,$xmax,$ymin,$ymax)=@_;
print PSFILE <<END_PS;
%% This part draws the graph box
0.80 setlinewidth
Col09
$xmin $ymin $xmin $ymax
$xmax $ymax $xmax $ymin Pline4
END_PS
}
sub print_psgraphtext {
my ($Graph_title,$font_point,$x,$y)=@_;
print PSFILE <<END_PS;
%% This part does the text
$x $y moveto
($Graph_title)
$font_point CenterRot90 Rot90
($Graph_title)
$font_point Print
grestore
END_PS
}
sub print_pstail {
print PSFILE <<END_PS;
psoverview restore
showpage
%%Trailer
%%BoundingBox: 0 0 612 792
%%EOF
END_PS
}

ARIA is part of the ELIXIR infrastructure.