|
Hi, I am attaching a program that will do what you want. After entering the file names (one fixed and the other moving), the program asks for anchor atom names, i.e. a few atoms in your ligand that you want to use as reference atoms for computing the alignment. If you do not give any atom names, all atoms of the ligand will be used as the anchor atoms. You can repeat this process to align all of your pdb files to one reference file. |
#!/usr/bin/perl ############################################################################## use warnings; use strict; ############################################################################## # # DESCRIPTION: LSQKAB # AK 3/08 # ## Reference: ## A solution for the best rotation to relate two sets of vectors ## -- Wonfgang Kabsch, Acta Cryst. A32, 922, 1976 # ############################################################################## my ( @R, @RtR, @avector, @mu, @bvector, @U ); ############################################################################## my $line = '-' x 70 . "\n"; print $line; print " Enter reference pdb name: ";
#my $ref_pdb = "1mht.pdb";
my $ref_pdb = <STDIN>;
chomp $ref_pdb;
print " Enter moving pdb name: ";
#my $mov_pdb = "1x1b.pdb";
my $mov_pdb = <STDIN>;
chomp $mov_pdb;
print " Ligand residue chain and number must be same in both files.\n";
print " Enter residue chain and number: [Z 999] ";
#my $chain_num = "Z 999";
my $chain_num = <STDIN>;
chomp $chain_num;
my ( $chain_mol, $num_mol ) = split " ", $chain_num;
print " Enter anchor atoms: (case sensitive) ";
#my $anchor_atoms = "N1 N3 N6 N7 N9 C2' C4' SD CG C";
my $anchor_atoms = <STDIN>;
chomp $anchor_atoms;
unless ($anchor_atoms) {
print " ==> All atoms in the residue '$chain_num' will be used for LSQ.\n";
}
print $line;
print "Reference pdb:\t$ref_pdb\n";
print "Moving pdb:\t$mov_pdb\n";
print "Range:\t$chain_mol-$num_mol\n";
##############################################################################
my %pdb_ref = ReadPDB($ref_pdb);
my %pdb_mov = ReadPDB($mov_pdb);
## Verify that the anchor atoms are present in both files in that residue.
Delete_missing_anchor_atoms();
print "Anchor atoms:\t$anchor_atoms\n";
print $line;
### find centroids
my @center_ref = Center( \%pdb_ref );
my @center_mov = Center( \%pdb_mov );
### shift molecule to origin
Move_to_origin( @center_ref, \%pdb_ref );
Move_to_origin( @center_mov, \%pdb_mov );
Construct_R(); ### r[ij] = sum_atoms(yi * xj)
Construct_RtR(); ### RtR = R-transpose * R
#Print_matrix(\...@r);
#Print_matrix(\...@rtr);
Eigen(@RtR); ### Eigen values (mu) /Eigen vectors (a-vector) of RtR
#print "\nEigen values = @mu\n";
#Print_matrix(\...@avector);
Construct_bvector(); ### bvector = (R * a-vector)/sqrt(eigen value)
#Print_matrix(\...@bvector);
Construct_U(); ### U = Rotation matrix = b-vector * a-vector
Print_matrix( \...@u );
my @translation = Apply_trans( @center_ref, @center_mov, @U );
printf "%12.6f\t", $_ for @translation;
print "\n";
Rotate_moving_pdb(); ### transform the moving structure
##############################################################################
sub Apply_trans {
my ( $cxr, $cyr, $czr, $cxm, $cym, $czm, @mat ) = @_;
my @in = ( $cxm, $cym, $czm );
my @out = ( $cxr, $cyr, $czr );
for ( my $i = 0 ; $i < 3 ; $i++ ) {
for ( my $j = 0 ; $j < 3 ; $j++ ) {
$out[$i] -= $mat[$i][$j] * $in[$j];
}
}
return @out;
}
###############################################################
sub Center {
my $coord = shift;
my ( $sumx, $sumy, $sumz );
my $n = 0;
foreach my $atom ( keys %{$coord} ) {
$sumx += $coord->{$atom}{x};
$sumy += $coord->{$atom}{y};
$sumz += $coord->{$atom}{z};
$n++;
}
$sumx /= $n;
$sumy /= $n;
$sumz /= $n;
return ( $sumx, $sumy, $sumz );
}
###############################################################
sub Construct_R {
my @x;
my @y;
my $n = 0;
foreach my $atom ( keys %pdb_ref ) {
$x[$n][0] = $pdb_mov{$atom}{x};
$x[$n][1] = $pdb_mov{$atom}{y};
$x[$n][2] = $pdb_mov{$atom}{z};
$y[$n][0] = $pdb_ref{$atom}{x};
$y[$n][1] = $pdb_ref{$atom}{y};
$y[$n][2] = $pdb_ref{$atom}{z};
$n++;
}
my $natoms = $n;
for ( my $i = 0 ; $i < 3 ; $i++ ) {
for ( my $j = 0 ; $j < 3 ; $j++ ) {
$R[$i][$j] = 0;
for ( my $n = 0 ; $n < $natoms ; $n++ ) {
$R[$i][$j] += $y[$n][$i] * $x[$n][$j];
}
}
}
}
##############################################################################
sub Construct_bvector {
for ( my $i = 0 ; $i < 3 ; $i++ ) {
for ( my $j = 0 ; $j < 3 ; $j++ ) {
$bvector[$i][$j] = 0;
for ( my $k = 0 ; $k < 3 ; $k++ ) {
$bvector[$i][$j] += $R[$j][$k] * $avector[$i][$k];
}
$bvector[$i][$j] /= sqrt( $mu[$i] );
}
}
}
##############################################################################
sub Construct_RtR {
my @x;
my @y;
my $n = 0;
foreach my $atom ( keys %pdb_ref ) {
$x[$n][0] = $pdb_mov{$atom}{x};
$x[$n][1] = $pdb_mov{$atom}{y};
$x[$n][2] = $pdb_mov{$atom}{z};
$y[$n][0] = $pdb_ref{$atom}{x};
$y[$n][1] = $pdb_ref{$atom}{y};
$y[$n][2] = $pdb_ref{$atom}{z};
$n++;
}
my $natoms = $n;
for ( my $i = 0 ; $i < 3 ; $i++ ) {
for ( my $j = 0 ; $j < 3 ; $j++ ) {
$RtR[$i][$j] = 0;
for ( my $k = 0 ; $k < 3 ; $k++ ) {
my $sum1 = 0;
my $sum2 = 0;
for ( my $n = 0 ; $n < $natoms ; $n++ ) {
$sum1 += $y[$n][$k] * $x[$n][$i];
$sum2 += $y[$n][$k] * $x[$n][$j];
}
$RtR[$i][$j] += $sum1 * $sum2;
}
}
}
}
##############################################################################
sub Construct_U {
for ( my $i = 0 ; $i < 3 ; $i++ ) {
for ( my $j = 0 ; $j < 3 ; $j++ ) {
$U[$i][$j] = 0;
for ( my $k = 0 ; $k < 3 ; $k++ ) {
$U[$i][$j] += $bvector[$k][$i] * $avector[$k][$j];
}
}
}
}
##############################################################################
sub Delete_missing_anchor_atoms {
foreach my $atom ( keys %pdb_ref ) {
unless ( defined $pdb_mov{$atom} ) {
delete $pdb_ref{$atom}{$_} for ( 'x', 'y', 'z' );
delete $pdb_ref{$atom};
}
}
$anchor_atoms = join " ", sort keys %pdb_ref;
}
###############################################################
sub Eigen { ### Jacobi Eigen vector calculation
my @mat = @_;
my @b = my @z = ();
my ( $tresh, $theta, $tau, $t, $sm, $s, $h, $g, $c );
my $mat_dim = scalar @mat;
for ( my $row = 0 ; $row < $mat_dim ; $row++ )
{ #Initialize to the identity matrix.
for ( my $col = 0 ; $col < $mat_dim ; $col++ ) {
$avector[$row][$col] = 0.0;
}
$avector[$row][$row] = 1.0;
}
for ( my $row = 0 ; $row < $mat_dim ; $row++ )
{ # Set $b & $mu = diagonal of $mat
$b[$row] = $mu[$row] = $mat[$row][$row];
$z[$row] = 0.0;
}
### Try out a maximum of 50 iterations.
for ( my $i = 1 ; $i <= 50 ; $i++ ) {
$sm = 0.0;
for ( my $row = 0 ; $row < $mat_dim - 1 ; $row++ )
{ # Sum o .-diagonal elements.
for ( my $col = $row + 1 ; $col < $mat_dim ; $col++ ) {
$sm += abs( $mat[$row][$col] );
}
}
if ( $sm < 1E-6 ) {
##########################################
# sort the eigen values and eigen vectors
for ( my $i = 0 ; $i < $mat_dim - 1 ; $i++ ) {
my $k = $i;
my $p = $mu[$k];
for ( my $j = $i + 1 ; $j < $mat_dim ; $j++ ) {
if ( $mu[$j] >= $p ) {
$k = $j;
$p = $mu[$k];
}
}
if ( $k != $i ) {
$mu[$k] = $mu[$i];
$mu[$i] = $p;
for ( my $j = 0 ; $j < $mat_dim ; $j++ ) {
( $avector[$j][$i], $avector[$j][$k] ) =
( $avector[$j][$k], $avector[$j][$i] );
}
}
}
############################################
@avector = Transpose(@avector);
return;
}
$tresh = ( $i < 4 ) ? 0.2 * $sm / ( $mat_dim**2 ) : 0.0;
for ( my $row = 0 ; $row < $mat_dim - 1 ; $row++ ) {
for ( my $col = $row + 1 ; $col < $mat_dim ; $col++ ) {
$g = 100.0 * abs( $mat[$row][$col] );
#After four sweeps,skip the rotation if the off-diagonal element is small.
if ( $i > 4
&& abs( $mu[$row] ) + $g == abs( $mu[$row] )
&& abs( $mu[$col] ) + $g == abs( $mu[$col] ) )
{
$mat[$row][$col] = 0.0;
}
elsif ( abs( $mat[$row][$col] ) > $tresh ) {
$h = $mu[$col] - $mu[$row];
if ( abs($h) + $g == abs($h) ) {
$t = $mat[$row][$col] / $h; # $t =1/(2theta)
}
else {
$theta = 0.5 * $h / $mat[$row][$col];
$t = 1.0 / ( abs($theta) + sqrt( 1.0 + $theta**2 ) );
$t = -$t if ( $theta < 0.0 );
}
$c = 1.0 / sqrt( 1 + $t**2 );
$s = $t * $c;
$tau = $s / ( 1.0 + $c );
$h = $t * $mat[$row][$col];
$z[$row] -= $h;
$z[$col] += $h;
$mu[$row] -= $h;
$mu[$col] += $h;
$mat[$row][$col] = 0.0;
for ( my $j = 0 ; $j < $row ; $j++ ) {
$g = $mat[$j][$row];
$h = $mat[$j][$col];
$mat[$j][$row] = $g - $s * ( $h + $g * $tau );
$mat[$j][$col] = $h + $s * ( $g - $h * $tau );
}
for ( my $j = $row + 1 ; $j < $col ; $j++ )
{ # Case of rotations p<$j<q .
$g = $mat[$row][$j];
$h = $mat[$j][$col];
$mat[$row][$j] = $g - $s * ( $h + $g * $tau );
$mat[$j][$col] = $h + $s * ( $g - $h * $tau );
}
for ( my $j = $col + 1 ; $j < $mat_dim ; $j++ )
{ # Case of rotations q<$j ¡Ãn .
$g = $mat[$row][$j];
$h = $mat[$col][$j];
$mat[$row][$j] = $g - $s * ( $h + $g * $tau );
$mat[$col][$j] = $h + $s * ( $g - $h * $tau );
}
for ( my $j = 0 ; $j < $mat_dim ; $j++ ) {
$g = $avector[$j][$row];
$h = $avector[$j][$col];
$avector[$j][$row] = $g - $s * ( $h + $g * $tau );
$avector[$j][$col] = $h + $s * ( $g - $h * $tau );
}
}
}
}
for ( my $row = 0 ; $row < $mat_dim ; $row++ ) {
$b[$row] += $z[$row];
$mu[$row] = $b[$row]; # Update $mu with the sum of ta pq
$z[$row] = 0.0; # and reinitialize $z
}
}
}
#############################################################################
sub Move_to_origin {
my ( $sumx, $sumy, $sumz, $coord ) = @_;
foreach my $atom ( keys %{$coord} ) {
$coord->{$atom}{x} -= $sumx;
$coord->{$atom}{y} -= $sumy;
$coord->{$atom}{z} -= $sumz;
}
}
###############################################################
sub Print_matrix {
my $m = shift;
print " Matrix \n";
for ( my $i = 0 ; $i < 3 ; $i++ ) {
for ( my $j = 0 ; $j < 3 ; $j++ ) {
printf "%12.6f\t", $m->[$i][$j];
}
print "\n";
}
}
##############################################################################
sub ReadPDB {
my $file = shift;
-e $file || die " Can't find file $file!\n";
open PDB, $file || die " Can't read file $file!\n";
my %coords;
my $anchor_atoms_defined = $anchor_atoms ? 1 : 0;
while ( my $line = <PDB> ) {
if ( $line =~ /^ATOM/ or $line =~ /^HETATM/ ) {
my $chain = substr( $line, 21, 1 );
my $num = substr( $line, 22, 4 );
if ( $chain eq $chain_mol and $num == $num_mol ) {
my $atom = Trim( substr( $line, 12, 4 ) );
$anchor_atoms .= "$atom " unless $anchor_atoms_defined;
foreach my $a_atom ( split " ", $anchor_atoms ) {
if ( $a_atom eq $atom ) {
$coords{$atom}{x} = substr( $line, 30, 8 );
$coords{$atom}{y} = substr( $line, 38, 8 );
$coords{$atom}{z} = substr( $line, 46, 8 );
}
}
}
}
}
close PDB;
return %coords;
}
#############################################################################
sub Rotate_moving_pdb {
open PDB, $mov_pdb;
my $out = $mov_pdb;
$out =~ s/\.pdb$/-aligned.pdb/;
open OUT, ">$out";
while ( my $line = <PDB> ) {
if ( $line =~ /^ATOM/ or $line =~ /^HETATM/ ) {
my $x = substr( $line, 30, 8 );
my $y = substr( $line, 38, 8 );
my $z = substr( $line, 46, 8 );
my @in = ( $x, $y, $z );
my @out;
for ( my $i = 0 ; $i < 3 ; $i++ ) {
$out[$i] = $translation[$i];
for ( my $j = 0 ; $j < 3 ; $j++ ) {
$out[$i] += $U[$i][$j] * $in[$j];
}
}
$_ = sprintf "%8.3f", $_ for @out;
substr( $line, 30, 8 ) = $out[0];
substr( $line, 38, 8 ) = $out[1];
substr( $line, 46, 8 ) = $out[2];
}
print OUT $line;
}
close PDB;
close OUT;
print $line;
print " The transformed coordinates are written into '$out'\n";
print $line;
}
###############################################################
sub Transpose {
my @mat = @_;
my $dim = @mat;
for ( my $i = 0 ; $i < $dim - 1 ; $i++ ) {
for ( my $j = $i + 1 ; $j < $dim ; $j++ ) {
( $mat[$j][$i], $mat[$i][$j] ) = ( $mat[$i][$j], $mat[$j][$i] );
}
}
return @mat;
}
###############################################################
sub Trim {
my @out = @_ ? @_ : $_;
$_ = join( ' ', split(' ') ) for @out;
return wantarray ? @out : "@out";
}
###############################################################
Thanks Abhinav Abhinav Kumar JCSG @ SSRL, MS 99 2575 Sand Hill Road Menlo Park, CA 945025-7015 Phone: 650 926 2992 Fax: 650 926 3292 On Apr 12, 2009, at 2:49 PM, rui wrote: Hi, All |
