I wrote some code to split a molecule up by its ring systems to make fragments. 
The code is after the signature of this email.

Structure editing was more difficult than I expected. I had a bond that I 
wanted to break, and to create a dummy "*" atom for each new fragment, to 
indicate the type of connection which was broken; "[1*]" means the other atom 
was aliphatic and "[2*]" means the other atom was aromatic.

For example, in "c1ccccc1O" I want to see "c1ccccc1[1*].[2*]O"

The difficulty was that I needed to keep track of the expected atom index of 
newly added atoms. That is:

# Keep track of the size, because that will be the newly added atom's index
n = mol.GetNumAtoms()
emol = Chem.EditableMol(mol)

atom1 = mol.GetAtomWithIdx(atom1_idx)
atom2 = mol.GetAtomWithIdx(atom2_idx)
bond_type = mol.GetBondBetweenAtoms(atom1_idx, atom2_idx).GetBondType()

emol.RemoveBond(atom1_idx, atom2_idx)
add_dummy_atom(emol, n, atom1, atom2, bond_type)
n += 1
add_dummy_atom(emol, n, atom2, atom1, bond_type)
n += 1

where

def add_dummy_atom(emol, new_idx, atom1, atom2, bond_type):
    new_atom = Chem.Atom(0)
    if atom2.GetIsAromatic():
        new_atom.SetMass(2.0)
    else:
        new_atom.SetMass(1.0)
    emol.AddAtom(new_atom)
    emol.AddBond(atom1.GetIdx(), new_idx, bond_type)


Do I need to keep track of the expected atom index myself, or is there a nicer 
way to do this?

My thought was that I could get the atom index after it has been added to the 
EditableMolecule, as in:

def add_dummy_atom(emol, atom1, atom2, bond_type):
    new_atom = Chem.Atom(0)
    if atom2.GetIsAromatic():
        new_atom.SetMass(2.0)
    else:
        new_atom.SetMass(1.0)
    emol.AddAtom(new_atom)
    new_idx = new_atom.GetIdx()   #  <----- This always returns 0
    emol.AddBond(atom1.GetIdx(), new_idx, bond_type)

That does not work. The newly added atom's GetIdx() always returns 0.


                                Andrew
                                [email protected]

# Break a molecule into ring-groups.

# Attachment points are tagged with "[1*]" if the atom it should go to
# is aliphatic, and "[2*]" if the atom it should go to is aromatic.

from rdkit import Chem

# Constructed molecule with different edge conditions.
mol = Chem.MolFromSmiles("c1nccn1C2CC(=S)CCC2(CN)CO")

bonds_to_break = set()
def add_bond(atom1_idx, atom2_idx):
    if atom1_idx < atom2_idx:
        bonds_to_break.add( (atom1_idx, atom2_idx) )
    else:
        bonds_to_break.add( (atom2_idx, atom1_idx) )

# Break bonds between rings which are in different ring systems

separate_ring_groups = Chem.MolFromSmarts("[R]!@[R]")
for atom_indices in mol.GetSubstructMatches(separate_ring_groups):
    add_bond(atom_indices[0], atom_indices[1])

# Break other bonds which connect a side group to a ring system
fragment_pattern = Chem.MolFromSmarts("[R]!@[!R]")

for atom_indices in mol.GetSubstructMatches(fragment_pattern):
    add_bond(atom_indices[0], atom_indices[1])

#print "Going to break bonds", " ".join(str(pair) for pair in bonds_to_break)

# Break the bonds A-B into A-[1*] if B is aliphatic and A-[2*] if B is
# aromatic.

def add_dummy_atom(emol, new_idx, atom1, atom2, bond_type):
    new_atom = Chem.Atom(0)
    if atom2.GetIsAromatic():
        new_atom.SetMass(2.0)
    else:
        new_atom.SetMass(1.0)
    emol.AddAtom(new_atom)
    #print "Making connection from", atom1.GetIdx(), "to", new_idx
    emol.AddBond(atom1.GetIdx(), new_idx, bond_type)

emol = Chem.EditableMol(mol)
# Manually track the size because that's the only way to
# know the index of a newly added atom.
n = mol.GetNumAtoms()
for (atom1_idx, atom2_idx) in bonds_to_break:
    #print "Breaking", atom1_idx, atom2_idx
    atom1 = mol.GetAtomWithIdx(atom1_idx)
    atom2 = mol.GetAtomWithIdx(atom2_idx)
    bond_type = mol.GetBondBetweenAtoms(atom1_idx, atom2_idx).GetBondType()
    emol.RemoveBond(atom1_idx, atom2_idx)
    add_dummy_atom(emol, n, atom1, atom2, bond_type)
    n += 1
    add_dummy_atom(emol, n, atom2, atom1, bond_type)
    n += 1

mol = emol.GetMol()

smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
print "SMILES:", smiles
print "Fragments are:"
for term in smiles.split("."):
    print term


------------------------------------------------------------------------------
Keep Your Developer Skills Current with LearnDevNow!
The most comprehensive online learning library for Microsoft developers
is just $99.99! Visual Studio, SharePoint, SQL - plus HTML5, CSS3, MVC3,
Metro Style Apps, more. Free future releases when you subscribe now!
http://p.sf.net/sfu/learndevnow-d2d
_______________________________________________
Rdkit-discuss mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to