Hi Christian,
I had a look at my old code and the outcome is actually  intended.
Here's some 'historic background' and explanation regarding the code:

-- RedScaff means that all linking atoms are deleted and the rings
are connected using the ring atoms where trhe linker chain was.
-- Scaff leaves the linker chains at their place so you get what you
(hopefully??) wanted to get.

Now, the snipped I had posted quick and dirty had some additional
lines that force each atom to be a carbon (which is not what you wanted,
as I guess) That code simply has to be commented out to arrive at the
molecular core as  you wanted to have it.

As the script was rather a dirty hack intended to inspire RDKiters,
I now cleaned up at least the raw 'code-clups' and added some doku.
The attached script should do what you expected. If not, then feel free to
adapt it to fit your needs.

@Greg:
Now that some people are interested in the code:
May this script find a save harbour in the Contrib folder?

All the best!
Markus


Christian de Bouillé wrote:
Hi Greg

I have tested the script from
Markus Kossner

Starting string
Oc1ccccc1N1CCN(C(=S)NCc2cc3c(cc2)OCO3)CC1

Scaffold with mode Scaff
c1cc(ccc1)N1CCN(c2cc3OCOc3cc2)CC1

This version with Scaff keeps
the heteroatoms and aromaticity

But the chain between the phenyl and the piperidine
is not kept

Is someone who can help me

There is similar script PyMCS
10 years old

May be someone has a copy

Best Regards
Christian

#!/usr/bin/env python2.6
# encoding: utf-8


import os,sys
from Chem import AllChem as Chem

def flatten(x):
  """flatten(sequence) -> list
  Returns a single, flat list which contains all elements retrieved
  from the sequence and all nested sub-sequences (iterables).
  Examples:
  >>> [1, 2, [3,4], (5,6)]
  [1, 2, [3, 4], (5, 6)]
  >>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, MyVector(8,9,10)])
  [1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]"""
  result = []
  for el in x:
      if hasattr(el, "__iter__") and not isinstance(el, basestring):
          result.extend(flatten(el))
      else:
          result.append(el)
  return result

def GetFrame(mol,mode='RedScaff'):
  '''return a ganeric molecule defining the reduced scaffold of the
molecule.
  mode can be 'RedScaff' and 'Scaff'. The second mode will turn every atom
to a carbon and every bond to a single bond!'''
  ring=mol.GetRingInfo()
  RingAtoms= flatten(ring.AtomRings())
  NonRingAtoms=[ atom.GetIdx() for atom in mol.GetAtoms() if atom.GetIdx()
not in RingAtoms ]
  RingNeighbors=[]
  Paths=[]
  for NonRingAtom in NonRingAtoms:
      for neighbor in mol.GetAtomWithIdx(NonRingAtom).GetNeighbors():
          if neighbor.GetIdx() in RingAtoms:
              RingNeighbors.append(NonRingAtom)
Paths.append([neighbor.GetIdx(),NonRingAtom]) #The ring Atoms
having a non ring Nieghbor will be the start of a walk
              break
PosLinkers=[x for x in NonRingAtoms if x not in RingNeighbors] #Only these
Atoms are Possible Linkers of two rings
  Framework=[ x for x in RingAtoms ]
  Linkers=[]
  while len(Paths)>0:
      NewPaths=[]
      for P in Paths:
          if P==None:
              print 'ooh, there is still a bug somewere '
          else:
              for neighbor in mol.GetAtomWithIdx(P[-1]).GetNeighbors():
                  if neighbor.GetIdx() not in P:
                      if neighbor.GetIdx() in NonRingAtoms:
                          n=P[:]
                          n.append(neighbor.GetIdx())
                          NewPaths.append(n[:])
                      elif neighbor.GetIdx() in RingAtoms:
                          n=P[:]
                          n.append(neighbor.GetIdx())
                          Linkers.append(n)
                          Framework=Framework+P[:]

      Paths=NewPaths[:]
  #print 'Linkers:',Linkers
  #print 'RingAtoms:',RingAtoms
  if mode=='Scaff':
      Framework=list(set(Framework))
      todel=[]
      NonRingAtoms.sort(reverse=True)
      em=Chem.EditableMol(mol)
      BondsToAdd=[ sorted([i[0],i[-1]]) for i in Linkers ]
      mem=[]
      for i in BondsToAdd:
          if i not in mem:
              em.AddBond(i[0],i[1],Chem.BondType.SINGLE)
              mem.append(i)
      for i in NonRingAtoms:
          todel.append(i)
      for i in todel:
          em.RemoveAtom(i)
      m=em.GetMol()
      return m

  if mode=='RedScaff':
      Framework=list(set(Framework))
      todel=[]
      NonRingAtoms.sort(reverse=True)
      for i in NonRingAtoms:
          if i != None:
              if i not in Framework:
                  todel.append(i)
      em=Chem.EditableMol(mol)
      for i in todel:
          em.RemoveAtom(i)
      m=em.GetMol()
      #===================================#
      #Now do the flattening of atoms and bonds!
#Any heavy atom will become a carbon and any bond will become a single
bond!!        #
      #===================================#
      for atom in m.GetAtoms(): #
          atom.SetAtomicNum(6) #
          atom.SetFormalCharge(0) #
      for bond in m.GetBonds(): #
          bond.SetBondType(Chem.BondType.SINGLE)                 #
      Chem.SanitizeMol(m) #
      #===================================#
      return m

if len(sys.argv) < 2:
  print "No input file provided: Frames.py <file-to-process.sdf>"
  sys.exit(1)

suppl=Chem.SDMolSupplier(sys.argv[1])

FrameDict={}

for mol in suppl:
  if mol == 'None': continue
  try:
      m=GetFrame(mol,mode='RedScaff')
      if FrameDict.has_key(Chem.MolToSmiles(m)):
          FrameDict[Chem.MolToSmiles(m)].append(mol)
      else:
          FrameDict[Chem.MolToSmiles(m)]=[mol,]
  except:
      print '--------------------------------'
      print 'could not process the molecule with the following name:'
      print mol.GetProp('_Name')

counter=0
w=open('frames.smi','w')
d=Chem.SDWriter('frames.sdf')
for key,item in FrameDict.items():
  counter+=1
  #d=Chem.SDWriter(str(counter)+'.sdf')
  for i in item:
      i.SetProp('Scaffold',key)
      i.SetProp('Cluster',str(counter))
      d.write(i)
  print key,len(item)
  w.write(key+'\n')
w.close
print 'number of Frames found: %d' %(counter)



------------------------------------------------------------------------------
Protect Your Site and Customers from Malware Attacks
Learn about various malware tactics and how to avoid them. Understand malware threats, the impact they can have on your business, and how you can protect your company and customers by using code signing.
http://p.sf.net/sfu/oracle-sfdevnl
_______________________________________________
Rdkit-discuss mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

#!/usr/bin/python
# encoding: utf-8

#	Jan 2011	(markus kossner)	Cleaned up the code, added some documentation
#	somwhere around Aug 2008	(markus kossner)	created
#    
#    This script extracts the molecular framework for a database of molecules.
#    You can use two modes (hard coded): 
#    - Scaff:	The molecular frame is extracted
#    - RedScaff:	All linking chains between rings are deleted. The rings are directly connected.
#    
#    You can comment in/out the code snippets indicated by the comments 
#    to force each atom of the frame to be a Carbon.
#    
#    Usage: Frames.py <database.sdf>
#    Output: 
#    - sd files containing all molecules belonging to one frame (1.sdf, 2.sdf etc)
#    - frames.smi containing the (caninical) smiles and count of occurrence
#

import os,sys
from Chem import AllChem as Chem

def flatten(x):
    """flatten(sequence) -> list
    Returns a single, flat list which contains all elements retrieved
    from the sequence and all nested sub-sequences (iterables).
    Examples:
    >>> [1, 2, [3,4], (5,6)]
    [1, 2, [3, 4], (5, 6)]
    >>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, MyVector(8,9,10)])
    [1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]"""
    result = []
    for el in x:
        if hasattr(el, "__iter__") and not isinstance(el, basestring):
            result.extend(flatten(el))
        else:
            result.append(el)
    return result


def GetFrame(mol, mode='Scaff'):
	'''return a ganeric molecule defining the reduced scaffold of the input mol.
	mode can be 'Scaff' or 'RedScaff':
	
	Scaff	->	chop off the side chains and return the scaffold
	
	RedScaff	->	remove all linking chains and connect the rings 
	directly at the atoms where the linker was
	'''
	
	ring = mol.GetRingInfo()
	RingAtoms = flatten(ring.AtomRings())
	NonRingAtoms = [ atom.GetIdx() for atom in mol.GetAtoms() if atom.GetIdx() not in RingAtoms ]
	RingNeighbors = []
	Paths = []
	for NonRingAtom in NonRingAtoms:
		for neighbor in mol.GetAtomWithIdx(NonRingAtom).GetNeighbors():
			if neighbor.GetIdx() in RingAtoms:
				RingNeighbors.append(NonRingAtom)
				Paths.append([neighbor.GetIdx(),NonRingAtom]) #The ring Atoms having a non ring Nieghbor will be the start of a walk
				break
	PosConnectors = [x for x in NonRingAtoms if x not in RingNeighbors] #Only these Atoms are potential starting points of a Linker chain
	#print 'PosConnectors:'
	#print PosConnectors	
	Framework = [ x for x in RingAtoms ]
	#Start a list of pathways which we will have to walk 
	#print 'Path atoms:'
	#print Paths
	Linkers = []
	while len(Paths)>0:
		NewPaths = []
		for P in Paths:
			if P == None:
				print 'ooh'
			else:
				for neighbor in mol.GetAtomWithIdx(P[-1]).GetNeighbors():
					if neighbor.GetIdx() not in P:
						if neighbor.GetIdx() in NonRingAtoms:
							n = P[:]
							n.append(neighbor.GetIdx())
							NewPaths.append(n[:])
						elif neighbor.GetIdx() in RingAtoms:
							#print 'adding the following path to Framework:'
							#print P
							n = P[:]
							n.append(neighbor.GetIdx())
							Linkers.append(n)
							Framework=Framework+P[:]

		Paths = NewPaths[:]
	#print 'Linkers:',Linkers
	#print 'RingAtoms:',RingAtoms
	#em.AddBond(3,4,Chem.BondType.SINGLE)
	if mode == 'RedScaff':
		Framework = list(set(Framework))
		todel = []
		NonRingAtoms.sort(reverse=True)
		em = Chem.EditableMol(mol)
		BondsToAdd = [ sorted([i[0],i[-1]]) for i in Linkers ]
		mem = []
		for i in BondsToAdd:
			if i not in mem:
				em.AddBond(i[0],i[1],Chem.BondType.SINGLE)
				mem.append(i)
		for i in NonRingAtoms:
			todel.append(i)
		for i in todel:
			em.RemoveAtom(i)
		m = em.GetMol()
		#===================================#
		#  Now do the flattening of atoms and bonds!
		#  Any heavy atom will become a carbon and any bond will become a single bond!	#
		#===================================#
#		for atom in m.GetAtoms():                                                 #
#			atom.SetAtomicNum(6)                                                    #
#			atom.SetFormalCharge(0)                                                #
#		for bond in m.GetBonds():                                                   #
#			bond.SetBondType(Chem.BondType.SINGLE)                 #
#		Chem.SanitizeMol(m)                                                          #
		#===================================#
		return m

	if mode == 'Scaff':
		Framework = list(set(Framework))
		todel = []
		NonRingAtoms.sort(reverse=True)
		for i in NonRingAtoms:
			if i != None:
				if i not in Framework:
					todel.append(i)
		em = Chem.EditableMol(mol)
		for i in todel:
			em.RemoveAtom(i)
		m = em.GetMol()
		#===================================#
		#  Now do the flattening of atoms and bonds!
		#  Any heavy atom will become a carbon and any bond will become a single bond!!		#
		#===================================#
#		for atom in m.GetAtoms():                                                 #
#			atom.SetAtomicNum(6)                                                    #
#			atom.SetFormalCharge(0)                                                #
#		for bond in m.GetBonds():                                                   #
#			bond.SetBondType(Chem.BondType.SINGLE)                 #
#		Chem.SanitizeMol(m)                                                          #
		#===================================#
		return m

if __name__=='__main__':
	if len(sys.argv) < 2:
		print "No input file provided: Frames.py filetosprocess.ext"
		sys.exit(1)


	suppl = Chem.SDMolSupplier(sys.argv[1])
	FrameDict = {}

	for mol in suppl:
		m = GetFrame(mol)
		cansmiles = Chem.MolToSmiles(m, isomericSmiles=True)
		if FrameDict.has_key(cansmiles):
			FrameDict[cansmiles].append(mol)
		else:
			FrameDict[cansmiles]=[mol,]

	counter=0
	w=open('frames.smi','w')
	for key,item in FrameDict.items():
		counter+=1
		d=Chem.SDWriter(str(counter)+'.sdf')
		for i in item:
			i.SetProp('Scaffold',key)
			i.SetProp('Cluster',str(counter))
			d.write(i)
		print key,len(item)
		w.write(key+'\t'+str(len(item))+'\n')
	w.close
	print 'number of Clusters: %d' %(counter)
------------------------------------------------------------------------------
Protect Your Site and Customers from Malware Attacks
Learn about various malware tactics and how to avoid them. Understand 
malware threats, the impact they can have on your business, and how you 
can protect your company and customers by using code signing.
http://p.sf.net/sfu/oracle-sfdevnl
_______________________________________________
Rdkit-discuss mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to