Hello all,

Here is the story. 
1) Create (somehow) a structure as IMolecule   (e.g. benzene with alternating
single/double bonds)
2) Run HueckelAromaticityDetector to perceive aromaticity
3) Write the structure into CML
4) Read the structure back from CML into a new IMolecule 
5) Now the new IMolecule has bond orders of 1.5, while the original one from 1)
has bond orders of 1.0 and 2.0

This doesn't break the isomorphism test or the fingerprints, but it does break
atom type recognition as per HybridizationStateATMatcher.  In this example, the
atom types of the original molecule will be C.sp2, while in the new molecule
they will be Caromatic.sp2.

I am not sure whether this breaks anything other than my atom environments
code, but I am wondering what the best way to handle the issue is.

Attached is a JUnit test.

Best regards,
Nina


/**
 * Created on 2007-27-4
 *
 */
package ambit.test;

import java.io.StringReader;
import java.io.StringWriter;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;

import junit.framework.TestCase;

import org.openscience.cdk.Atom;
import org.openscience.cdk.Bond;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.Molecule;
import org.openscience.cdk.aromaticity.HueckelAromaticityDetector;
import org.openscience.cdk.atomtype.HybridizationStateATMatcher;
import org.openscience.cdk.atomtype.IAtomTypeMatcher;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.fingerprint.Fingerprinter;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IAtomType;
import org.openscience.cdk.interfaces.IChemFile;
import org.openscience.cdk.interfaces.IMolecule;
import org.openscience.cdk.io.CMLReader;
import org.openscience.cdk.io.CMLWriter;
import org.openscience.cdk.isomorphism.UniversalIsomorphismTester;
import org.openscience.cdk.templates.MoleculeFactory;
import org.openscience.cdk.tools.HydrogenAdder;
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;

/**
 * CML stores aromatic bonds as "A"; when reading back, these bonds get order
 * 1.5, even though the bonds in the original molecule have alternating
 * single/double order.
 * This leads to atom type recognition that differs between the original
 * molecule and the one read from CML.
 * @author Nina Jeliazkova <br>
 *         <b>Modified</b> 2005-4-7
 */
public class cmlTest extends TestCase {


        /**
         * 
         * @return
         */
        protected IMolecule getMolecule() {
                  Molecule mol = new Molecule();
                  Atom a1 = new Atom("C");
                  mol.addAtom(a1);
                  Atom a2 = new Atom("C");
                  mol.addAtom(a2);
                  Atom a3 = new Atom("C");
                  mol.addAtom(a3);
                  Atom a4 = new Atom("C");
                  mol.addAtom(a4);
                  Atom a5 = new Atom("C");
                  mol.addAtom(a5);
                  Atom a6 = new Atom("C");
                  mol.addAtom(a6);
                  Bond b1 = new Bond(a1, a2, 2.0);
                  mol.addBond(b1);
                  Bond b2 = new Bond(a2, a3, 1.0);
                  mol.addBond(b2);
                  Bond b3 = new Bond(a3, a4, 2.0);
                  mol.addBond(b3);
                  Bond b4 = new Bond(a4, a5, 1.0);
                  mol.addBond(b4);
                  Bond b5 = new Bond(a5, a6, 2.0);
                  mol.addBond(b5);
                  Bond b6 = new Bond(a6, a1, 1.0);
                  mol.addBond(b6);
                return mol;
        }

        protected String getCML(IMolecule mol) throws Exception {
                        StringWriter w = new StringWriter();
                        new CMLWriter(w).write(mol);
                        
                        return w.toString();
        }
        public void testRoundtrip() {
                roundtrip(false);
        }
        public void testRoundtripAromaticity() {
                roundtrip(true);
        }       
        public void roundtrip(boolean checkAromaticity) {

                IMolecule origin = getMolecule();
                HydrogenAdder h = new HydrogenAdder();
                
                try {
                        h.addExplicitHydrogensToSatisfyValency(origin);
                        if (checkAromaticity) {
                                
HueckelAromaticityDetector.detectAromaticity(origin);
                        }                       
                        String cml = getCML(origin);
                        /*
                         * Just if you are curious to inspect CML ...
                         * System.out.println(cml); 
                         */
                        

                        /*
                         * Not sure why CMLReader(Reader) is deprecated , with 
StringBufferInputStream now deprecated, 
                         * how do we write the same code without deprecated 
methods?
                         */
                        CMLReader reader = new CMLReader(new StringReader(cml));
                        IChemFile chemFile = 
DefaultChemObjectBuilder.getInstance().newChemFile();
                        reader.read(chemFile);
                        /*
                         * Could we have List<IMolecule> as return type from 
ChemFileManipulator.getAllAtomContainers()  (and similar functions)?
                         * Then type safety will be better supported and there 
will be no need for a cast each (IMolecule) list.get(i) 
                         */
                        List molecules = 
ChemFileManipulator.getAllAtomContainers(chemFile);
                        assertEquals(1,molecules.size());
                        
                        /*  
                         *  Now if we have checkAromaticity enabled, bond 
orders in the molecules.get(0) are 1.5 (aromatic) and bond orders in the 
original molecule are 1.0 and 2.0 respectively.
                         *  This doesn't break isomorphism check and 
fingerprints so far. 
                         */
                        
assertTrue(UniversalIsomorphismTester.isIsomorph(origin,(IAtomContainer)molecules.get(0)));
                        
                        Fingerprinter fp = new Fingerprinter();
                        
assertEquals(fp.getFingerprint(origin),fp.getFingerprint((IAtomContainer)molecules.get(0)));

                        /*
                         * ... but breaks atom types recognition
                         */
                        IAtomTypeMatcher atm = new 
HybridizationStateATMatcher();
                        Hashtable<String,Integer> at = getAtomTypes(atm, 
origin);
                        Hashtable<String,Integer> atnew = getAtomTypes(atm, 
(IMolecule)molecules.get(0));
                        
                        /*
                         * Finally: 
                         * if checkAromaticity == false , C atoms from both 
origin and molecules.get(0) will be recognized as Csp2
                         * if checkAromaticity == true , C atoms from origin 
will be Csp2, but molecules.get(0) will be recognized as Caromatic.sp2
                         * 
                         * TODO remove printlns 
                         */
                        
                        System.out.println("Perceiving 
aromaticity=\t"+checkAromaticity);
                        System.out.println("Atom types from original 
molecule\t"+at);
                        System.out.println("Atom types from molecule read from 
CML\t"+atnew);
                        Enumeration<String> atomtypeNames = at.keys();
                        while (atomtypeNames.hasMoreElements()) {
                                String atomTypename = 
atomtypeNames.nextElement();
                                Integer freq = atnew.get(atomTypename);
                                assertNotNull(freq);
                                assertEquals(freq,at.get(atomTypename));
                        }
                } catch (Exception x) {
                        fail();
                }               
        }
                
                
                protected Hashtable<String,Integer> 
getAtomTypes(IAtomTypeMatcher atm, IMolecule m) throws CDKException {
                
                Hashtable<String,Integer> ht = new Hashtable<String,Integer>();
                // would be nice to have Iterator<IAtom>
                Iterator e = m.atoms();
                while (e.hasNext()) {
                        IAtomType atomType = 
atm.findMatchingAtomType(m,(IAtom)e.next());
                        if (atomType != null) { 
                                Integer freq = 
ht.get(atomType.getAtomTypeName());
                                if (freq == null) 
ht.put(atomType.getAtomTypeName(),new Integer(1));
                                else ht.put(atomType.getAtomTypeName(),new 
Integer(freq.intValue()+1));
                        }                       
                }
                return ht;
                
                }
}
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Cdk-user mailing list
Cdk-user@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/cdk-user

Reply via email to