Thanks a lot! -- Gustavo Seabra.
On Fri, Jul 23, 2021 at 12:18 PM Paolo Tosco <paolo.tosco.m...@gmail.com> wrote: > Hi Gustavo, > > Chem.Atom.HasQuery() and Chem.Bond.HasQuery() return True when the > underlying atom (or bond) is an instance of Chem.QueryAtom (or Chem. > QueryBond). > Query atoms and bonds can either be defined through SMARTS expressions... > > from rdkit import Chem > from rdkit.Chem import rdqueries > > a = Chem.Atom(6) > a.HasQuery() > False > > mol = Chem.MolFromSmarts("[+1;D3]") > qa_from_smarts = mol.GetAtomWithIdx(0) > qa_from_smarts.HasQuery() > True > > qa_from_smarts.DescribeQuery() > 'AtomAnd\n AtomFormalCharge 1 = val\n AtomExplicitDegree 3 = val\n' > > ...or be directly instantiated from Python and combined at your leisure; > through this approach you can actually define very specific queries that > may not be possible to describe with SMARTS. > Below I show how to construct the same query atom as from the above SMARTS > expression: > > qa = rdqueries.FormalChargeEqualsQueryAtom(1) > qa > <rdkit.Chem.rdchem.QueryAtom at 0x7fe938ac2c60> > > qa.HasQuery() > True > > qa.DescribeQuery() > 'AtomFormalCharge 1 = val\n' > > qa2 = rdqueries.ExplicitDegreeEqualsQueryAtom(3) > qa2.DescribeQuery() > 'AtomExplicitDegree 3 = val\n' > > qa.ExpandQuery(qa2) > qa.DescribeQuery() > 'AtomAnd\n AtomFormalCharge 1 = val\n AtomExplicitDegree 3 = val\n' > > Cheers, > p. > > On Fri, Jul 23, 2021 at 5:47 PM Gustavo Seabra <gustavo.sea...@gmail.com> > wrote: > >> This works perfectly! >> >> I could understand most of what you did there ;-), but what does the >> ".HasQuery()" mean? The RDKit API is not very clear about it: "Returns >> whether or not the atom has an associated query". Is this described >> anywhere else? >> >> Thank you so much! >> -- >> Gustavo Seabra. >> >> >> On Fri, Jul 23, 2021 at 4:53 AM Paolo Tosco <paolo.tosco.m...@gmail.com> >> wrote: >> >>> Hi Gustavo, >>> >>> you should be able to address this with a custom AtomCompare (and >>> BondCompare, if you want to use bond queries too) class, that now is >>> also supported from Python. >>> You can take a look at Code/GraphMol/FMCS/Wrap/testFMCS.py for >>> inspiration how to use it; here's something that seems to work for your >>> example: >>> >>> from rdkit import Chem >>> from rdkit.Chem import rdFMCS >>> >>> template = >>> Chem.MolFromSmarts('[a]1(-[S](-*)(=[O])=[O]):[a]:[a]:[a]:[a]:[a]:1') >>> # This should give a sulfone connected to an aromatic ring and >>> # some other (any) element. Notice that the ring may have >>> # any atoms (N,C,O), but for me it is important to have the SO2 group. >>> >>> template >>> [image: image.png] >>> >>> mol1 = Chem.MolFromSmiles('CS(=O)(=O)c1ccc(C2=C(c3ccccc3)CCN2)cc1') >>> # This molecule has the pattern. >>> >>> mol1 >>> [image: image.png] >>> >>> compare = [template, mol1] >>> res = rdFMCS.FindMCS(compare, >>> atomCompare=rdFMCS.AtomCompare.CompareElements, >>> bondCompare=rdFMCS.BondCompare.CompareAny, >>> ringMatchesRingOnly=False, >>> completeRingsOnly=False) >>> res.smartsString >>> # gives: '[#16](=[#8])=[#8]' >>> >>> # Let's address the problem with a custom AtomCompare class: >>> >>> class CompareQueryAtoms(rdFMCS.MCSAtomCompare): >>> def __call__(self, p, mol1, atom1, mol2, atom2): >>> a1 = mol1.GetAtomWithIdx(atom1) >>> a2 = mol2.GetAtomWithIdx(atom2) >>> if ((not a1.HasQuery()) and (not a2.HasQuery()) and >>> a1.GetAtomicNum() != a2.GetAtomicNum()): >>> return False >>> if (p.MatchValences and a1.GetTotalValence() != >>> a2.GetTotalValence()): >>> return False >>> if (p.MatchChiralTag and not self.CheckAtomChirality(p, mol1, >>> atom1, mol2, atom2)): >>> return False >>> if (p.MatchFormalCharge and (not a1.HasQuery()) and (not >>> a2.HasQuery()) and not self.CheckAtomCharge(p, mol1, atom1, mol2, atom2)): >>> return False >>> if p.RingMatchesRingOnly: >>> return self.CheckAtomRingMatch(p, mol1, atom1, mol2, atom2) >>> if ((a1.HasQuery() or a2.HasQuery()) and (not a1.Match(a2))): >>> return False >>> return True >>> >>> params = rdFMCS.MCSParameters() >>> params.AtomCompareParameters.RingMatchesRingOnly = False >>> params.BondCompareParameters.RingMatchesRingOnly = False >>> params.AtomCompareParameters.CompleteRingsOnly = False >>> params.BondCompareParameters.CompleteRingsOnly = False >>> params.BondTyper = rdFMCS.BondCompare.CompareAny >>> params.AtomTyper = CompareQueryAtoms() >>> >>> compare = [template, mol1] >>> res = rdFMCS.FindMCS(compare, params) >>> res.smartsString >>> >>> '[#16](-[#0,#6])(=[#8])(=[#8])-[#0,#6]1:[#0,#6]:[#0,#6]:[#0,#6]:[#0,#6]:[#0,#6]:1' >>> >>> >>> # the queryMol returned by MCS will match the template, but the original >>> template query >>> >>> # has many more details, so we extract the MCS part of the original >>> template and use that >>> >>> # as query instead >>> >>> def trim_template(template, query): >>> template_mcs_core = Chem.ReplaceSidechains(template, query) >>> for a in template_mcs_core.GetAtoms(): >>> if (not a.GetAtomicNum()) and a.GetIsotope(): >>> a.SetAtomicNum(1) >>> a.SetIsotope(0) >>> return Chem.RemoveAllHs(template_mcs_core) >>> >>> >>> query_mol = trim_template(template, res.queryMol) >>> template.GetSubstructMatch(query_mol) >>> >>> (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) >>> >>> >>> # here there seems to a be a bug with the 2D depiction, but that's another >>> story >>> >>> template >>> >>> [image: image.png] >>> >>> mol1.GetSubstructMatches(query_mol) >>> >>> ((4, 1, 0, 2, 3, 5, 6, 7, 19, 20),) >>> >>> >>> mol1 >>> >>> [image: image.png] >>> >>> >>> mol2 = Chem.MolFromSmiles('Cc1ccc(C2=CCNC2c2ccc(C(C)(F)F)nc2)nn1') >>> compare = [template, mol2] >>> >>> >>> mol2 >>> >>> [image: image.png] >>> >>> >>> res = rdFMCS.FindMCS(compare, params) >>> res.smartsString >>> >>> '[#0,#6]1:[#0,#6]:[#0,#6]:[#0,#6]:[#0,#7]:[#0,#7]:1' >>> >>> >>> query_mol = trim_template(template, res.queryMol) >>> >>> query_mol >>> >>> [image: image.png] >>> >>> >>> mol2.GetSubstructMatches(query_mol) >>> >>> ((1, 2, 3, 4, 20, 21), (10, 11, 12, 13, 18, 19)) >>> >>> >>> mol2 >>> >>> [image: image.png] >>> >>> >>> I hope the above helps, cheers >>> >>> p. >>> >>> >>> On Thu, Jul 22, 2021 at 7:45 PM Gustavo Seabra <gustavo.sea...@gmail.com> >>> wrote: >>> >>>> Hi all,, >>>> >>>> I would appreciate some pointers on how it would be possible to find >>>> the maximum common substructure of 2 molecules, where in the template >>>> structure some atoms may be *any*, but some other atoms must be fixed. >>>> >>>> Currently, I'm trying to use rdFMCS module. For example: >>>> >>>> from rdkit import Chem >>>> from rdkit.Chem import rdFMCS >>>> >>>> template = >>>> Chem.MolFromSmarts('[a]1(-[S](-*)(=[O])=[O]):[a]:[a]:[a]:[a]:[a]:1') >>>> # This should give a sulfone connected to an aromatic ring and >>>> # some other (any) element. Notice that the ring may have >>>> # any atoms (N,C,O), but for me it is important to have the SO2 group. >>>> >>>> mol1 = Chem.MolFromSmiles('CS(=O)(=O)c1ccc(C2=C(c3ccccc3)CCN2)cc1') >>>> # This molecule has the pattern. >>>> >>>> # Now, if I try to find a substructure match, I use: >>>> compare = [template, mol1] >>>> res = rdFMCS.FindMCS(compare, >>>> atomCompare=rdFMCS.AtomCompare.CompareElements, >>>> bondCompare=rdFMCS.BondCompare.CompareAny, >>>> ringMatchesRingOnly=False, >>>> completeRingsOnly=False) >>>> res.smartsString >>>> # gives: '[#16](=[#8])=[#8]' >>>> >>>> # Notice that the only match is the SO2, it does not match the ring. >>>> However, if I try that with another structure that has a CF3 in place of >>>> the SO2, I get: >>>> mol2 = Chem.MolFromSmiles('Cc1ccc(C2=CCNC2c2ccc(C(C)(F)F)nc2)nn1') >>>> compare = [template,mol2] >>>> res = rdFMCS.FindMCS(compare, >>>> atomCompare=rdFMCS.AtomCompare.CompareElements, >>>> bondCompare=rdFMCS.BondCompare.CompareAny, >>>> ringMatchesRingOnly=False, >>>> completeRingsOnly=False) >>>> res.smartsString >>>> # Returns: '' (empty string) >>>> >>>> # if I change to AtomCompare.CompareAny, now a CF3 will also match >>>> # in the SO2-X: >>>> mol2 = Chem.MolFromSmiles('Cc1ccc(C2=CCNC2c2ccc(C(C)(F)F)nc2)nn1') >>>> compare = [template,mol2] >>>> res = rdFMCS.FindMCS(compare, >>>> atomCompare=rdFMCS.AtomCompare.CompareAny, >>>> bondCompare=rdFMCS.BondCompare.CompareAny, >>>> ringMatchesRingOnly=False, >>>> completeRingsOnly=False) >>>> res.smartsString >>>> # Returns: >>>> '[#16,#6](-[#0,#6])(=,-[#8,#9])(=,-[#8,#9])-[#0,#6]1:[#0,#6]:[#0,#6]:[#0,#6]:[#0,#6]:[#0,#7]:1' >>>> >>>> But now theCF3 is counted in place of the SO2. The result I'd like to >>>> get here would be just the ring, as in the case: >>>> new_template = Chem.MolFromSmarts('CS(=O)(=O)c1cnccc1') >>>> mol2 = Chem.MolFromSmiles('Cc1ccc(C2=CCNC2c2ccc(C(C)(F)F)nc2)nn1') >>>> compare = [new_template,mol2] >>>> res = rdFMCS.FindMCS(compare, >>>> atomCompare=rdFMCS.AtomCompare.CompareElements, >>>> bondCompare=rdFMCS.BondCompare.CompareAny, >>>> ringMatchesRingOnly=False, >>>> completeRingsOnly=False) >>>> res.smartsString >>>> # Returns: '[#6]1:[#6]:[#7]:[#6]:[#6]:[#6]:1' (just the ring) >>>> >>>> Notice that if I use CompareElements, there seems to be no way to match >>>> the ring with either N or C. >>>> >>>> Does anyone have a suggestion on how I can specify flexibility (similar >>>> to AtomCompare.CompareAny) only for a portion of the molecule and still >>>> enforce specific atoms in another portion? >>>> >>>> Thank you so much! >>>> -- >>>> Gustavo Seabra. >>>> _______________________________________________ >>>> Rdkit-discuss mailing list >>>> Rdkit-discuss@lists.sourceforge.net >>>> https://lists.sourceforge.net/lists/listinfo/rdkit-discuss >>>> >>>
_______________________________________________ Rdkit-discuss mailing list Rdkit-discuss@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/rdkit-discuss