On Sun, Jun 9, 2013 at 7:33 PM, Gonzalo Colmenarejo-Sanchez <
[email protected]> wrote:
> I see. Are these what you call “layered” fingerprints? How do they
> differ from the Daylight-like fingerprints?
>
No, the pattern fingerprints use a different approach that I haven't yet
written a reasonable description of. That's on my ToDo list.
> Looking forward for the C++ sample code.
>
It's attached. The layout of the files isn't really great since I struggled
with the file i/o stuff, but this should at least demonstrate the idea.
Hopefully you're better at C++ file i/o than I am and can make something
more useful out of this.
-greg
// $Id$
//
// Copyright (C) 2008-2011 Greg Landrum
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
/* Can be built with:
g++ -o fingerprint_screen.exe fingerprint_screen.cpp -I$RDBASE/Code -I$RDBASE/Extern \
-L$RDBASE/lib -lFileParsers -lSmilesParse -lFingerprints \
-lSubstructMatch -lGraphMol -lDataStructs -lRDGeometryLib -lRDGeneral
*/
#include <RDGeneral/Invariant.h>
#include <DataStructs/BitVects.h>
#include <DataStructs/BitOps.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/MolPickler.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include <GraphMol/Depictor/RDDepictor.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/Fingerprints/Fingerprints.h>
#include <RDGeneral/RDLog.h>
#include <RDGeneral/StreamOps.h>
#include <algorithm>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
// The code below refers to RDKit names (ROMol, ROMOL_SPTR, ExplicitBitVect,
// ...) without qualification.
using namespace RDKit;
// Shared-ownership handle for a fingerprint bit vector.
typedef boost::shared_ptr<ExplicitBitVect> EBV_SPTR;
// Loads the example molecules and the substructure queries from the RDKit
// regression data directory.
//
// \param mols     each molecule read from $RDBASE/Regress/Data/mols.1000.sdf
//                 is appended here
// \param queries  each query read from $RDBASE/Regress/Data/queries.txt is
//                 appended here
//
// Entries for which the supplier returns a null pointer are skipped.
// Throws std::runtime_error if the RDBASE environment variable is not set.
void ReadMols(std::vector<ROMOL_SPTR> &mols,
              std::vector<ROMOL_SPTR> &queries){
  // --------------------------------------------
  // Read molecules
  // --------------------------------------------
  // getenv() returns a null pointer when the variable is unset, and building
  // a std::string from a null pointer is undefined behavior: check first.
  const char *rdbaseEnv=std::getenv("RDBASE");
  if(!rdbaseEnv){
    throw std::runtime_error("the RDBASE environment variable is not set");
  }
  std::string rdbase(rdbaseEnv);
  std::string sdname = rdbase + "/Regress/Data/mols.1000.sdf";
  std::string qname = rdbase + "/Regress/Data/queries.txt";
  SDMolSupplier msuppl(sdname);
  // NOTE(review): the positional arguments look like delimiter, SMILES
  // column, name column and title-line flag -- confirm against the
  // SmilesMolSupplier constructor.
  SmilesMolSupplier qsuppl(qname," ",0,-1,false);

  BOOST_LOG(rdInfoLog)<<"loading mols: "<<std::endl;
  while(!msuppl.atEnd()){
    ROMol *m=msuppl.next();
    if(!m) continue;
    // hand the raw pointer to a shared pointer straight away
    ROMOL_SPTR mp(m);
    mols.push_back(mp);
  }

  BOOST_LOG(rdInfoLog)<<"loading queries: "<<std::endl;
  while(!qsuppl.atEnd()){
    ROMol *m=qsuppl.next();
    if(!m) continue;
    ROMOL_SPTR mp(m);
    queries.push_back(mp);
  }
}
void BuildFps(const std::vector<ROMOL_SPTR> &mols,
std::vector<EBV_SPTR > &mol_fps){
// --------------------------------------------
// Construct fingerprints
// --------------------------------------------
BOOST_FOREACH(ROMOL_SPTR mp,mols){
ExplicitBitVect *fp=PatternFingerprintMol(*mp);
EBV_SPTR fpp(fp);
mol_fps.push_back(fpp);
}
}
void FPScreen(const std::vector<ROMOL_SPTR> &mols,
const std::vector<ROMOL_SPTR> &queries,
const std::vector<EBV_SPTR > &mol_fps,
const std::vector<EBV_SPTR > &query_fps)
{
// --------------------------------------------
// substructure searches
// --------------------------------------------
unsigned int nMatches=0;
for(unsigned int i=0;i<mols.size();++i){
ROMOL_SPTR mp=mols[i];
EBV_SPTR mfp=mol_fps[i];
for(unsigned int j=0;j<queries.size();++j){
// fingerprint screen:
EBV_SPTR qfp=query_fps[j];
if(!AllProbeBitsMatch(*qfp,*mfp)) continue;
// molecule substructure search:
MatchVectType mv;
ROMOL_SPTR qp=queries[j];
if(SubstructMatch(*mp,*qp,mv)) ++nMatches;
}
}
BOOST_LOG(rdInfoLog)<<" num matches: "<<nMatches<<std::endl;
}
void WriteData(const std::vector<ROMOL_SPTR> &mols,
const std::vector<EBV_SPTR > &mol_fps,
std::string filen){
std::ofstream molStream((filen+"mols.bin").c_str(),std::ios_base::binary|std::ios_base::out);
unsigned int sz=mols.size();
streamWrite(molStream,sz);
for(unsigned int i=0;i<mols.size();++i){
MolPickler::pickleMol(*(mols[i]),molStream);
}
std::ofstream fpStream((filen+"fps.bin").c_str());
for(unsigned int i=0;i<mols.size();++i){
fpStream<<BitVectToFPSText(*mol_fps[i]);
fpStream<<"\n";
}
}
// Reads molecules and fingerprints from the files <filen>mols.bin and
// <filen>fps.bin, replacing the current contents of both vectors.
//
// \param mols     cleared, then filled with the unpickled molecules
// \param mol_fps  cleared, then filled with one fingerprint per molecule
// \param filen    prefix prepended to both file names
//
// Throws std::runtime_error if either file cannot be opened, if the molecule
// count cannot be read, or if the fingerprint file has fewer lines than
// there are molecules.
void ReadData(std::vector<ROMOL_SPTR> &mols,
              std::vector<EBV_SPTR > &mol_fps,
              std::string filen){
  mols.clear();
  mol_fps.clear();

  const std::string molFilename=filen+"mols.bin";
  std::ifstream molStream(molFilename.c_str(),std::ios_base::binary|std::ios_base::in);
  if(!molStream){
    throw std::runtime_error("ReadData: cannot open "+molFilename+" for reading");
  }
  // initialized so that a failed read can never leave the loop bound
  // indeterminate
  unsigned int nMols=0;
  streamRead(molStream,nMols);
  if(!molStream){
    throw std::runtime_error("ReadData: cannot read the molecule count from "+molFilename);
  }
  for(unsigned int i=0;i<nMols;++i){
    // the shared pointer takes ownership before unpickling, so the molecule
    // is released if molFromPickle throws
    ROMOL_SPTR mp(new ROMol());
    MolPickler::molFromPickle(molStream,mp.get());
    mols.push_back(mp);
  }

  const std::string fpFilename=filen+"fps.bin";
  std::ifstream fpStream(fpFilename.c_str());
  if(!fpStream){
    throw std::runtime_error("ReadData: cannot open "+fpFilename+" for reading");
  }
  for(unsigned int i=0;i<nMols;++i){
    std::string fpsText;
    if(!std::getline(fpStream,fpsText)){
      throw std::runtime_error("ReadData: "+fpFilename+" has fewer fingerprints than molecules");
    }
    // NOTE(review): 2048 presumably matches the size of the fingerprints
    // that were written -- confirm against the fingerprinting call.
    EBV_SPTR bvp(new ExplicitBitVect(2048));
    UpdateBitVectFromFPSText(*bvp,fpsText);
    mol_fps.push_back(bvp);
  }
}
int
main(int argc, char *argv[])
{
RDLog::InitLogs();
std::vector<ROMOL_SPTR> mols;
std::vector<ROMOL_SPTR> queries;
std::vector<EBV_SPTR > mol_fps;
std::vector<EBV_SPTR > query_fps;
ReadMols(mols,queries);
BOOST_LOG(rdInfoLog)<<"fingerprinting mols: "<<std::endl;
BuildFps(mols,mol_fps);
BOOST_LOG(rdInfoLog)<<"fingerprinting queries: "<<std::endl;
BuildFps(queries,query_fps);
BOOST_LOG(rdInfoLog)<<"writing mols and fps: "<<std::endl;
WriteData(mols,mol_fps,"mol_store");
BOOST_LOG(rdInfoLog)<<"writing queries and fps: "<<std::endl;
WriteData(queries,query_fps,"query_store");
BOOST_LOG(rdInfoLog)<<"Reading mols and fps: "<<std::endl;
ReadData(mols,mol_fps,"mol_store");
BOOST_LOG(rdInfoLog)<<"Reading queries and fps: "<<std::endl;
ReadData(queries,query_fps,"query_store");
FPScreen(mols,queries,mol_fps,query_fps);
}
------------------------------------------------------------------------------
How ServiceNow helps IT people transform IT departments:
1. A cloud service to automate IT design, transition and operations
2. Dashboards that offer high-level views of enterprise services
3. A single system of record for all IT processes
http://p.sf.net/sfu/servicenow-d2d-j
_______________________________________________
Rdkit-discuss mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss