Dear all,
I wanted to export a list of molecules to Excel, with the molecule image +
some properties, but ran into a strange error.
I therefore went back to the example in the documentation
(http://rdkit.org/docs/source/rdkit.Chem.PandasTools.html), but I still
encounter the same error.
RDKit version: 2022.09.1
The exact error is:
File ~/projets/docknmine/venv/lib/python3.9/site-packages/rdkit/Chem/PandasTools.py:506,
in SaveXlsxFromFrame(frame, outFile, molCol, size)
489 """
490 Saves pandas DataFrame as a xlsx file with embedded images.
491 It maps numpy data types to excel cell types:
(...)
501 This feature is only available at runtime from within Excel.
502 """
504 import xlsxwriter # don't want to make this a RDKit dependency
--> 506 cols = list(frame.columns)
507 cols.remove(molCol)
508 dataTypes = dict(frame.dtypes)
AttributeError: 'NoneType' object has no attribute 'columns'
Any idea if I'm doing something wrong?
Second, I also get a warning about the deprecation of "append" in pandas. One should now use
"concat" instead,
so the documentation should be updated accordingly.
For instance, I added "Daunoblastin" to the antibiotics list using "concat":
daunoblastin =
pd.DataFrame([{'Smiles':'CC1C(C(CC(O1)OC2CC(CC3=C2C(=C4C(=C3O)C(=O)C5=C(C4=O)C(=CC=C5)OC)O)(C(=O)C)O)N)O.Cl',
'Name':'Daunoblastin'}]) # PubChem 3085106
antibiotics = pd.concat([antibiotics,daunoblastin], ignore_index=True)
-> No warning in this case.
Any idea why the export to Excel fails?
Thanks a lot in advance,
Stéphane
--
Assistant Professor, USBB, UMR 6286 CNRS, Bioinformatique Structurale
UFR Sciences et Techniques, 2, rue de la Houssinière, Bât. 25, 44322 Nantes
cedex 03, France
Tél : +33 251 125 636 / Fax : +33 251 125 632
http://www.ufip.univ-nantes.fr/ - http://www.steletch.org
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "83a450ec",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2022.09.1\n"
]
}
],
"source": [
"from rdkit.Chem import PandasTools\n",
"import pandas as pd\n",
"import os\n",
"from rdkit import RDConfig\n",
"import rdkit\n",
"print(rdkit.__version__)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a0a1a711",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Name', 'Smiles']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/j2/f6jpp0b97dl48k1nlwsstc2r0000gn/T/ipykernel_5514/2372667806.py:2: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
" antibiotics = antibiotics.append({'Smiles':'CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C', 'Name':'Penicilline G'}, ignore_index=True)#Penicilline G\n",
"/var/folders/j2/f6jpp0b97dl48k1nlwsstc2r0000gn/T/ipykernel_5514/2372667806.py:3: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
" antibiotics = antibiotics.append({'Smiles':'CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O', 'Name':'Tetracycline'}, ignore_index=True)#Tetracycline\n",
"/var/folders/j2/f6jpp0b97dl48k1nlwsstc2r0000gn/T/ipykernel_5514/2372667806.py:4: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
" antibiotics = antibiotics.append({'Smiles':'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C','Name':'Ampicilline'}, ignore_index=True)#Ampicilline\n"
]
}
],
"source": [
"antibiotics = pd.DataFrame(columns=['Name','Smiles'])\n",
"antibiotics = antibiotics.append({'Smiles':'CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C', 'Name':'Penicilline G'}, ignore_index=True)#Penicilline G\n",
"antibiotics = antibiotics.append({'Smiles':'CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O', 'Name':'Tetracycline'}, ignore_index=True)#Tetracycline\n",
"antibiotics = antibiotics.append({'Smiles':'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C','Name':'Ampicilline'}, ignore_index=True)#Ampicilline\n",
"\n",
"print([str(x) for x in antibiotics.columns])\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f42993ea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Name Smiles\n",
"0 Penicilline G CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C\n",
"1 Tetracycline CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4...\n",
"2 Ampicilline CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O...\n"
]
}
],
"source": [
"print(antibiotics)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8c346efc",
"metadata": {},
"outputs": [],
"source": [
"daunoblastin = pd.DataFrame([{'Smiles':'CC1C(C(CC(O1)OC2CC(CC3=C2C(=C4C(=C3O)C(=O)C5=C(C4=O)C(=CC=C5)OC)O)(C(=O)C)O)N)O.Cl', 'Name':'Daunoblastin'}]) # PubChem 3085106\n",
"antibiotics = pd.concat([antibiotics,daunoblastin], ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "86540e11",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Name Smiles\n",
"0 Penicilline G CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C\n",
"1 Tetracycline CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4...\n",
"2 Ampicilline CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O...\n",
"3 Daunoblastin CC1C(C(CC(O1)OC2CC(CC3=C2C(=C4C(=C3O)C(=O)C5=C...\n"
]
}
],
"source": [
"print(antibiotics)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b7077d1a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Smiles</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Penicilline G</td>\n",
" <td>CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Tetracycline</td>\n",
" <td>CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Ampicilline</td>\n",
" <td>CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Daunoblastin</td>\n",
" <td>CC1C(C(CC(O1)OC2CC(CC3=C2C(=C4C(=C3O)C(=O)C5=C...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Name Smiles\n",
"0 Penicilline G CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C\n",
"1 Tetracycline CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4...\n",
"2 Ampicilline CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O...\n",
"3 Daunoblastin CC1C(C(CC(O1)OC2CC(CC3=C2C(=C4C(=C3O)C(=O)C5=C..."
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"antibiotics"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3106a182",
"metadata": {},
"outputs": [],
"source": [
"frame = PandasTools.AddMoleculeColumnToFrame(antibiotics,'Smiles','Molecule',includeFingerprints=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "8e61e89a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Name', 'Smiles', 'Molecule']\n"
]
}
],
"source": [
"print([str(x) for x in antibiotics.columns])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "046722a8",
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'NoneType' object has no attribute 'columns'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn [9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mPandasTools\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSaveXlsxFromFrame\u001b[49m\u001b[43m(\u001b[49m\u001b[43mframe\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43messai.xslx\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmolCol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mROmol\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m300\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m300\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/projets/docknmine/venv/lib/python3.9/site-packages/rdkit/Chem/PandasTools.py:506\u001b[0m, in \u001b[0;36mSaveXlsxFromFrame\u001b[0;34m(frame, outFile, molCol, size)\u001b[0m\n\u001b[1;32m 489\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 490\u001b[0m \u001b[38;5;124;03m Saves pandas DataFrame as a xlsx file with embedded images.\u001b[39;00m\n\u001b[1;32m 491\u001b[0m \u001b[38;5;124;03m It maps numpy data types to excel cell types:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 501\u001b[0m \u001b[38;5;124;03m This feature is only available at runtime from within Excel.\u001b[39;00m\n\u001b[1;32m 502\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 504\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mxlsxwriter\u001b[39;00m \u001b[38;5;66;03m# don't want to make this a RDKit dependency\u001b[39;00m\n\u001b[0;32m--> 506\u001b[0m cols \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[43mframe\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m)\n\u001b[1;32m 507\u001b[0m cols\u001b[38;5;241m.\u001b[39mremove(molCol)\n\u001b[1;32m 508\u001b[0m dataTypes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(frame\u001b[38;5;241m.\u001b[39mdtypes)\n",
"\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'columns'"
]
}
],
"source": [
"PandasTools.SaveXlsxFromFrame(frame, \"essai.xslx\", molCol=\"ROmol\", size=(300, 300))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss