[Rdkit-discuss] Issue with PandaTools Export to Excel

Stéphane Téletchéa Wed, 26 Oct 2022 15:56:20 -0700

Dear all,

I wanted to export a list of molecules with the molecule image plus some properties, but ran into a strange error.

I thus went back to the documentation (http://rdkit.org/docs/source/rdkit.Chem.PandasTools.html), but still encounter the same error.


Rdkit version : 2022.09.1

The exact error is:

File ~/projets/docknmine/venv/lib/python3.9/site-packages/rdkit/Chem/PandasTools.py:506,
 in SaveXlsxFromFrame(frame, outFile, molCol, size)
489  """
490  Saves pandas DataFrame as a xlsx file with embedded images.
491  It maps numpy data types to excel cell types:
(...)
501  This feature is only available at runtime from within Excel.
502  """
504  import xlsxwriter  # don't want to make this a RDKit dependency
--> 506  cols = list(frame.columns)
507  cols.remove(molCol)
508  dataTypes = dict(frame.dtypes)

AttributeError: 'NoneType' object has no attribute 'columns'


Any idea if I'm doing something wrong?

Second, I also get a warning about the "append" deprecation in pandas. One should now use
"concat", so the documentation should be updated as follows:

For instance, I added "Daunoblastin" to the antibiotics list:

daunoblastin = pd.DataFrame([{'Smiles':'CC1C(C(CC(O1)OC2CC(CC3=C2C(=C4C(=C3O)C(=O)C5=C(C4=O)C(=CC=C5)OC)O)(C(=O)C)O)N)O.Cl',
 'Name':'Daunoblastin'}]) # PubChem 3085106
antibiotics = pd.concat([antibiotics,daunoblastin], ignore_index=True)

-> No warning in this case.

Any idea why the export to Excel fails?

Thanks a lot in advance,

Stéphane

--
Assistant Professor, USBB, UMR 6286 CNRS, Bioinformatique Structurale
UFR Sciences et Techniques, 2, rue de la Houssinière, Bât. 25, 44322 Nantes 
cedex 03, France
Tél : +33 251 125 636 / Fax : +33 251 125 632
http://www.ufip.univ-nantes.fr/ - http://www.steletch.org

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "83a450ec",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2022.09.1\n"
     ]
    }
   ],
   "source": [
    "from rdkit.Chem import PandasTools\n",
    "import pandas as pd\n",
    "import os\n",
    "from rdkit import RDConfig\n",
    "import rdkit\n",
    "print(rdkit.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a0a1a711",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Name', 'Smiles']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/j2/f6jpp0b97dl48k1nlwsstc2r0000gn/T/ipykernel_5514/2372667806.py:2: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
      "  antibiotics = antibiotics.append({'Smiles':'CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C', 'Name':'Penicilline G'}, ignore_index=True)#Penicilline G\n",
      "/var/folders/j2/f6jpp0b97dl48k1nlwsstc2r0000gn/T/ipykernel_5514/2372667806.py:3: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
      "  antibiotics = antibiotics.append({'Smiles':'CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O', 'Name':'Tetracycline'}, ignore_index=True)#Tetracycline\n",
      "/var/folders/j2/f6jpp0b97dl48k1nlwsstc2r0000gn/T/ipykernel_5514/2372667806.py:4: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
      "  antibiotics = antibiotics.append({'Smiles':'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C','Name':'Ampicilline'}, ignore_index=True)#Ampicilline\n"
     ]
    }
   ],
   "source": [
    "antibiotics = pd.DataFrame(columns=['Name','Smiles'])\n",
    "antibiotics = antibiotics.append({'Smiles':'CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C', 'Name':'Penicilline G'}, ignore_index=True)#Penicilline G\n",
    "antibiotics = antibiotics.append({'Smiles':'CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O', 'Name':'Tetracycline'}, ignore_index=True)#Tetracycline\n",
    "antibiotics = antibiotics.append({'Smiles':'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C','Name':'Ampicilline'}, ignore_index=True)#Ampicilline\n",
    "\n",
    "print([str(x) for x in  antibiotics.columns])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f42993ea",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            Name                                             Smiles\n",
      "0  Penicilline G    CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C\n",
      "1   Tetracycline  CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4...\n",
      "2    Ampicilline  CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O...\n"
     ]
    }
   ],
   "source": [
    "print(antibiotics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8c346efc",
   "metadata": {},
   "outputs": [],
   "source": [
    "daunoblastin = pd.DataFrame([{'Smiles':'CC1C(C(CC(O1)OC2CC(CC3=C2C(=C4C(=C3O)C(=O)C5=C(C4=O)C(=CC=C5)OC)O)(C(=O)C)O)N)O.Cl', 'Name':'Daunoblastin'}]) # PubChem 3085106\n",
    "antibiotics = pd.concat([antibiotics,daunoblastin], ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "86540e11",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            Name                                             Smiles\n",
      "0  Penicilline G    CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C\n",
      "1   Tetracycline  CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4...\n",
      "2    Ampicilline  CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O...\n",
      "3   Daunoblastin  CC1C(C(CC(O1)OC2CC(CC3=C2C(=C4C(=C3O)C(=O)C5=C...\n"
     ]
    }
   ],
   "source": [
    "print(antibiotics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b7077d1a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Smiles</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Penicilline G</td>\n",
       "      <td>CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Tetracycline</td>\n",
       "      <td>CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Ampicilline</td>\n",
       "      <td>CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Daunoblastin</td>\n",
       "      <td>CC1C(C(CC(O1)OC2CC(CC3=C2C(=C4C(=C3O)C(=O)C5=C...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Name                                             Smiles\n",
       "0  Penicilline G    CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C\n",
       "1   Tetracycline  CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4...\n",
       "2    Ampicilline  CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O...\n",
       "3   Daunoblastin  CC1C(C(CC(O1)OC2CC(CC3=C2C(=C4C(=C3O)C(=O)C5=C..."
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "antibiotics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3106a182",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = PandasTools.AddMoleculeColumnToFrame(antibiotics,'Smiles','Molecule',includeFingerprints=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "8e61e89a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Name', 'Smiles', 'Molecule']\n"
     ]
    }
   ],
   "source": [
    "print([str(x) for x in  antibiotics.columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "046722a8",
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'NoneType' object has no attribute 'columns'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "Cell \u001b[0;32mIn [9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mPandasTools\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSaveXlsxFromFrame\u001b[49m\u001b[43m(\u001b[49m\u001b[43mframe\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43messai.xslx\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmolCol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mROmol\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m300\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m300\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/projets/docknmine/venv/lib/python3.9/site-packages/rdkit/Chem/PandasTools.py:506\u001b[0m, in \u001b[0;36mSaveXlsxFromFrame\u001b[0;34m(frame, outFile, molCol, size)\u001b[0m\n\u001b[1;32m    489\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    490\u001b[0m \u001b[38;5;124;03m    Saves pandas DataFrame as a xlsx file with embedded images.\u001b[39;00m\n\u001b[1;32m    491\u001b[0m \u001b[38;5;124;03m    It maps numpy data types to excel cell types:\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    501\u001b[0m \u001b[38;5;124;03m    This feature is only available at runtime from within Excel.\u001b[39;00m\n\u001b[1;32m    502\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m    504\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mxlsxwriter\u001b[39;00m  \u001b[38;5;66;03m# don't want to make this a RDKit dependency\u001b[39;00m\n\u001b[0;32m--> 506\u001b[0m cols \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[43mframe\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m)\n\u001b[1;32m    507\u001b[0m cols\u001b[38;5;241m.\u001b[39mremove(molCol)\n\u001b[1;32m    508\u001b[0m dataTypes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(frame\u001b[38;5;241m.\u001b[39mdtypes)\n",
      "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'columns'"
     ]
    }
   ],
   "source": [
    "PandasTools.SaveXlsxFromFrame(frame, \"essai.xslx\", molCol=\"ROmol\", size=(300, 300))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}

_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

[Rdkit-discuss] Issue with PandaTools Export to Excel

Reply via email to