It is now working, but not complete. I just ignore any parameters that
the syntax gives me, and all variables are strings of width 255.
Also, non-ASCII characters are shown as '?'. But it is working quite well.

I'm sending the two new files, xls-reader.[ch], and a diff of what I've
changed to make it work. I would very much appreciate some advice on code
style and related issues.

Michel
/* PSPP - a program for statistical analysis.
   Copyright (C) 2007, 2009, 2010, 2011 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */


#include <config.h>

#include "libpspp/message.h"
#include "libpspp/misc.h"

#include "gl/minmax.h"

#include "gettext.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) (msgid)


#if XLS_SUPPORT

struct casereader *
xls_open_reader (struct xls_read_info *xls, struct dictionary **dict)
{
  msg (ME, _("Support for XLS files was not compiled into this installation of PSPP"));

  return NULL;
}

#else

#include "data/xls-reader.h"

#include <assert.h>
#include <stdbool.h>
#include <errno.h>
#include <libxls/xls.h>
#include <zlib.h>

#include "data/case.h"
#include "data/casereader-provider.h"
#include "data/dictionary.h"
#include "data/identifier.h"
#include "data/value.h"
#include "data/variable.h"
#include "libpspp/i18n.h"
#include "libpspp/str.h"

#include "gl/xalloc.h"

static void xls_file_casereader_destroy (struct casereader *, void *);

static struct ccase *xls_file_casereader_read (struct casereader *, void *);

void convert_xls_to_value(struct ccase *, const struct variable *, xlsCell *);

/* Callback table that xls_open_reader() passes to
   casereader_create_sequential().  The members are given positionally,
   so their order must match the declaration of struct casereader_class
   (see data/casereader-provider.h). */
static const struct casereader_class xls_file_casereader_class =
  {
    /* Returns the next case, or a null pointer when none is left. */
    xls_file_casereader_read,
    /* Releases the reader's private state. */
    xls_file_casereader_destroy,
    /* Remaining optional callbacks are not provided.
       NOTE(review): presumably "clone" and "peek" -- confirm against
       struct casereader_class. */
    NULL,
    NULL,
  };

/* Private state of an XLS casereader.  Allocated by xls_open_reader()
   and released by xls_file_casereader_destroy(). */
struct xls_reader
{
  int n_cases;              /* Case count reported to the casereader. */
  int first_case;           /* Set to 0 on open; not otherwise consulted. */
  int last_case;            /* Reading stops once next_case exceeds this. */
  int next_case;            /* Worksheet row index of the next case to read. */
  int n_var_specs;          /* Number of columns read from each row, which is
                               also the number of variables in DICT. */
  struct caseproto *proto;  /* Reference to DICT's case prototype. */
  struct dictionary *dict;  /* Dictionary created for the file; also handed
                               to the caller of xls_open_reader(). */
  xlsWorkBook* pWB;         /* Workbook returned by xls_open(). */
  xlsWorkSheet* pWS;        /* Worksheet 0 of PWB, from xls_getWorkSheet(). */
};


/* Casereader "destroy" callback.  Releases everything owned by the
   xls_reader R_: the libxls worksheet and workbook, the reference to the
   case prototype, and the structure itself.  A null R_ is ignored.

   The dictionary is not destroyed here, because xls_open_reader() hands
   it to its caller through the DICT argument. */
static void
xls_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
{
  struct xls_reader *r = r_;

  if (r == NULL)
    return;

  /* Close the worksheet before the workbook it was obtained from. */
  if (r->pWS != NULL)
    xls_close_WS (r->pWS);
  if (r->pWB != NULL)
    xls_close_WB (r->pWB);

  caseproto_unref (r->proto);
  free (r);
}


/* Opens the XLS file named by XLS->file_name and returns a casereader
   for worksheet 0 of that workbook.  Returns a null pointer, after
   reporting an error, if the file cannot be opened or the worksheet
   cannot be obtained.

   On success a newly created dictionary is stored in *DICT.  It holds
   one string variable of width 255 per worksheet column, with
   automatically generated names.

   Only XLS->file_name is consulted; the other members of XLS (sheet
   selection, cell range, READNAMES, assumed string width) are not
   implemented yet. */
struct casereader *
xls_open_reader (struct xls_read_info *xls, struct dictionary **dict)
{
  struct xls_reader *r;
  char charset[] = "UTF-8";
  unsigned long int vstart = 0;
  int i;

  r = xzalloc (sizeof *r);
  r->pWB = xls_open (xls->file_name, charset);
  if (r->pWB == NULL)
    {
      msg (ME, _("Error opening `%s' for reading as a XLS file: %s."),
           xls->file_name, strerror (errno));
      free (r);
      return NULL;
    }

  r->pWS = xls_getWorkSheet (r->pWB, 0);
  if (r->pWS == NULL)
    {
      msg (ME, _("Error reading the first worksheet of `%s'."),
           xls->file_name);
      xls_close_WB (r->pWB);
      free (r);
      return NULL;
    }
  xls_parseWorkSheet (r->pWS);

  /* rows.lastrow is the index of the last row, not a row count, so rows
     0...lastrow inclusive hold data.  The case count given to the
     casereader must therefore be one more than last_case; otherwise the
     final row is never requested. */
  r->first_case = 0;
  r->last_case = r->pWS->rows.lastrow;
  r->n_cases = r->last_case + 1;
  r->next_case = r->first_case;

  /* NOTE(review): rows.lastcol is used as a column count here, as in the
     original code -- confirm against libxls whether it is an inclusive
     index like lastrow. */
  r->n_var_specs = r->pWS->rows.lastcol;

  /* Create the dictionary and populate it with one 255-byte string
     variable per column.  VSTART carries the name search position from
     one variable to the next, so each lookup resumes where the previous
     one stopped. */
  *dict = r->dict = dict_create ();
  for (i = 0; i < r->n_var_specs; ++i)
    {
      char *name = dict_make_unique_var_name (r->dict, NULL, &vstart);
      dict_create_var (r->dict, name, 255);
      free (name);
    }

  r->proto = caseproto_ref (dict_get_proto (r->dict));

  return casereader_create_sequential
    (NULL,
     r->proto,
     r->n_cases,
     &xls_file_casereader_class, r);
}


/* Casereader "read" callback.  Builds one case from worksheet row
   R_->next_case and advances to the following row.  Returns a null
   pointer once next_case has passed last_case.

   Every value starts out missing; a column for which libxls has no cell
   keeps that missing value. */
static struct ccase *
xls_file_casereader_read (struct casereader *reader UNUSED, void *r_)
{
  struct xls_reader *r = r_;
  struct ccase *c;
  int i;

  /* Test for the end of the data before allocating anything, so that no
     case is leaked when there is nothing left to return. */
  if (r->next_case > r->last_case)
    return NULL;

  c = case_create (r->proto);
  case_set_missing (c);

  for (i = 0; i < r->n_var_specs; ++i)
    {
      xlsCell *cell = xls_cell (r->pWS, r->next_case, i);

      /* xls_cell() can return a null pointer for a position outside the
         stored cells; do not pass that on to be dereferenced. */
      if (cell != NULL)
        convert_xls_to_value (c, dict_get_var (r->dict, i), cell);
    }

  r->next_case++;
  return c;
}

/* Stores the contents of CELL, rendered as text, into string variable
   VAR of case C, padded on the right with spaces to VAR's width.

   Numeric cells and numeric formula results are formatted with "%.15g".
   Formula results that libxls labels "bool" or "error" become
   "true"/"false" or "*error*".  Any other cell contributes its string,
   and a cell without a string (or a null CELL) yields an empty value. */
void
convert_xls_to_value(struct ccase *c, const struct variable *var, xlsCell *cell)
{
  /* BIFF record identifiers that libxls stores in xlsCell.id. */
  enum
    {
      BIFF_FORMULA = 0x006,
      BIFF_MULRK = 0x0BD,
      BIFF_NUMBER = 0x203,
      BIFF_RK = 0x27E
    };

  union value *v = case_data_rw (c, var);
  int width = var_get_width (var);
  char number[64];              /* Ample room for a "%.15g" rendering. */
  const char *text = "";        /* Empty cell unless proven otherwise. */

  if (cell == NULL)
    text = "";
  else if (cell->id == BIFF_RK || cell->id == BIFF_MULRK
           || cell->id == BIFF_NUMBER
           || (cell->id == BIFF_FORMULA && cell->l == 0))
    {
      /* Plain number, or a formula with a numeric result. */
      snprintf (number, sizeof number, "%.15g", cell->d);
      text = number;
    }
  else if (cell->str != NULL)
    {
      const char *s = (const char *) cell->str;

      if (cell->id != BIFF_FORMULA)
        text = s;                                 /* Ordinary string cell. */
      else if (strcmp (s, "bool") == 0)
        text = cell->d != 0 ? "true" : "false";   /* Boolean formula. */
      else if (strcmp (s, "error") == 0)
        text = "*error*";                         /* Error formula. */
      else
        text = s;                                 /* String formula. */
    }

  /* The text is copied straight from its source, so nothing is allocated
     here and a string longer than WIDTH cannot overrun a scratch
     buffer. */
  value_copy_str_rpad (v, width, (const uint8_t *) text, ' ');
}

#endif /* XLS_SUPPORT */
/* PSPP - a program for statistical analysis.
   Copyright (C) 2007, 2010 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

#ifndef XLS_READ_H
#define XLS_READ_H 1

#include <stdbool.h>

struct casereader;


/* Settings that describe how an XLS file should be read, passed to
   xls_open_reader().

   The member order matters: parse_get_xls() in get-data.c initializes
   this structure positionally.

   NOTE(review): xls_open_reader() currently uses only file_name; the
   remaining members are parsed but not yet honored. */
struct xls_read_info
{
  /* Name of the sheet to read, or null if none was given. */
  char *sheet_name ;            /* In UTF-8. */
  /* File to open. */
  char *file_name ;             /* In filename encoding. */
  /* Cell range to read, or null for the full sheet. */
  char *cell_range ;            /* In UTF-8. */
  /* Sheet number; the parser sets it to -1 when a sheet name is given. */
  int sheet_index ;
  /* Value of the READNAMES setting (ON is true, OFF is false). */
  bool read_names ;
  /* Value of ASSUMEDSTRWIDTH; the parser's default is -1. */
  int asw ;
};

struct dictionary;

struct casereader * xls_open_reader (struct xls_read_info *, struct dictionary **);


#endif
diff --git a/src/data/automake.mk b/src/data/automake.mk
index f7ee43f..92e2f55 100644
--- a/src/data/automake.mk
+++ b/src/data/automake.mk
@@ -67,6 +67,8 @@ src_data_libdata_la_SOURCES = \
 	src/data/format.def \
 	src/data/gnumeric-reader.c \
 	src/data/gnumeric-reader.h \
+	src/data/xls-reader.c \
+	src/data/xls-reader.h \
 	src/data/identifier.c \
 	src/data/identifier2.c \
 	src/data/identifier.h \
diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c
index dd55752..6e6fe9b 100644
--- a/src/language/data-io/get-data.c
+++ b/src/language/data-io/get-data.c
@@ -22,6 +22,7 @@
 #include "data/dictionary.h"
 #include "data/format.h"
 #include "data/gnumeric-reader.h"
+#include "data/xls-reader.h"
 #include "data/psql-reader.h"
 #include "data/settings.h"
 #include "language/command.h"
@@ -43,6 +44,7 @@
 static int parse_get_gnm (struct lexer *lexer, struct dataset *);
 static int parse_get_txt (struct lexer *lexer, struct dataset *);
 static int parse_get_psql (struct lexer *lexer, struct dataset *);
+static int parse_get_xls (struct lexer *lexer, struct dataset *);
 
 int
 cmd_get_data (struct lexer *lexer, struct dataset *ds)
@@ -60,6 +62,8 @@ cmd_get_data (struct lexer *lexer, struct dataset *ds)
     return parse_get_txt (lexer, ds);
   else if (lex_match_id (lexer, "PSQL"))
     return parse_get_psql (lexer, ds);
+  else if (lex_match_id (lexer, "XLS"))
+    return parse_get_xls (lexer, ds);
 
   msg (SE, _("Unsupported TYPE %s."), lex_tokcstr (lexer));
   return CMD_FAILURE;
@@ -569,3 +573,117 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds)
   free (name);
   return CMD_CASCADING_FAILURE;
 }
+
+static int
+parse_get_xls (struct lexer *lexer, struct dataset *ds)
+{
+  struct xls_read_info xls  = {NULL, NULL, NULL, 1, true, -1};
+
+  lex_force_match (lexer, T_SLASH);
+
+  if (!lex_force_match_id (lexer, "FILE"))
+    goto error;
+
+  lex_force_match (lexer, T_EQUALS);
+
+  if (!lex_force_string (lexer))
+    goto error;
+
+  xls.file_name = utf8_to_filename (lex_tokcstr (lexer));
+
+  lex_get (lexer);
+
+  while (lex_match (lexer, T_SLASH) )
+    {
+      if ( lex_match_id (lexer, "ASSUMEDSTRWIDTH"))
+	{
+	  lex_match (lexer, T_EQUALS);
+	  xls.asw = lex_integer (lexer);
+	  lex_get (lexer);
+	}
+      else if (lex_match_id (lexer, "SHEET"))
+	{
+	  lex_match (lexer, T_EQUALS);
+	  if (lex_match_id (lexer, "NAME"))
+	    {
+	      if ( ! lex_force_string (lexer) )
+		goto error;
+
+	      xls.sheet_name = ss_xstrdup (lex_tokss (lexer));
+	      xls.sheet_index = -1; //TODO why?
+
+	      lex_get (lexer);
+	    }
+	  else if (lex_match_id (lexer, "INDEX"))
+	    {
+	      xls.sheet_index = lex_integer (lexer);
+	      lex_get (lexer);
+	    }
+	  else
+	    goto error;
+	}
+      else if (lex_match_id (lexer, "CELLRANGE"))
+	{
+	  lex_match (lexer, T_EQUALS);
+
+	  if (lex_match_id (lexer, "FULL"))
+	    {
+	      xls.cell_range = NULL;
+	    }
+	  else if (lex_match_id (lexer, "RANGE"))
+	    {
+	      if ( ! lex_force_string (lexer) )
+		goto error;
+
+	      xls.cell_range = ss_xstrdup (lex_tokss (lexer));
+	      lex_get (lexer);
+	    }
+	  else
+	    goto error;
+	}
+      else if (lex_match_id (lexer, "READNAMES"))
+	{
+	  lex_match (lexer, T_EQUALS);
+
+	  if ( lex_match_id (lexer, "ON"))
+	    {
+	      xls.read_names = true;
+	    }
+	  else if (lex_match_id (lexer, "OFF"))
+	    {
+	      xls.read_names = false;
+	    }
+	  else
+	    goto error;
+	}
+      else
+	{
+	  lex_error (lexer, NULL);
+	  goto error;
+	}
+    }
+
+  {
+    struct dictionary *dict = NULL;
+    struct casereader *reader = xls_open_reader (&xls, &dict);
+
+    if ( reader )
+      {
+        dataset_set_dict (ds, dict);
+        dataset_set_source (ds, reader);
+      }
+  }
+
+  free (xls.file_name);
+  free (xls.sheet_name);
+  free (xls.cell_range);
+  return CMD_SUCCESS;
+
+ error:
+
+  free (xls.file_name);
+  free (xls.sheet_name);
+  free (xls.cell_range);
+  return CMD_FAILURE;
+}
+
_______________________________________________
pspp-dev mailing list
pspp-dev@gnu.org
http://lists.gnu.org/mailman/listinfo/pspp-dev

Reply via email to