Hello,

I figured out that libxls has a function which does all the guessing
work, and I've updated my code to use it. Looking at its source,
it seems to take care of UTF-8 issues, but I wasn't able to
make that work. Maybe a better programmer could take a look at
it.

I'm sending the latest version of my code:
/* PSPP - a program for statistical analysis.
   Copyright (C) 2007, 2009, 2010, 2011 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */


#include <config.h>

#include "libpspp/message.h"
#include "libpspp/misc.h"

#include "gl/minmax.h"

#include "gettext.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) (msgid)


#if !XLS_SUPPORT

/* Stand-in for xls_open_reader used when PSPP is built without XLS
   support (XLS_SUPPORT is false).  XLS and DICT are ignored: the
   function only reports an error message and hands back a null
   reader. */
struct casereader *
xls_open_reader (struct xls_read_info *xls, struct dictionary **dict)
{
  struct casereader *no_reader = NULL;

  msg (ME, _("Support for XLS files was not compiled into this installation of PSPP"));
  return no_reader;
}

#else

#include "data/xls-reader.h"

#include <assert.h>
#include <stdbool.h>
#include <errno.h>
#include <libxls/xls.h>
#include <zlib.h>

#include "data/case.h"
#include "data/casereader-provider.h"
#include "data/dictionary.h"
#include "data/identifier.h"
#include "data/value.h"
#include "data/variable.h"
#include "libpspp/i18n.h"
#include "libpspp/str.h"

#include "gl/xalloc.h"

/* Forward declarations of the callbacks referenced by the class
   table below; both are defined later in this file. */
static void xls_file_casereader_destroy (struct casereader *, void *);

static struct ccase *xls_file_casereader_read (struct casereader *, void *);

/* Callback table handed to casereader_create_sequential() by
   xls_open_reader.  Only the read and destroy callbacks are supplied;
   the two trailing slots are left null.
   NOTE(review): the meaning of the two null slots is defined by
   struct casereader_class in data/casereader-provider.h -- presumably
   optional operations this reader does not implement; confirm there. */
static const struct casereader_class xls_file_casereader_class =
  {
    xls_file_casereader_read,
    xls_file_casereader_destroy,
    NULL,
    NULL,
  };

/* Private state of an XLS casereader.  Allocated by xls_open_reader,
   passed to the read/destroy callbacks as their auxiliary pointer and
   freed by xls_file_casereader_destroy. */
struct xls_reader
{
  int n_cases;              /* Case count reported when the casereader is created. */
  int first_case;           /* Row index of the first case (set to 0 on open). */
  int last_case;            /* Row index of the last case; the read callback
                               treats it as inclusive. */
  int next_case;            /* Row index the next read call will return. */
  int n_var_specs;          /* Number of columns read; one variable is created
                               per column. */
  struct caseproto *proto;  /* Reference to the dictionary's case prototype;
                               released in the destroy callback. */
  struct dictionary *dict;  /* Dictionary created on open; the same pointer is
                               also stored through the caller's DICT argument. */
  xlsWorkBook* pWB;         /* libxls workbook returned by xls_open(). */
  xlsWorkSheet* pWS;        /* Worksheet 0 of PWB, from xls_getWorkSheet(). */
};


/* Casereader "destroy" callback.  Releases the resources held by the
   reader state R_ (a struct xls_reader *): the libxls workbook, the
   reference to the case prototype, and the state structure itself.
   READER is unused.  A null R_ is accepted and ignored.

   The dictionary is deliberately not destroyed here: xls_open_reader
   also stores it through its DICT argument, so it is presumably owned
   by that caller -- confirm against the callers of xls_open_reader. */
static void
xls_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
{
  struct xls_reader *r = r_;

  if (r == NULL)
    return;

  /* Close the workbook opened by xls_open(); without this the file
     handle and libxls's internal buffers are leaked for every reader.
     NOTE(review): whether the worksheet in r->pWS needs a separate
     release call depends on the libxls version in use -- check
     <libxls/xls.h> for a worksheet close function. */
  if (r->pWB != NULL)
    xls_close (r->pWB);

  caseproto_unref (r->proto);
  free (r);
}


/* Opens the XLS file named by XLS->file_name and returns a sequential
   casereader over the rows of its first worksheet (index 0).

   A new dictionary is created and stored in *DICT.  It receives one
   string variable of width 255 per worksheet column, with names
   generated by dict_make_unique_var_name().

   Returns a null pointer, after reporting an error with msg(), if the
   workbook cannot be opened or its first worksheet cannot be obtained.
   On those paths *DICT is left untouched and nothing is leaked. */
struct casereader *
xls_open_reader (struct xls_read_info *xls, struct dictionary **dict)
{
  /* Width given to every string variable created below. */
  enum { XLS_STRING_WIDTH = 255 };

  struct xls_reader *r;
  char charset[] = "UTF-8";
  int i;

  r = xzalloc (sizeof *r);

  r->pWB = xls_open (xls->file_name, charset);
  if (r->pWB == NULL)
    {
      msg (ME, _("Error opening `%s' for reading as a XLS file: %s."),
           xls->file_name, strerror (errno));
      free (r);
      return NULL;
    }

  r->pWS = xls_getWorkSheet (r->pWB, 0);
  if (r->pWS == NULL)
    {
      msg (ME, _("Unable to read the first worksheet of XLS file `%s'."),
           xls->file_name);
      xls_close (r->pWB);
      free (r);
      return NULL;
    }
  xls_parseWorkSheet (r->pWS);

  /* The read callback returns rows FIRST_CASE through LAST_CASE
     inclusive, so the count reported to the casereader must cover that
     whole range; otherwise the final row is never delivered. */
  r->first_case = 0;
  r->last_case = r->pWS->rows.lastrow;
  r->next_case = r->first_case;
  r->n_cases = r->last_case - r->first_case + 1;

  /* NOTE(review): if rows.lastcol is, like rows.lastrow, the index of
     the last column rather than a count, this drops the final column.
     Confirm against the libxls documentation before changing it. */
  r->n_var_specs = r->pWS->rows.lastcol;

  /* Create the dictionary and populate it */
  *dict = r->dict = dict_create ();

  for (i = 0; i < r->n_var_specs; ++i)
    {
      unsigned long int vstart = 0;
      char *name = dict_make_unique_var_name (r->dict, NULL, &vstart);

      dict_create_var (r->dict, name, XLS_STRING_WIDTH);
      free (name);
    }

  r->proto = caseproto_ref (dict_get_proto (r->dict));

  return casereader_create_sequential
    (NULL,
     r->proto,
     r->n_cases,
     &xls_file_casereader_class, r);
}


/* Casereader "read" callback.  Reads the worksheet row numbered
   R->next_case, returns it as a newly created case, and advances
   R->next_case.  R_ is the struct xls_reader * supplied when the
   casereader was created; READER is unused.

   Each column's formatted cell text is copied into the corresponding
   string variable, padded on the right with spaces.  A column whose
   cell is absent, or for which libxls yields no text, keeps the
   missing value set by case_set_missing().

   Returns a null pointer once every row up to and including
   R->last_case has been returned. */
static struct ccase *
xls_file_casereader_read (struct casereader *reader UNUSED, void *r_)
{
  struct xls_reader *r = r_;
  struct ccase *c;
  int i;

  /* Test for end of data before creating the case, so that no case is
     allocated (and leaked) on the terminating call. */
  if (r->next_case > r->last_case)
    return NULL;

  c = case_create (r->proto);
  case_set_missing (c);

  for (i = 0; i < r->n_var_specs; ++i)
    {
      const struct variable *var = dict_get_var (r->dict, i);
      union value *v = case_data_rw (c, var);
      int width = var_get_width (var);
      xlsCell *cell = xls_cell (r->pWS, r->next_case, i);
      char *text;

      if (cell == NULL)
        continue;

      text = xls_getfcell (r->pWB, cell);
      if (text == NULL)
        continue;

      value_copy_str_rpad (v, width, (const uint8_t *) text, ' ');

      /* xls_getfcell() builds its result in freshly allocated memory
         that the caller must release.
         NOTE(review): confirm for the libxls version being linked. */
      free (text);
    }

  r->next_case++;
  return c;
}

#endif /* XLS_SUPPORT */
_______________________________________________
pspp-dev mailing list
pspp-dev@gnu.org
http://lists.gnu.org/mailman/listinfo/pspp-dev

Reply via email to