On 10/1/06, Jamie McCracken <[EMAIL PROTECTED]> wrote:
1) Remove libextractor by implementing the last few extractors (libgsf
for msoffice and reusing the extractors for gif and tiff in libextractor)
Here's a patch for libgsf. Tested with word and powerpoint.
--
Mr Jamie McCracken
http://jamiemcc.livejournal.com/
_______________________________________________
tracker-list mailing list
[email protected]
http://mail.gnome.org/mailman/listinfo/tracker-list
Index: configure.in
===================================================================
RCS file: /cvs/gnome/tracker/configure.in,v
retrieving revision 1.20
diff -u -p -r1.20 configure.in
--- configure.in 27 Sep 2006 10:38:12 -0000 1.20
+++ configure.in 3 Oct 2006 00:53:02 -0000
@@ -374,6 +374,19 @@ AC_SUBST(LIBEXIF_CFLAGS)
AC_SUBST(LIBEXIF_LIBS)
test "$have_libexif" = "yes" && AC_DEFINE(HAVE_LIBEXIF, [], [Define if we have libexif])
+##################################################################
+# check for libgsf
+##################################################################
+
+LIBGSF_REQUIRED=1.13
+
+PKG_CHECK_MODULES(LIBGSF, [libgsf-1 >= $LIBGSF_REQUIRED], [have_libgsf=yes] , [have_libgsf=no])
+
+AM_CONDITIONAL(HAVE_LIBGSF, test "$have_libgsf" = "yes")
+AC_SUBST(LIBGSF_CFLAGS)
+AC_SUBST(LIBGSF_LIBS)
+test "$have_libgsf" = "yes" && AC_DEFINE(HAVE_LIBGSF, [], [Define if we have libgsf])
+
#####################################################
@@ -425,5 +438,6 @@ Metadata extractors:
ogg/theora : $have_theora
png : $have_libpng
exif (jpeg) : $have_libexif
+ gsf : $have_libgsf
"
Index: src/tracker-extract/Makefile.am
===================================================================
RCS file: /cvs/gnome/tracker/src/tracker-extract/Makefile.am,v
retrieving revision 1.5
diff -u -p -r1.5 Makefile.am
--- src/tracker-extract/Makefile.am 27 Sep 2006 10:34:55 -0000 1.5
+++ src/tracker-extract/Makefile.am 3 Oct 2006 00:53:28 -0000
@@ -3,7 +3,8 @@ INCLUDES = $(GLIB2_CFLAGS) $(CFLAGS) -g
$(VORBIS_CFLAGS) \
$(THEORA_CFLAGS) \
$(LIBPNG_CFLAGS) \
- $(LIBEXIF_CFLAGS)
+ $(LIBEXIF_CFLAGS) \
+ $(LIBGSF_CFLAGS)
bin_PROGRAMS = tracker-extract
@@ -14,7 +15,8 @@ tracker_extract_SOURCES = tracker-extrac
tracker-extract-abw.c \
tracker-extract-vorbis.c \
tracker-extract-png.c \
- tracker-extract-exif.c
+ tracker-extract-exif.c \
+ tracker-extract-msoffice.c
if USING_INTERNAL_LIBEXTRACTOR
extractor_ldadd = $(top_builddir)/src/libextractor/src/main/libextractor.la
@@ -27,4 +29,5 @@ tracker_extract_LDADD = $(GLIB2_LIBS) $(
$(VORBIS_LIBS) \
$(THEORA_LIBS) \
$(LIBPNG_LIBS) \
- $(LIBEXIF_LIBS)
+ $(LIBEXIF_LIBS) \
+ $(LIBGSF_LIBS)
--- /dev/null 2006-08-05 19:53:54.000000000 -0400
+++ src/tracker-extract/tracker-extract-msoffice.c 2006-10-02 20:52:12.000000000 -0400
@@ -0,0 +1,142 @@
+
+#include "config.h"
+
+#ifdef HAVE_LIBGSF
+
+#include <stdio.h>
+#include <string.h>
+#include <glib.h>
+#include <gsf/gsf.h>
+#include <gsf/gsf-input-stdio.h>
+#include <gsf/gsf-infile.h>
+#include <gsf/gsf-infile-msole.h>
+#include <gsf/gsf-msole-utils.h>
+#include <gsf/gsf-doc-meta-data.h>
+
+/* Maps each libgsf document property we index to its tracker metadata key. */
+static const struct {
+        const gchar *gsf_name;
+        const gchar *tracker_key;
+} property_map[] = {
+        { "dc:title",           "Doc.Title"     },
+        { "dc:subject",         "Doc.Subject"   },
+        { "dc:creator",         "Doc.Author"    },
+        { "dc:keywords",        "Doc.Keywords"  },
+        { "dc:description",     "Doc.Comment"   },
+        { "gsf:page-count",     "Doc.PageCount" },
+        { "gsf:word-count",     "Doc.WordCount" },
+        { "meta:creation-date", "Doc.Created"   },
+        { "meta:generator",     "File.Other"    }
+};
+
+/*
+ * Return a newly allocated string for VAL, or NULL when there is nothing to
+ * store.  String values are copied verbatim: g_strdup_value_contents() is a
+ * debugging aid that wraps strings in double quotes and escapes them, which
+ * must not end up in the indexed metadata.  Other types keep using it.
+ */
+static gchar *
+value_to_string (GValue const *val)
+{
+        if (val == NULL) {
+                return NULL;
+        }
+        if (G_VALUE_HOLDS_STRING (val)) {
+                return g_strdup (g_value_get_string (val));
+        }
+        return g_strdup_value_contents (val);
+}
+
+/*
+ * GHFunc passed to gsf_doc_meta_data_foreach().
+ *
+ * key:       property name (NUL-terminated string).
+ * value:     the GsfDocProp carrying the property value.
+ * user_data: GHashTable receiving newly allocated key/value strings.
+ *
+ * Properties that are not listed in property_map are ignored.
+ */
+static void
+metadata_cb (gpointer key, gpointer value, gpointer user_data)
+{
+        const gchar *name = key;
+        GHashTable *metadata = user_data;
+        guint i;
+
+        for (i = 0; i < G_N_ELEMENTS (property_map); i++) {
+                gchar *text;
+
+                if (strcmp (name, property_map[i].gsf_name) != 0) {
+                        continue;
+                }
+
+                text = value_to_string (gsf_doc_prop_get_val ((GsfDocProp *) value));
+                if (text != NULL) {
+                        g_hash_table_insert (metadata, g_strdup (property_map[i].tracker_key), text);
+                }
+                return;
+        }
+}
+
+/*
+ * Extract summary metadata from the OLE2 (MS Office) file FILENAME and add
+ * it to METADATA as newly allocated key/value strings.
+ *
+ * Any failure (unreadable file, not an OLE2 container, no summary stream,
+ * unparsable summary) leaves METADATA untouched.  Every libgsf object and
+ * GError obtained here is released on all paths.
+ */
+void
+tracker_extract_msoffice (gchar *filename, GHashTable *metadata)
+{
+        GsfInput *input = NULL;
+        GsfInfile *infile = NULL;
+        GsfInput *stream = NULL;
+        GsfDocMetaData *md = NULL;
+        GError *error = NULL;
+
+        input = gsf_input_stdio_new (filename, &error);
+        if (input == NULL) {
+                goto out;
+        }
+
+        infile = gsf_infile_msole_new (input, &error);
+        if (infile == NULL) {
+                goto out;
+        }
+
+        /* The leading \05 byte is part of the stream name. */
+        stream = gsf_infile_child_by_name (infile, "\05SummaryInformation");
+        if (stream == NULL) {
+                goto out;
+        }
+
+        md = gsf_doc_meta_data_new ();
+        error = gsf_msole_metadata_read (stream, md);
+        if (error != NULL) {
+                goto out;
+        }
+
+        gsf_doc_meta_data_foreach (md, metadata_cb, metadata);
+
+out:
+        if (error != NULL) {
+                g_error_free (error);
+        }
+        if (md != NULL) {
+                g_object_unref (md);
+        }
+        if (stream != NULL) {
+                g_object_unref (stream);
+        }
+        if (infile != NULL) {
+                g_object_unref (infile);
+        }
+        if (input != NULL) {
+                g_object_unref (input);
+        }
+}
+
+#else
+#warning "Not building Microsoft Office metadata extractor."
+#endif /* HAVE_LIBGSF */
Index: src/tracker-extract/tracker-extract.c
===================================================================
RCS file: /cvs/gnome/tracker/src/tracker-extract/tracker-extract.c,v
retrieving revision 1.11
diff -u -p -r1.11 tracker-extract.c
--- src/tracker-extract/tracker-extract.c 27 Sep 2006 10:34:55 -0000 1.11
+++ src/tracker-extract/tracker-extract.c 3 Oct 2006 00:57:32 -0000
@@ -129,6 +129,9 @@ void tracker_extract_ps (gchar *, GHa
void tracker_extract_pdf (gchar *, GHashTable *);
#endif
void tracker_extract_abw (gchar *, GHashTable *);
+#ifdef HAVE_LIBGSF
+void tracker_extract_msoffice (gchar *, GHashTable *);
+#endif
#ifdef HAVE_VORBIS
void tracker_extract_vorbis (gchar *, GHashTable *);
#endif
@@ -150,6 +153,11 @@ MimeToExtractor extractors[] = {
{ "application/pdf", tracker_extract_pdf },
#endif
{ "application/x-abiword", tracker_extract_abw },
+#ifdef HAVE_LIBGSF
+ { "application/msword", tracker_extract_msoffice },
+ { "application/vnd.ms-excel", tracker_extract_msoffice },
+ { "application/vnd.ms-powerpoint", tracker_extract_msoffice },
+#endif
/* Video extractors */
_______________________________________________
tracker-list mailing list
[email protected]
http://mail.gnome.org/mailman/listinfo/tracker-list