On 10/1/06, Jamie McCracken <[EMAIL PROTECTED]> wrote:

1) Remove libextractor by implementing the last few extractors (libgsf
for msoffice and reusing the extractors for gif and tiff in libextractor)


Here's a patch that adds the libgsf-based MS Office extractor.  Tested with Word and PowerPoint documents.


--
Mr Jamie McCracken
http://jamiemcc.livejournal.com/

_______________________________________________
tracker-list mailing list
[email protected]
http://mail.gnome.org/mailman/listinfo/tracker-list

Index: configure.in
===================================================================
RCS file: /cvs/gnome/tracker/configure.in,v
retrieving revision 1.20
diff -u -p -r1.20 configure.in
--- configure.in	27 Sep 2006 10:38:12 -0000	1.20
+++ configure.in	3 Oct 2006 00:53:02 -0000
@@ -374,6 +374,19 @@ AC_SUBST(LIBEXIF_CFLAGS)
 AC_SUBST(LIBEXIF_LIBS)
 test "$have_libexif" = "yes" && AC_DEFINE(HAVE_LIBEXIF, [], [Define if we have libexif])
 
+##################################################################
+# check for libgsf
+##################################################################
+
+LIBGSF_REQUIRED=1.13
+
+PKG_CHECK_MODULES(LIBGSF, [libgsf-1 >= $LIBGSF_REQUIRED], [have_libgsf=yes] , [have_libgsf=no])
+
+AM_CONDITIONAL(HAVE_LIBGSF, test "$have_libgsf" = "yes")
+AC_SUBST(LIBGSF_CFLAGS)
+AC_SUBST(LIBGSF_LIBS)
+test "$have_libgsf" = "yes" && AC_DEFINE(HAVE_LIBGSF, [], [Define if we have libgsf])
+
 
 #####################################################
 
@@ -425,5 +438,6 @@ Metadata extractors:
 	ogg/theora :				$have_theora
 	png :					$have_libpng
 	exif (jpeg) :				$have_libexif
+	gsf :					$have_libgsf
 "
 
Index: src/tracker-extract/Makefile.am
===================================================================
RCS file: /cvs/gnome/tracker/src/tracker-extract/Makefile.am,v
retrieving revision 1.5
diff -u -p -r1.5 Makefile.am
--- src/tracker-extract/Makefile.am	27 Sep 2006 10:34:55 -0000	1.5
+++ src/tracker-extract/Makefile.am	3 Oct 2006 00:53:28 -0000
@@ -3,7 +3,8 @@ INCLUDES = $(GLIB2_CFLAGS) $(CFLAGS) -g 
 	$(VORBIS_CFLAGS)			\
 	$(THEORA_CFLAGS)			\
 	$(LIBPNG_CFLAGS)			\
-	$(LIBEXIF_CFLAGS)
+	$(LIBEXIF_CFLAGS)			\
+	$(LIBGSF_CFLAGS)
 
 bin_PROGRAMS = tracker-extract
 
@@ -14,7 +15,8 @@ tracker_extract_SOURCES = tracker-extrac
 	tracker-extract-abw.c	\
 	tracker-extract-vorbis.c	\
 	tracker-extract-png.c \
-	tracker-extract-exif.c
+	tracker-extract-exif.c	\
+	tracker-extract-msoffice.c
 
 if USING_INTERNAL_LIBEXTRACTOR
 extractor_ldadd = $(top_builddir)/src/libextractor/src/main/libextractor.la
@@ -27,4 +29,5 @@ tracker_extract_LDADD = $(GLIB2_LIBS) $(
  		 $(VORBIS_LIBS) \
  		 $(THEORA_LIBS) \
  		 $(LIBPNG_LIBS) \
- 		 $(LIBEXIF_LIBS)
+ 		 $(LIBEXIF_LIBS) \
+		 $(LIBGSF_LIBS)
--- /dev/null	2006-08-05 19:53:54.000000000 -0400
+++ src/tracker-extract/tracker-extract-msoffice.c	2006-10-02 20:52:12.000000000 -0400
@@ -0,0 +1,122 @@
+
+#include "config.h"
+
+#ifdef HAVE_LIBGSF
+
+#include <stdio.h>
+#include <string.h>
+#include <glib.h>
+#include <gsf/gsf.h>
+#include <gsf/gsf-input-stdio.h>
+#include <gsf/gsf-infile.h>
+#include <gsf/gsf-infile-msole.h>
+#include <gsf/gsf-msole-utils.h>
+#include <gsf/gsf-doc-meta-data.h>
+
+/* Pairs a libgsf document property name with the metadata key it is stored under. */
+typedef struct {
+	const gchar *gsf_name;
+	const gchar *metadata_key;
+} PropertyMapping;
+
+static const PropertyMapping property_map[] = {
+	{ "dc:title",           "Doc.Title"     },
+	{ "dc:subject",         "Doc.Subject"   },
+	{ "dc:creator",         "Doc.Author"    },
+	{ "dc:keywords",        "Doc.Keywords"  },
+	{ "dc:description",     "Doc.Comment"   },
+	{ "gsf:page-count",     "Doc.PageCount" },
+	{ "gsf:word-count",     "Doc.WordCount" },
+	{ "meta:creation-date", "Doc.Created"   },
+	{ "meta:generator",     "File.Other"    },
+};
+
+/*
+ * Returns a newly allocated string for VAL, or NULL if there is none.
+ * String values are copied verbatim: g_strdup_value_contents() is meant
+ * for debugging output and would wrap them in quotes and escape them.
+ */
+static gchar *
+property_value_to_string (GValue const *val)
+{
+	if (val == NULL)
+		return NULL;
+	if (G_VALUE_HOLDS_STRING (val))
+		return g_value_dup_string (val);
+	return g_strdup_value_contents (val);
+}
+
+/*
+ * GHFunc for gsf_doc_meta_data_foreach(): KEY is the property name, VALUE
+ * the GsfDocProp and USER_DATA the GHashTable receiving the metadata.
+ * Properties missing from property_map, or without a value, are skipped.
+ */
+static void
+metadata_cb (gpointer key, gpointer value, gpointer user_data)
+{
+	const gchar *name = key;
+	GHashTable  *metadata = user_data;
+	guint        i;
+
+	for (i = 0; i < G_N_ELEMENTS (property_map); i++) {
+		gchar *text;
+
+		if (strcmp (name, property_map[i].gsf_name) != 0)
+			continue;
+
+		text = property_value_to_string (gsf_doc_prop_get_val (value));
+		if (text != NULL)
+			g_hash_table_insert (metadata, g_strdup (property_map[i].metadata_key), text);
+		return;
+	}
+}
+
+/*
+ * Reads the "\05SummaryInformation" stream of the OLE2 file FILENAME and
+ * adds the properties it recognises to METADATA.  Keys and values are
+ * inserted as newly allocated strings.  Any failure (unreadable file, not
+ * an OLE2 container, no summary stream, unparsable stream) leaves METADATA
+ * untouched and is not reported.
+ *
+ * NOTE(review): libgsf documents gsf_init() as required before first use;
+ * confirm that the program calls it during start-up.
+ */
+void
+tracker_extract_msoffice (gchar *filename, GHashTable *metadata)
+{
+	GsfInput       *input  = NULL;
+	GsfInfile      *infile = NULL;
+	GsfInput       *stream = NULL;
+	GsfDocMetaData *md     = NULL;
+	GError         *error;
+
+	/* Errors are not reported, so no GError is requested from these calls. */
+	if (!(input = gsf_input_stdio_new (filename, NULL)))
+		goto out;
+	if (!(infile = gsf_infile_msole_new (input, NULL)))
+		goto out;
+	if (!(stream = gsf_infile_child_by_name (infile, "\05SummaryInformation")))
+		goto out;
+
+	md = gsf_doc_meta_data_new ();
+	if ((error = gsf_msole_metadata_read (stream, md))) {
+		g_error_free (error);
+		goto out;
+	}
+	gsf_doc_meta_data_foreach (md, metadata_cb, metadata);
+
+out:
+	/* g_object_unref() rejects NULL, so release only what was acquired. */
+	if (md)
+		g_object_unref (md);
+	if (stream)
+		g_object_unref (stream);
+	if (infile)
+		g_object_unref (infile);
+	if (input)
+		g_object_unref (input);
+}
+
+#else
+#warning "Not building Microsoft Office metadata extractor."
+#endif  /* HAVE_LIBGSF */
Index: src/tracker-extract/tracker-extract.c
===================================================================
RCS file: /cvs/gnome/tracker/src/tracker-extract/tracker-extract.c,v
retrieving revision 1.11
diff -u -p -r1.11 tracker-extract.c
--- src/tracker-extract/tracker-extract.c	27 Sep 2006 10:34:55 -0000	1.11
+++ src/tracker-extract/tracker-extract.c	3 Oct 2006 00:57:32 -0000
@@ -129,6 +129,9 @@ void tracker_extract_ps    (gchar *, GHa
 void tracker_extract_pdf   (gchar *, GHashTable *);
 #endif
 void tracker_extract_abw   (gchar *, GHashTable *);
+#ifdef HAVE_LIBGSF
+void tracker_extract_msoffice   (gchar *, GHashTable *);
+#endif
 #ifdef HAVE_VORBIS
 void tracker_extract_vorbis   (gchar *, GHashTable *);
 #endif
@@ -150,6 +153,11 @@ MimeToExtractor extractors[] = {
 	{ "application/pdf",                                 tracker_extract_pdf   },
 #endif
 	{ "application/x-abiword",                           tracker_extract_abw   },
+#ifdef HAVE_LIBGSF
+	{ "application/msword",                              tracker_extract_msoffice   },
+	{ "application/vnd.ms-excel",                        tracker_extract_msoffice   },
+	{ "application/vnd.ms-powerpoint",                   tracker_extract_msoffice   },
+#endif
 
 
    /* Video extractors */
_______________________________________________
tracker-list mailing list
[email protected]
http://mail.gnome.org/mailman/listinfo/tracker-list

Reply via email to