Sorry, couldn't resist. I made some obvious fixes and added some TODO marks and
questions in the code for whoever reviews this idea to read.

Kind regards,

Philip

On Fri, 2012-12-14 at 18:15 +0100, Philip Van Hoof wrote:
> Hi there,
> 
> So I started taking a look at how tracker-miner-file.c processes the
> files by sending them to the tracker-extract process using the
> libtracker-extract library.
> 
> It uses quite a bit of GAsync calls in cascading order before it all is
> done processing. But so far only the tracker:volume handling is in the
> wrong location and not accessible by libtracker-extract users.
> 
> That is the miner_files_add_to_datasource call in tracker-miner-file.c
> and all the code that depends on this call: mostly volume handling.
> 
> Everything else I've started porting to this extract-test.c file, which
> most likely doesn't compile yet but has the beginnings of a
> get_metadata_async and a get_metadata_finish API that will be usable
> by an external process like an MTP daemon.
> 
> The idea is to put such an API into libtracker-extract and then nicely
> wrap it with Qt, C#, Vala, etc bindings.
> 
> I've just attached my unfinished business as my gf has arrived and I
> really need to stop coding in a few minutes (or else ...).
> 
> :-)
> 
> Any experienced Tracker developer will see where I'm going with that
> code. An early review would be welcome.
>  
> Kind regards,
> 
> Philip
> 
> _______________________________________________
> tracker-list mailing list
> [email protected]
> https://mail.gnome.org/mailman/listinfo/tracker-list

-- 


Philip Van Hoof
Software developer
Codeminded BVBA - http://codeminded.be
#include <libtracker-sparql/tracker-sparql.h>
#include <libtracker-extract/tracker-extract.h>

#define MTP_GRAPH_URN "urn:uuid:fd9d3960-4600-11e2-bcfd-0800200c9a66"

typedef struct {
	TrackerSparqlBuilder *sparql
	GFile *file;
	gchar *urn;
	gchar *url;
	GSimpleAsyncResult *simple;
} ExtractionData;

static GSimpleAsyncResult*
extraction_data_free (ExtractionData *data)
{
	GSimpleAsyncResult *simple = data->simple;
	
	g_free (data->urn);
	g_free (data->url);

	if (data->file) {
		g_object_unref (data->file);
	}

	if (data->sparql) {
		g_object_unref (data->sparql);
	}

	return simple;
}

/* TODO: port (not necessarily easy to add)
static void
miner_files_add_to_datasource (TrackerMinerFiles    *mf,
                               GFile                *file,
                               TrackerSparqlBuilder *sparql)
{
	TrackerMinerFilesPrivate *priv;
	const gchar *removable_device_uuid;
	gchar *removable_device_urn, *uri;
	const gchar *urn;
	gboolean is_iri;

	priv = TRACKER_MINER_FILES_GET_PRIVATE (mf);
	uri = g_file_get_uri (file);

	removable_device_uuid = tracker_storage_get_uuid_for_file (priv->storage, file);

	if (removable_device_uuid) {
		removable_device_urn = g_strdup_printf (TRACKER_DATASOURCE_URN_PREFIX "%s",
		                                        removable_device_uuid);
	} else {
		removable_device_urn = g_strdup (TRACKER_NON_REMOVABLE_MEDIA_DATASOURCE_URN);
	}

	urn = miner_files_get_file_urn (mf, file, &is_iri);

	if (is_iri) {
		tracker_sparql_builder_subject_iri (sparql, urn);
	} else {
		tracker_sparql_builder_subject (sparql, urn);
	}

	tracker_sparql_builder_predicate (sparql, "a");
	tracker_sparql_builder_object (sparql, "nfo:FileDataObject");

	tracker_sparql_builder_predicate (sparql, "nie:dataSource");
	tracker_sparql_builder_object_iri (sparql, removable_device_urn);

	tracker_sparql_builder_predicate (sparql, "tracker:available");
	tracker_sparql_builder_object_boolean (sparql, TRUE);

	g_free (removable_device_urn);
	g_free (uri);
}
*/

/*
 * sparql_builder_finish:
 * @data: extraction state whose builder is being finalized
 * @preupdate: SPARQL to put in front of the update, or NULL/empty
 * @postupdate: SPARQL to put after the update, or NULL/empty
 * @where: body of a WHERE clause for the insert, or NULL/empty
 *
 * Closes the graph and insert blocks that are open on data->sparql, adds the
 * optional WHERE clause and then attaches the optional pre- and post-update
 * statements.
 */
static void
sparql_builder_finish (ExtractionData *data,
                       const gchar    *preupdate,
                       const gchar    *postupdate,
                       const gchar    *where)
{
	TrackerSparqlBuilder *builder = data->sparql;
	gboolean has_where = (where != NULL && where[0] != '\0');
	gboolean has_preupdate = (preupdate != NULL && preupdate[0] != '\0');
	gboolean has_postupdate = (postupdate != NULL && postupdate[0] != '\0');

	tracker_sparql_builder_graph_close (builder);
	tracker_sparql_builder_insert_close (builder);

	if (has_where) {
		tracker_sparql_builder_where_open (builder);
		tracker_sparql_builder_append (builder, where);
		tracker_sparql_builder_where_close (builder);
	}

	/* Pre-update statements go in front of everything built so far */
	if (has_preupdate) {
		tracker_sparql_builder_prepend (builder, preupdate);
	}

	/* Post-update statements go at the very end */
	if (has_postupdate) {
		tracker_sparql_builder_append (builder, postupdate);
	}
}

/*
 * extractor_get_embedded_metadata_cb:
 * @object: source object of the extraction request (used as the GFile)
 * @result: the asynchronous result of the extraction
 * @user_data: the ExtractionData of the running operation
 *
 * Last step of the chain for files handled by the extractor. On success the
 * update in data->sparql is finished with the extractor's pre-update,
 * post-update and WHERE parts and its text becomes the operation result; on
 * failure the error is stored in the result. Either way the ExtractionData is
 * freed and the operation completed.
 */
static void
extractor_get_embedded_metadata_cb (GObject *object, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GSimpleAsyncResult *simple;
	GError *error = NULL;
	TrackerExtractInfo *info;

	info = tracker_extract_client_get_metadata_finish (G_FILE (object), result, &error);

	if (error == NULL) {
		TrackerSparqlBuilder *preupdate, *postupdate;
		const gchar *where;
		gchar *sparql_s;

		preupdate = tracker_extract_info_get_preupdate_builder (info);
		postupdate = tracker_extract_info_get_postupdate_builder (info);
		where = tracker_extract_info_get_where_clause (info);

		/* NOTE(review): the builder returned by
		 * tracker_extract_info_get_metadata_builder() is not merged into
		 * data->sparql, so the embedded metadata itself is not part of
		 * the result yet -- TODO confirm how it should be appended. */

		sparql_builder_finish (data,
		                       preupdate ? tracker_sparql_builder_get_result (preupdate) : NULL,
		                       postupdate ? tracker_sparql_builder_get_result (postupdate) : NULL,
		                       where);

		/* And .. we're done */
		sparql_s = g_strdup (tracker_sparql_builder_get_result (data->sparql));
		g_simple_async_result_set_op_res_gpointer (data->simple, sparql_s, g_free);

		/* NOTE(review): info is not released here because the ownership
		 * of the value returned by _finish() is not visible from this
		 * file -- confirm whether tracker_extract_info_unref() is due. */
	} else {
		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);
	}

	/* data is gone after this call; only the result object survives */
	simple = extraction_data_free (data);
	g_simple_async_result_complete (simple);
	g_object_unref (simple);
}

/*
 * on_fileinfo_received:
 * @file: source object of the query (the GFile that was queried)
 * @result: the asynchronous result of g_file_query_info_async()
 * @user_data: the ExtractionData of the running operation
 *
 * Adds name, size, modification/access time, nie:isStoredAs, nie:url and
 * (when known) nie:mimeType to the update. If the extractor handles the mime
 * type, embedded metadata extraction is requested and the chain continues in
 * extractor_get_embedded_metadata_cb(); otherwise the update is finished and
 * the operation completed here. A failed query completes the operation with
 * the error.
 */
static void
on_fileinfo_received (GObject *file, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GSimpleAsyncResult *simple;
	GError *error = NULL;
	GFileInfo *file_info;

	file_info = g_file_query_info_finish (G_FILE (file), result, &error);

	if (error == NULL) {
		TrackerSparqlBuilder *sparql = data->sparql;
		const gchar *mime_type;
		gchar *sparql_s;
		time_t time_;

		/* TODO: This should come from data->url, not data->file, so this is atm wrong!!! */
		tracker_sparql_builder_predicate (sparql, "nfo:fileName");
		tracker_sparql_builder_object_string (sparql, g_file_info_get_display_name (file_info));

		tracker_sparql_builder_predicate (sparql, "nfo:fileSize");
		tracker_sparql_builder_object_int64 (sparql, g_file_info_get_size (file_info));

		/* TODO: These two assume the process will copy from the temp file, perhaps allow
		 * passing these times to the API instead of getting them from the temp file? */

		/* Convert the 64 bit attribute by value; casting its address to
		 * time_t* would read the wrong bytes where time_t is narrower. */
		time_ = (time_t) g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_MODIFIED);
		tracker_sparql_builder_predicate (sparql, "nfo:fileLastModified");
		tracker_sparql_builder_object_date (sparql, &time_);

		time_ = (time_t) g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_ACCESS);
		tracker_sparql_builder_predicate (sparql, "nfo:fileLastAccessed");
		tracker_sparql_builder_object_date (sparql, &time_);

		/* Laying the link between the IE and the DO. We use IE = DO */
		tracker_sparql_builder_predicate (sparql, "nie:isStoredAs");
		if (data->urn) {
			tracker_sparql_builder_object_iri (sparql, data->urn);
		} else {
			tracker_sparql_builder_object (sparql, "_:file");
		}

		/* The URL of the DataObject (because IE = DO, this is correct) */
		tracker_sparql_builder_predicate (sparql, "nie:url");
		tracker_sparql_builder_object_string (sparql, data->url);

		mime_type = g_file_info_get_content_type (file_info);

		/* The content type may be unavailable; leave the property out then */
		if (mime_type != NULL) {
			tracker_sparql_builder_predicate (sparql, "nie:mimeType");
			tracker_sparql_builder_object_string (sparql, mime_type);
		}

		/* TODO: port
		  * miner_files_add_to_datasource (data->miner, file, sparql); */

		if (mime_type != NULL &&
		    tracker_extract_module_manager_mimetype_is_handled (mime_type)) {
			/* Next step, if handled by the extractor, get embedded metadata.
			 * data now belongs to extractor_get_embedded_metadata_cb(). */
			tracker_extract_client_get_metadata (data->file, mime_type,
			                                     MTP_GRAPH_URN, NULL,
			                                     extractor_get_embedded_metadata_cb,
			                                     data);

			/* NOTE(review): mime_type points into file_info; this assumes
			 * the call above copied it before returning -- confirm. */
			g_object_unref (file_info);
			return;
		}

		/* Otherwise, don't request embedded metadata extraction. We're done here */
		sparql_builder_finish (data, NULL, NULL, NULL);

		sparql_s = g_strdup (tracker_sparql_builder_get_result (data->sparql));
		g_simple_async_result_set_op_res_gpointer (data->simple, sparql_s, g_free);

		g_object_unref (file_info);
	} else {
		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);
	}

	/* data is gone after this call; only the result object survives */
	simple = extraction_data_free (data);
	g_simple_async_result_complete (simple);
	g_object_unref (simple);
}

/*
 * on_parent_received:
 * @con: source object of the query (the SPARQL connection)
 * @result: the asynchronous result of the parent lookup query
 * @user_data: the ExtractionData of the running operation
 *
 * If the query returned a row, its first column is added to the update as
 * nfo:belongsToContainer. The file attributes needed by
 * on_fileinfo_received() are then requested asynchronously. A failed query
 * completes the operation with the error.
 */
static void
on_parent_received (GObject *con, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GError *error = NULL;
	TrackerSparqlCursor *cursor;

	cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (con),
	                                                 result, &error);

	if (error == NULL) {
		const gchar *attrs;

		/* Only the first row matters */
		if (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
			const gchar *parent_urn;

			parent_urn = tracker_sparql_cursor_get_string (cursor, 0, NULL);

			if (parent_urn) {
				tracker_sparql_builder_predicate (data->sparql, "nfo:belongsToContainer");
				tracker_sparql_builder_object_iri (data->sparql, parent_urn);
			}
		}

		/* parent_urn pointed into the cursor, so release it only now */
		g_object_unref (cursor);

		attrs = G_FILE_ATTRIBUTE_STANDARD_TYPE ","
			G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE ","
			G_FILE_ATTRIBUTE_STANDARD_DISPLAY_NAME ","
			G_FILE_ATTRIBUTE_STANDARD_SIZE ","
			G_FILE_ATTRIBUTE_TIME_MODIFIED ","
			G_FILE_ATTRIBUTE_TIME_ACCESS;

		g_file_query_info_async (data->file, attrs, G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
		                         G_PRIORITY_DEFAULT, NULL,
		                         on_fileinfo_received, data);
	} else {
		GSimpleAsyncResult *simple;

		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);

		/* data is gone after this call; only the result object survives */
		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
	}
}

/*
 * on_file_exists_checked:
 * @con: source object of the query (the SPARQL connection)
 * @result: the asynchronous result of the existence query
 * @user_data: the ExtractionData of the running operation
 *
 * Remembers the URN returned for data->url (if any), opens the insert into
 * MTP_GRAPH_URN with either that URN or the blank node "_:file" as subject,
 * writes the rdf types, and then asks the store for the resource whose
 * nie:url is the URI of the parent of data->file. Failures complete the
 * operation with an error.
 */
static void
on_file_exists_checked (GObject *con, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GSimpleAsyncResult *simple;
	GError *error = NULL;
	TrackerSparqlCursor *cursor;
	TrackerSparqlBuilder *sparql;
	GFile *parent;
	gchar *url, *escaped, *qry;

	cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (con),
	                                                 result, &error);

	if (error != NULL) {
		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);
		goto fail;
	}

	/* Only the first row matters */
	if (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
		data->urn = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL));
	}

	g_object_unref (cursor);

	/* NOTE(review): this is the parent of the local file, while the
	 * existence check above used data->url -- confirm which one the
	 * container lookup is meant to use. */
	parent = g_file_get_parent (data->file);

	if (parent == NULL) {
		g_simple_async_result_set_error (data->simple, G_IO_ERROR,
		                                 G_IO_ERROR_INVALID_ARGUMENT,
		                                 "File has no parent directory");
		goto fail;
	}

	/* insert_silent_open() below starts an update, so the builder must be
	 * an update builder rather than an embedded-insert one. */
	sparql = tracker_sparql_builder_new_update ();

	tracker_sparql_builder_insert_silent_open (sparql, NULL);
	tracker_sparql_builder_graph_open (sparql, MTP_GRAPH_URN);

	if (data->urn != NULL) {
		tracker_sparql_builder_subject_iri (sparql, data->urn);
	} else {
		tracker_sparql_builder_subject (sparql, "_:file");
	}

	tracker_sparql_builder_predicate (sparql, "a");
	tracker_sparql_builder_object (sparql, "nfo:FileDataObject");
	tracker_sparql_builder_object (sparql, "nie:InformationElement");

	/* No GFileInfo exists at this point, so ask for the type directly.
	 * This is a synchronous call. */
	if (g_file_query_file_type (data->file, G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
	                            NULL) == G_FILE_TYPE_DIRECTORY) {
		tracker_sparql_builder_object (sparql, "nfo:Folder");
	}

	data->sparql = sparql;

	url = g_file_get_uri (parent);
	/* Escape the URI so a quote in it cannot break out of the literal */
	escaped = tracker_sparql_escape_string (url);
	qry = g_strdup_printf ("select ?urn { ?urn nie:url '%s' }", escaped);

	tracker_sparql_connection_query_async (TRACKER_SPARQL_CONNECTION (con), qry, NULL,
	                                       on_parent_received, data);

	g_free (qry);
	g_free (escaped);
	g_free (url);
	g_object_unref (parent);

	return;

fail:
	/* data is gone after this call; only the result object survives */
	simple = extraction_data_free (data);
	g_simple_async_result_complete (simple);
	g_object_unref (simple);
}

/*
 * on_get_connection:
 * @source: source object of the request (not used)
 * @result: the asynchronous result of tracker_sparql_connection_get_async()
 * @user_data: the ExtractionData of the running operation
 *
 * Stores the connection in the ExtractionData and queries the store for a
 * resource whose nie:url equals data->url; the chain continues in
 * on_file_exists_checked(). If no connection could be obtained the operation
 * is completed with the error.
 */
static void
on_get_connection (GObject *source, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GError *error = NULL;

	data->con = tracker_sparql_connection_get_finish (result, &error);

	if (error == NULL) {
		gchar *escaped, *qry;

		/* Escape the URL so a quote in it cannot break out of the literal */
		escaped = tracker_sparql_escape_string (data->url);
		qry = g_strdup_printf ("select ?urn { ?urn nie:url '%s' }", escaped);

		tracker_sparql_connection_query_async (data->con, qry, NULL,
		                                       on_file_exists_checked, data);

		g_free (qry);
		g_free (escaped);
	} else {
		GSimpleAsyncResult *simple;

		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);

		/* data is gone after this call; only the result object survives */
		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
	}
}

/* TODO: Question: pass the modification time and accessed time here or get the values from the tempfile? */

/*
 * get_metadata:
 * @temp_file: path of the local file to read the metadata from
 * @dest_url: URL to record as nie:url for the file
 * @callback: called once the operation has finished or failed
 * @user_data: data passed to @callback
 *
 * Starts the asynchronous construction of the SPARQL update describing
 * @temp_file. The first step is obtaining a SPARQL connection; the work
 * continues in on_get_connection(). Fetch the outcome from @callback with
 * get_metadata_finish().
 */
void
get_metadata (const gchar *temp_file, const gchar *dest_url, GAsyncReadyCallback callback, gpointer user_data)
{
	ExtractionData *data;

	/* Both strings are used unconditionally further down the chain */
	g_return_if_fail (temp_file != NULL);
	g_return_if_fail (dest_url != NULL);

	data = g_new0 (ExtractionData, 1);

	data->file = g_file_new_for_path (temp_file);
	data->url = g_strdup (dest_url);
	data->simple = g_simple_async_result_new (NULL, callback, user_data, get_metadata);

	tracker_sparql_connection_get_async (NULL, on_get_connection, data);
}

/*
 * get_metadata_finish:
 * @none: source object passed to the callback (not used)
 * @result: the result passed to the callback given to get_metadata()
 * @error: return location for an error, or NULL
 *
 * Retrieves the outcome of get_metadata().
 *
 * Returns: a newly allocated copy of the SPARQL update, to be released with
 * g_free(), or NULL when the operation failed and @error was set
 */
gchar*
get_metadata_finish (GObject *none, GAsyncResult *result, GError **error)
{
	GSimpleAsyncResult *simple;

	g_return_val_if_fail (G_IS_SIMPLE_ASYNC_RESULT (result), NULL);

	simple = G_SIMPLE_ASYNC_RESULT (result);

	if (g_simple_async_result_propagate_error (simple, error))
		return NULL;

	/* The stored string is freed together with the result object, so the
	 * caller gets a copy of its own. */
	return g_strdup (g_simple_async_result_get_op_res_gpointer (simple));
}

/*
 * on_finished:
 * @none: source object of the operation
 * @result: the result of get_metadata()
 * @user_data: a GMainLoop to quit once the outcome was printed, or NULL
 *
 * Prints the generated SPARQL, or the error message if the operation failed.
 */
static void
on_finished (GObject *none, GAsyncResult *result, gpointer user_data)
{
	GError *error = NULL;
	gchar *sparql = get_metadata_finish (none, result, &error);

	if (error == NULL) {
		if (sparql != NULL) {
			g_print ("%s", sparql);
		}
	} else {
		g_printerr ("%s\n", error->message);
	}

	g_free (sparql);
	g_clear_error (&error);

	/* Without this the program would sit in the main loop forever */
	if (user_data != NULL) {
		g_main_loop_quit (user_data);
	}
}

/*
 * main:
 * Runs get_metadata() on the file named by the single command line argument
 * and prints the outcome.
 */
int main (int argc, char **argv)
{
	GMainLoop *loop;

	/* argv[0] plus exactly one filename */
	if (argc != 2) {
		g_printerr ("Usage: %s filename\n", argv[0]);
		return 1;
	}

	g_type_init();

	loop = g_main_loop_new (NULL, FALSE);

	/* NOTE(review): the filename doubles as destination URL here, as in
	 * the original call -- confirm whether a real URI should be passed. */
	get_metadata (argv[1], argv[1], on_finished, loop);

	g_main_loop_run (loop);
	g_main_loop_unref (loop);

	return 0;
}


_______________________________________________
tracker-list mailing list
[email protected]
https://mail.gnome.org/mailman/listinfo/tracker-list

Reply via email to