Hi there,
So I started taking a look at how tracker-miner-file.c processes
files by sending them to the tracker-extract process using the
libtracker-extract library.
It uses quite a few GAsync calls in cascading order before it is all
done processing. But so far only the tracker:volume handling is in the
wrong location and not accessible to libtracker-extract users.
That is the miner_files_add_to_datasource call in tracker-miner-file.c
and all the code that depends on this call: mostly volume handling.
Everything else I've started porting to this extract-test.c file that
probably, most likely even, doesn't yet compile but that has the
beginnings of a get_metadata_async and a get_metadata_finish API that
will be usable by an external process like an MTP daemon.
The idea is to put such an API into libtracker-extract and then nicely
wrap it with Qt, C#, Vala, etc bindings.
I've just attached my unfinished business as my gf has arrived and I
really need to stop coding in a few minutes (or else ...).
:-)
Any experienced Tracker developer will see where I'm going with that
code. An early review would be welcome.
Kind regards,
Philip
--
Philip Van Hoof
Software developer
Codeminded BVBA - http://codeminded.be
#include <libtracker-sparql/tracker-sparql.h>
#include <libtracker-extract/tracker-extract.h>
#define MTP_GRAPH_URN "urn:uuid:fd9d3960-4600-11e2-bcfd-0800200c9a66"
typedef struct {
TrackerSparqlBuilder *sparql
GFile *file;
gchar *urn;
gchar *url;
gchar *filename;
GSimpleAsyncResult *simple;
} ExtractionData;
/*
 * extraction_data_free:
 * @data: per-request state to tear down
 *
 * Releases everything owned by @data -- the strings, the GFile, the
 * SPARQL builder and the structure itself -- and hands back the pending
 * GSimpleAsyncResult so the caller can complete it.
 *
 * The returned result is not unreferenced here; the caller still holds
 * the reference taken when the result was created.
 *
 * Returns: the GSimpleAsyncResult that was stored in @data.
 */
static GSimpleAsyncResult*
extraction_data_free (ExtractionData *data)
{
	GSimpleAsyncResult *simple;

	g_return_val_if_fail (data != NULL, NULL);

	/* Grab the result first: @data is gone by the time we return. */
	simple = data->simple;

	g_free (data->urn);
	g_free (data->url);
	g_free (data->filename);
	g_clear_object (&data->file);
	g_clear_object (&data->sparql);

	/* The structure itself was allocated by get_metadata(). */
	g_free (data);

	return simple;
}
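/* TODO: port miner_files_add_to_datasource (not necessarily easy to add:
 * it depends on TrackerMinerFiles' private storage for volume handling)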
static void
miner_files_add_to_datasource (TrackerMinerFiles *mf,
GFile *file,
TrackerSparqlBuilder *sparql)
{
TrackerMinerFilesPrivate *priv;
const gchar *removable_device_uuid;
gchar *removable_device_urn, *uri;
const gchar *urn;
gboolean is_iri;
priv = TRACKER_MINER_FILES_GET_PRIVATE (mf);
uri = g_file_get_uri (file);
removable_device_uuid = tracker_storage_get_uuid_for_file (priv->storage, file);
if (removable_device_uuid) {
removable_device_urn = g_strdup_printf (TRACKER_DATASOURCE_URN_PREFIX "%s",
removable_device_uuid);
} else {
removable_device_urn = g_strdup (TRACKER_NON_REMOVABLE_MEDIA_DATASOURCE_URN);
}
urn = miner_files_get_file_urn (mf, file, &is_iri);
if (is_iri) {
tracker_sparql_builder_subject_iri (sparql, urn);
} else {
tracker_sparql_builder_subject (sparql, urn);
}
tracker_sparql_builder_predicate (sparql, "a");
tracker_sparql_builder_object (sparql, "nfo:FileDataObject");
tracker_sparql_builder_predicate (sparql, "nie:dataSource");
tracker_sparql_builder_object_iri (sparql, removable_device_urn);
tracker_sparql_builder_predicate (sparql, "tracker:available");
tracker_sparql_builder_object_boolean (sparql, TRUE);
g_free (removable_device_urn);
g_free (uri);
}
*/
/*
 * sparql_builder_finish:
 * @data:       request state whose builder is being finalised
 * @preupdate:  SPARQL text to place in front of the update, or NULL
 * @postupdate: SPARQL text to place after the update, or NULL
 * @where:      body of an optional WHERE clause, or NULL
 *
 * Closes the graph and insert sections opened on data->sparql, then
 * attaches the optional WHERE clause and the pre/post update text.
 * NULL and empty strings are both treated as "nothing to add".
 */
static void
sparql_builder_finish (ExtractionData *data,
                       const gchar *preupdate,
                       const gchar *postupdate,
                       const gchar *where)
{
	TrackerSparqlBuilder *builder = data->sparql;
	const gboolean has_where = (where != NULL && where[0] != '\0');
	const gboolean has_pre = (preupdate != NULL && preupdate[0] != '\0');
	const gboolean has_post = (postupdate != NULL && postupdate[0] != '\0');

	tracker_sparql_builder_graph_close (builder);
	tracker_sparql_builder_insert_close (builder);

	if (has_where) {
		tracker_sparql_builder_where_open (builder);
		tracker_sparql_builder_append (builder, where);
		tracker_sparql_builder_where_close (builder);
	}

	/* Pre-update text goes in front of everything built so far ... */
	if (has_pre) {
		tracker_sparql_builder_prepend (builder, preupdate);
	}

	/* ... and post-update text goes at the very end. */
	if (has_post) {
		tracker_sparql_builder_append (builder, postupdate);
	}
}
/*
 * extractor_get_embedded_metadata_cb:
 * @object:    the GFile the extraction was requested for
 * @result:    result of tracker_extract_client_get_metadata()
 * @user_data: the ExtractionData of the request
 *
 * Last step of the chain. On success the builder is finalised with the
 * extractor's pre-update, post-update and WHERE text, and the resulting
 * SPARQL string becomes the value of the pending async result. On
 * failure the error is stored in the pending result instead. Either way
 * the request state is freed and the result is completed.
 */
static void
extractor_get_embedded_metadata_cb (GObject *object, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GSimpleAsyncResult *simple;
	GError *error = NULL;
	TrackerExtractInfo *info;

	info = tracker_extract_client_get_metadata_finish (G_FILE (object), result, &error);

	if (error == NULL) {
		TrackerSparqlBuilder *preupdate, *postupdate;
		const gchar *where;
		gchar *sparql_s;

		preupdate = tracker_extract_info_get_preupdate_builder (info);
		postupdate = tracker_extract_info_get_postupdate_builder (info);
		where = tracker_extract_info_get_where_clause (info);

		/* NOTE(review): the result of
		 * tracker_extract_info_get_metadata_builder (info) is not merged
		 * into data->sparql here -- confirm whether the extracted
		 * metadata should be appended before finishing. */

		sparql_builder_finish (data,
		                       tracker_sparql_builder_get_result (preupdate),
		                       tracker_sparql_builder_get_result (postupdate),
		                       where);

		/* Copy the text out before the builder is released below. */
		sparql_s = g_strdup (tracker_sparql_builder_get_result (data->sparql));
		g_simple_async_result_set_op_res_gpointer (data->simple, sparql_s, g_free);
	} else {
		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);
	}

	/* And .. we're done: release the request state, notify the caller and
	 * drop the reference taken when the result was created. */
	simple = extraction_data_free (data);
	g_simple_async_result_complete (simple);
	g_object_unref (simple);
}
/*
 * on_fileinfo_received:
 * @file:      the GFile whose info was queried
 * @result:    result of g_file_query_info_async()
 * @user_data: the ExtractionData of the request
 *
 * Adds the basic file properties (name, size, modification and access
 * times, URL, MIME type) to the builder. If the MIME type is handled by
 * an extractor module, embedded metadata extraction is requested next;
 * otherwise the builder is finalised and the pending async result is
 * completed here. On error the pending result is completed with that
 * error.
 */
static void
on_fileinfo_received (GObject *file, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	TrackerSparqlBuilder *sparql = data->sparql;
	GSimpleAsyncResult *simple;
	GError *error = NULL;
	GFileInfo *file_info;
	const gchar *mime_type;
	time_t time_;

	file_info = g_file_query_info_finish (G_FILE (file), result, &error);

	if (error != NULL) {
		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);
		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
		return;
	}

	tracker_sparql_builder_predicate (sparql, "nfo:fileName");
	tracker_sparql_builder_object_string (sparql, g_file_info_get_display_name (file_info));

	tracker_sparql_builder_predicate (sparql, "nfo:fileSize");
	tracker_sparql_builder_object_int64 (sparql, g_file_info_get_size (file_info));

	/* The attribute is a 64-bit value; convert it into a real time_t
	 * instead of reinterpreting its storage through a pointer cast. */
	time_ = (time_t) g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_MODIFIED);
	tracker_sparql_builder_predicate (sparql, "nfo:fileLastModified");
	tracker_sparql_builder_object_date (sparql, &time_);

	time_ = (time_t) g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_ACCESS);
	tracker_sparql_builder_predicate (sparql, "nfo:fileLastAccessed");
	tracker_sparql_builder_object_date (sparql, &time_);

	/* Laying the link between the IE and the DO. We use IE = DO */
	tracker_sparql_builder_predicate (sparql, "nie:isStoredAs");
	if (data->urn) {
		tracker_sparql_builder_object_iri (sparql, data->urn);
	} else {
		tracker_sparql_builder_object (sparql, "_:file");
	}

	/* The URL of the DataObject (because IE = DO, this is correct) */
	tracker_sparql_builder_predicate (sparql, "nie:url");
	tracker_sparql_builder_object_string (sparql, data->url);

	/* The content type may be missing; only emit it when known. */
	mime_type = g_file_info_get_content_type (file_info);
	if (mime_type != NULL) {
		tracker_sparql_builder_predicate (sparql, "nie:mimeType");
		tracker_sparql_builder_object_string (sparql, mime_type);
	}

	/* TODO: port
	 * miner_files_add_to_datasource (data->miner, file, sparql); */

	if (mime_type != NULL &&
	    tracker_extract_module_manager_mimetype_is_handled (mime_type)) {
		/* Next step, if handled by the extractor, get embedded metadata */
		tracker_extract_client_get_metadata (data->file, mime_type,
		                                     MTP_GRAPH_URN, NULL,
		                                     extractor_get_embedded_metadata_cb,
		                                     data);
	} else {
		gchar *sparql_s;

		/* Otherwise, don't request embedded metadata extraction. We're done here */
		sparql_builder_finish (data, NULL, NULL, NULL);
		sparql_s = g_strdup (tracker_sparql_builder_get_result (data->sparql));
		g_simple_async_result_set_op_res_gpointer (data->simple, sparql_s, g_free);

		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
	}

	/* mime_type points into file_info, so release it only now. */
	g_object_unref (file_info);
}
/*
 * on_parent_received:
 * @con:       the TrackerSparqlConnection the query ran on
 * @result:    result of the parent lookup query
 * @user_data: the ExtractionData of the request
 *
 * If the parent lookup returned a row, its first column is recorded as
 * nfo:belongsToContainer. The file's info is then queried
 * asynchronously, continuing in on_fileinfo_received(). On error the
 * pending async result is completed with that error.
 */
static void
on_parent_received (GObject *con, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	TrackerSparqlBuilder *sparql = data->sparql;
	GError *error = NULL;
	TrackerSparqlCursor *cursor;
	const gchar *attrs;

	cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (con),
	                                                 result, &error);

	if (error != NULL) {
		GSimpleAsyncResult *simple;

		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);
		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
		return;
	}

	if (cursor != NULL) {
		/* Only the first row matters. */
		if (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
			const gchar *parent_urn;

			parent_urn = tracker_sparql_cursor_get_string (cursor, 0, NULL);
			if (parent_urn != NULL) {
				tracker_sparql_builder_predicate (sparql, "nfo:belongsToContainer");
				tracker_sparql_builder_object_iri (sparql, parent_urn);
			}
		}
		g_object_unref (cursor);
	}

	attrs = G_FILE_ATTRIBUTE_STANDARD_TYPE ","
	        G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE ","
	        G_FILE_ATTRIBUTE_STANDARD_DISPLAY_NAME ","
	        G_FILE_ATTRIBUTE_STANDARD_SIZE ","
	        G_FILE_ATTRIBUTE_TIME_MODIFIED ","
	        G_FILE_ATTRIBUTE_TIME_ACCESS;

	g_file_query_info_async (data->file, attrs,
	                         G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
	                         G_PRIORITY_DEFAULT, NULL,
	                         on_fileinfo_received, data);
}
/*
 * on_file_exists_checked:
 * @con:       the TrackerSparqlConnection the query ran on
 * @result:    result of the lookup of the file by nie:url
 * @user_data: the ExtractionData of the request
 *
 * Records the first row of the lookup (if any) as data->urn, opens the
 * INSERT for the file in MTP_GRAPH_URN with either that URN or the
 * blank node "_:file" as subject, and then looks up the parent
 * directory by URL, continuing in on_parent_received(). On error the
 * pending async result is completed with that error.
 */
static void
on_file_exists_checked (GObject *con, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	TrackerSparqlConnection *connection = TRACKER_SPARQL_CONNECTION (con);
	GError *error = NULL;
	TrackerSparqlCursor *cursor;
	TrackerSparqlBuilder *sparql;
	GFile *parent;
	gchar *parent_url, *escaped_url, *qry;

	cursor = tracker_sparql_connection_query_finish (connection, result, &error);

	if (error != NULL) {
		GSimpleAsyncResult *simple;

		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);
		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
		return;
	}

	if (cursor != NULL) {
		/* Only the first row matters. */
		if (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
			data->urn = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL));
		}
		g_object_unref (cursor);
	}

	/* insert_silent_open starts a complete INSERT statement, so an update
	 * builder is used rather than an embedded-insert one. */
	sparql = tracker_sparql_builder_new_update ();
	data->sparql = sparql;

	tracker_sparql_builder_insert_silent_open (sparql, NULL);
	tracker_sparql_builder_graph_open (sparql, MTP_GRAPH_URN);

	if (data->urn != NULL) {
		tracker_sparql_builder_subject_iri (sparql, data->urn);
	} else {
		tracker_sparql_builder_subject (sparql, "_:file");
	}

	tracker_sparql_builder_predicate (sparql, "a");
	tracker_sparql_builder_object (sparql, "nfo:FileDataObject");
	tracker_sparql_builder_object (sparql, "nie:InformationElement");

	/* No GFileInfo is available at this point of the chain, so the type
	 * is queried directly. This is a blocking call. */
	if (g_file_query_file_type (data->file,
	                            G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
	                            NULL) == G_FILE_TYPE_DIRECTORY) {
		tracker_sparql_builder_object (sparql, "nfo:Folder");
	}

	/* A file without a parent is looked up with an empty URL, which is
	 * expected to match nothing, so the chain continues unchanged. */
	parent = g_file_get_parent (data->file);
	if (parent != NULL) {
		parent_url = g_file_get_uri (parent);
		g_object_unref (parent);
	} else {
		parent_url = g_strdup ("");
	}

	/* Escape the URL so quote characters cannot break the query. */
	escaped_url = tracker_sparql_escape_string (parent_url);
	qry = g_strdup_printf ("SELECT ?urn { ?urn nie:url \"%s\" }", escaped_url);

	tracker_sparql_connection_query_async (connection, qry, NULL,
	                                       on_parent_received, data);

	g_free (qry);
	g_free (escaped_url);
	g_free (parent_url);
}
/*
 * on_get_connection:
 * @con:       source object of the async call (unused)
 * @result:    result of tracker_sparql_connection_get_async()
 * @user_data: the ExtractionData of the request
 *
 * Once the connection is available, builds the GFile and URI for
 * data->filename and looks the file up by URL, continuing in
 * on_file_exists_checked(). On error the pending async result is
 * completed with that error.
 */
static void
on_get_connection (GObject *con, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GError *error = NULL;
	TrackerSparqlConnection *connection;
	gchar *escaped_url, *qry;

	connection = tracker_sparql_connection_get_finish (result, &error);

	if (error != NULL) {
		GSimpleAsyncResult *simple;

		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);
		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
		return;
	}

	data->file = g_file_new_for_path (data->filename);
	data->url = g_file_get_uri (data->file);

	/* Escape the URL so quote characters cannot break the query. */
	escaped_url = tracker_sparql_escape_string (data->url);
	qry = g_strdup_printf ("SELECT ?urn { ?urn nie:url \"%s\" }", escaped_url);

	tracker_sparql_connection_query_async (connection, qry, NULL,
	                                       on_file_exists_checked, data);

	g_free (qry);
	g_free (escaped_url);

	/* NOTE(review): assumes the pending query keeps its source object
	 * alive until its callback has run -- confirm. The later callbacks
	 * reach the connection through their source-object argument. */
	g_object_unref (connection);
}
/*
 * get_metadata:
 * @filename:  path of the file to describe; must not be NULL
 * @callback:  function invoked when the operation completes
 * @user_data: data passed to @callback
 *
 * Starts the asynchronous chain that builds a SPARQL update for
 * @filename. The path is copied, so the caller may free it right away.
 * From within @callback, call get_metadata_finish() to obtain the
 * outcome.
 */
void
get_metadata (const gchar *filename, GAsyncReadyCallback callback, gpointer user_data)
{
	ExtractionData *data;

	/* A NULL path would otherwise reach g_file_new_for_path() later on. */
	g_return_if_fail (filename != NULL);

	data = g_new0 (ExtractionData, 1);
	data->filename = g_strdup (filename);
	data->simple = g_simple_async_result_new (NULL, callback, user_data, get_metadata);

	tracker_sparql_connection_get_async (NULL, on_get_connection, data);
}
/*
 * get_metadata_finish:
 * @none:   unused
 * @result: the GAsyncResult handed to the completion callback
 * @error:  return location for an error, or NULL
 *
 * Retrieves the outcome of get_metadata().
 *
 * Returns: NULL when an error was propagated into @error; otherwise the
 * pointer stored in @result. The pointer is returned without copying;
 * the code that stores it registers g_free as its destroy function, so
 * the string stays owned by @result.
 */
gchar*
get_metadata_finish (GObject *none, GAsyncResult *result, GError **error)
{
	GSimpleAsyncResult *async_res = (GSimpleAsyncResult *) result;

	return g_simple_async_result_propagate_error (async_res, error)
		? NULL
		: g_simple_async_result_get_op_res_gpointer (async_res);
}
/*
 * on_finished:
 * @none:      source object of the operation (unused)
 * @result:    result to pass to get_metadata_finish()
 * @user_data: the GMainLoop to quit once done, or NULL
 *
 * Prints the generated SPARQL on success, or the error message on
 * failure, then stops the main loop if one was supplied.
 */
static void
on_finished (GObject *none, GAsyncResult *result, gpointer user_data)
{
	GMainLoop *loop = user_data;
	GError *error = NULL;
	gchar *sparql = get_metadata_finish (none, result, &error);

	if (error == NULL) {
		/* The string is owned by @result, so it is not freed here. */
		g_print ("%s", sparql);
	} else {
		g_printerr ("%s\n", error->message);
	}

	g_clear_error (&error);

	if (loop != NULL) {
		g_main_loop_quit (loop);
	}
}

/*
 * main:
 *
 * Test driver: expects exactly one argument, the file to describe, and
 * runs a main loop until on_finished() has reported the outcome.
 *
 * Returns: 0 after the loop has finished, 1 on wrong usage.
 */
int main (int argc, char **argv)
{
	GMainLoop *loop;

	/* argv[0] is the program name, so one file argument means argc == 2. */
	if (argc != 2) {
		g_print ("Usage: %s filename\n", argv[0]);
		return 1;
	}

	g_type_init ();

	loop = g_main_loop_new (NULL, FALSE);

	/* Hand the loop to the callback so it can stop it when done. */
	get_metadata (argv[1], on_finished, loop);

	g_main_loop_run (loop);
	g_main_loop_unref (loop);

	return 0;
}
_______________________________________________
tracker-list mailing list
[email protected]
https://mail.gnome.org/mailman/listinfo/tracker-list