Sorry, couldn't resist. I made some obvious fixes and added some TODO marks
and questions in the code for whoever reviews this idea.
Kind regards,
Philip
On Fri, 2012-12-14 at 18:15 +0100, Philip Van Hoof wrote:
> Hi there,
>
> So I started taking a look at how tracker-miner-file.c processes the
> files by sending them to the tracker-extract process using the
> libtracker-extract library.
>
> It uses quite a bit of GAsync calls in cascading order before it all is
> done processing. But so far only the tracker:volume handling is in the
> wrong location and not accessible by libtracker-extract users.
>
> That is the miner_files_add_to_datasource call in tracker-miner-file.c
> and all the code that depends on this call: mostly volume handling.
>
> Everything else I've started porting to this extract-test.c file, which
> probably, most likely even, doesn't compile yet but has the skeleton
> of a get_metadata_async and a get_metadata_finish API that will be
> usable by an external process like an MTP daemon.
>
> The idea is to put such an API into libtracker-extract and then nicely
> wrap it with Qt, C#, Vala, etc bindings.
>
> I've just attached my unfinished business as my gf has arrived and I
> really need to stop coding in a few minutes (or else ...).
>
> :-)
>
> Any experienced Tracker developer will see where I'm going with that
> code. An early review would be welcome.
>
> Kind regards,
>
> Philip
>
> _______________________________________________
> tracker-list mailing list
> [email protected]
> https://mail.gnome.org/mailman/listinfo/tracker-list
--
Philip Van Hoof
Software developer
Codeminded BVBA - http://codeminded.be
#include <libtracker-sparql/tracker-sparql.h>
#include <libtracker-extract/tracker-extract.h>
#define MTP_GRAPH_URN "urn:uuid:fd9d3960-4600-11e2-bcfd-0800200c9a66"
typedef struct {
TrackerSparqlBuilder *sparql
GFile *file;
gchar *urn;
gchar *url;
GSimpleAsyncResult *simple;
} ExtractionData;
static GSimpleAsyncResult*
extraction_data_free (ExtractionData *data)
{
GSimpleAsyncResult *simple = data->simple;
g_free (data->urn);
g_free (data->url);
if (data->file) {
g_object_unref (data->file);
}
if (data->sparql) {
g_object_unref (data->sparql);
}
return simple;
}
/* TODO: port (not necessarily easy to add)
static void
miner_files_add_to_datasource (TrackerMinerFiles *mf,
GFile *file,
TrackerSparqlBuilder *sparql)
{
TrackerMinerFilesPrivate *priv;
const gchar *removable_device_uuid;
gchar *removable_device_urn, *uri;
const gchar *urn;
gboolean is_iri;
priv = TRACKER_MINER_FILES_GET_PRIVATE (mf);
uri = g_file_get_uri (file);
removable_device_uuid = tracker_storage_get_uuid_for_file (priv->storage, file);
if (removable_device_uuid) {
removable_device_urn = g_strdup_printf (TRACKER_DATASOURCE_URN_PREFIX "%s",
removable_device_uuid);
} else {
removable_device_urn = g_strdup (TRACKER_NON_REMOVABLE_MEDIA_DATASOURCE_URN);
}
urn = miner_files_get_file_urn (mf, file, &is_iri);
if (is_iri) {
tracker_sparql_builder_subject_iri (sparql, urn);
} else {
tracker_sparql_builder_subject (sparql, urn);
}
tracker_sparql_builder_predicate (sparql, "a");
tracker_sparql_builder_object (sparql, "nfo:FileDataObject");
tracker_sparql_builder_predicate (sparql, "nie:dataSource");
tracker_sparql_builder_object_iri (sparql, removable_device_urn);
tracker_sparql_builder_predicate (sparql, "tracker:available");
tracker_sparql_builder_object_boolean (sparql, TRUE);
g_free (removable_device_urn);
g_free (uri);
}
*/
/*
 * sparql_builder_finish:
 * @data: extraction state whose builder is finalised
 * @preupdate: SPARQL to put in front of the statement, or NULL/empty
 * @postupdate: SPARQL to put behind the statement, or NULL/empty
 * @where: body of a WHERE clause, or NULL/empty
 *
 * Closes the open graph and INSERT sections of data->sparql, then adds the
 * optional WHERE clause and the optional pre-/post-update fragments.
 */
static void
sparql_builder_finish (ExtractionData *data,
                       const gchar    *preupdate,
                       const gchar    *postupdate,
                       const gchar    *where)
{
	TrackerSparqlBuilder *builder = data->sparql;
	gboolean has_where = (where != NULL && where[0] != '\0');
	gboolean has_pre = (preupdate != NULL && preupdate[0] != '\0');
	gboolean has_post = (postupdate != NULL && postupdate[0] != '\0');

	tracker_sparql_builder_graph_close (builder);
	tracker_sparql_builder_insert_close (builder);

	if (has_where) {
		tracker_sparql_builder_where_open (builder);
		tracker_sparql_builder_append (builder, where);
		tracker_sparql_builder_where_close (builder);
	}

	/* Pre-update statements go in front of everything built so far */
	if (has_pre) {
		tracker_sparql_builder_prepend (builder, preupdate);
	}

	/* Post-update statements go at the very end */
	if (has_post) {
		tracker_sparql_builder_append (builder, postupdate);
	}
}
/*
 * extractor_get_embedded_metadata_cb:
 * @object: source object of the extraction request (the GFile)
 * @result: result of tracker_extract_client_get_metadata()
 * @user_data: the ExtractionData for this operation
 *
 * Last step when the extractor was asked for embedded metadata: finalises
 * the SPARQL in data->sparql with the extractor's pre-update, post-update
 * and WHERE parts, stores a copy of the resulting string as the operation
 * result (or the error on failure), frees @user_data and completes the
 * async result.
 */
static void
extractor_get_embedded_metadata_cb (GObject *object, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GSimpleAsyncResult *simple;
	GError *error = NULL;
	TrackerExtractInfo *info;

	info = tracker_extract_client_get_metadata_finish (G_FILE (object), result, &error);

	if (error == NULL) {
		TrackerSparqlBuilder *preupdate, *postupdate;
		const gchar *where;
		gchar *sparql_s;

		preupdate = tracker_extract_info_get_preupdate_builder (info);
		postupdate = tracker_extract_info_get_postupdate_builder (info);
		where = tracker_extract_info_get_where_clause (info);

		/* NOTE(review): the draft also fetched
		 * tracker_extract_info_get_metadata_builder (info) but never used
		 * the result, so the extracted statements themselves are not
		 * merged into data->sparql - confirm whether they should be.
		 * NOTE(review): info is not released here - confirm ownership. */

		sparql_builder_finish (data,
		                       tracker_sparql_builder_get_result (preupdate),
		                       tracker_sparql_builder_get_result (postupdate),
		                       where);

		/* And .. we're done. Copy the string: the builder dies with data. */
		sparql_s = g_strdup (tracker_sparql_builder_get_result (data->sparql));
		g_simple_async_result_set_op_res_gpointer (data->simple, sparql_s, g_free);
	} else {
		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);
	}

	/* data is gone after this call; only the returned result may be used */
	simple = extraction_data_free (data);
	g_simple_async_result_complete (simple);
	g_object_unref (simple);
}
/*
 * on_fileinfo_received:
 * @file: source object of the query (the GFile that was queried)
 * @result: result of g_file_query_info_async()
 * @user_data: the ExtractionData for this operation
 *
 * Adds name, size, timestamps, nie:isStoredAs, nie:url and the MIME type
 * to data->sparql. If the MIME type is handled by an extractor module the
 * embedded metadata is requested next; otherwise the SPARQL is finalised
 * and the operation is completed here. On error the operation is completed
 * with that error.
 */
static void
on_fileinfo_received (GObject *file, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GSimpleAsyncResult *simple;
	GError *error = NULL;
	GFileInfo *file_info;
	TrackerSparqlBuilder *sparql;
	const gchar *mime_type;
	time_t time_;

	file_info = g_file_query_info_finish (G_FILE (file), result, &error);

	if (error != NULL) {
		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);

		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
		return;
	}

	sparql = data->sparql;

	/* TODO: This should come from data->url, not data->file, so this is atm wrong!!! */
	tracker_sparql_builder_predicate (sparql, "nfo:fileName");
	tracker_sparql_builder_object_string (sparql, g_file_info_get_display_name (file_info));

	tracker_sparql_builder_predicate (sparql, "nfo:fileSize");
	tracker_sparql_builder_object_int64 (sparql, g_file_info_get_size (file_info));

	/* TODO: These two assume the process will copy from the temp file, perhaps allow
	 * passing these times to the API instead of getting them from the temp file? */

	/* Convert into a real time_t first: taking the address of a guint64 and
	 * casting it to time_t* reads the wrong bytes when the sizes differ. */
	time_ = (time_t) g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_MODIFIED);
	tracker_sparql_builder_predicate (sparql, "nfo:fileLastModified");
	tracker_sparql_builder_object_date (sparql, &time_);

	time_ = (time_t) g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_ACCESS);
	tracker_sparql_builder_predicate (sparql, "nfo:fileLastAccessed");
	tracker_sparql_builder_object_date (sparql, &time_);

	/* Laying the link between the IE and the DO. We use IE = DO */
	tracker_sparql_builder_predicate (sparql, "nie:isStoredAs");
	if (data->urn) {
		tracker_sparql_builder_object_iri (sparql, data->urn);
	} else {
		tracker_sparql_builder_object (sparql, "_:file");
	}

	/* The URL of the DataObject (because IE = DO, this is correct) */
	tracker_sparql_builder_predicate (sparql, "nie:url");
	tracker_sparql_builder_object_string (sparql, data->url);

	/* The content type may be unset; only emit nie:mimeType when known */
	mime_type = g_file_info_get_content_type (file_info);
	if (mime_type != NULL) {
		tracker_sparql_builder_predicate (sparql, "nie:mimeType");
		tracker_sparql_builder_object_string (sparql, mime_type);
	}

	/* TODO: port
	 * miner_files_add_to_datasource (data->miner, file, sparql); */

	if (mime_type != NULL &&
	    tracker_extract_module_manager_mimetype_is_handled (mime_type)) {
		/* Next step, if handled by the extractor, get embedded metadata */
		tracker_extract_client_get_metadata (data->file, mime_type,
		                                     MTP_GRAPH_URN, NULL,
		                                     extractor_get_embedded_metadata_cb,
		                                     data);
	} else {
		gchar *sparql_s;

		/* Otherwise, don't request embedded metadata extraction. We're done here */
		sparql_builder_finish (data, NULL, NULL, NULL);
		sparql_s = g_strdup (tracker_sparql_builder_get_result (data->sparql));
		g_simple_async_result_set_op_res_gpointer (data->simple, sparql_s, g_free);

		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
	}

	/* mime_type points into file_info, so release it only now */
	g_object_unref (file_info);
}
/*
 * on_parent_received:
 * @con: source object of the query (the TrackerSparqlConnection)
 * @result: result of the parent-URN query
 * @user_data: the ExtractionData for this operation
 *
 * If the query returned a row, links the file to that URN with
 * nfo:belongsToContainer. Then asks GIO for the file attributes needed by
 * on_fileinfo_received(). On error the operation is completed with that
 * error.
 */
static void
on_parent_received (GObject *con, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GError *error = NULL;
	TrackerSparqlCursor *cursor;
	const gchar *attrs;

	cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (con),
	                                                 result, &error);

	if (error != NULL) {
		GSimpleAsyncResult *simple;

		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);

		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
		return;
	}

	/* Only the first row is of interest */
	if (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
		const gchar *parent_urn = tracker_sparql_cursor_get_string (cursor, 0, NULL);

		if (parent_urn != NULL) {
			tracker_sparql_builder_predicate (data->sparql, "nfo:belongsToContainer");
			tracker_sparql_builder_object_iri (data->sparql, parent_urn);
		}
	}

	/* parent_urn belongs to the cursor; it is not used past this point */
	g_object_unref (cursor);

	attrs = G_FILE_ATTRIBUTE_STANDARD_TYPE ","
	        G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE ","
	        G_FILE_ATTRIBUTE_STANDARD_DISPLAY_NAME ","
	        G_FILE_ATTRIBUTE_STANDARD_SIZE ","
	        G_FILE_ATTRIBUTE_TIME_MODIFIED ","
	        G_FILE_ATTRIBUTE_TIME_ACCESS;

	g_file_query_info_async (data->file, attrs, G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
	                         G_PRIORITY_DEFAULT, NULL,
	                         on_fileinfo_received, data);
}
/*
 * on_file_exists_checked:
 * @con: source object of the query (the TrackerSparqlConnection)
 * @result: result of the "does this URL already have a URN" query
 * @user_data: the ExtractionData for this operation
 *
 * Remembers the existing URN (if the query returned a row), opens the
 * INSERT into MTP_GRAPH_URN with the file as subject and its rdf types,
 * then queries the store for the URN of the parent of data->file. On error
 * the operation is completed with that error.
 */
static void
on_file_exists_checked (GObject *con, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GSimpleAsyncResult *simple;
	GError *error = NULL;
	TrackerSparqlCursor *cursor;
	TrackerSparqlBuilder *sparql;
	GFile *parent;
	gchar *url, *escaped, *qry;

	cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (con),
	                                                 result, &error);

	if (error != NULL) {
		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);

		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
		return;
	}

	/* Only the first row is of interest */
	if (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
		data->urn = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL));
	}
	g_object_unref (cursor);

	sparql = tracker_sparql_builder_new_embedded_insert ();
	data->sparql = sparql;

	tracker_sparql_builder_insert_silent_open (sparql, NULL);
	tracker_sparql_builder_graph_open (sparql, MTP_GRAPH_URN);

	if (data->urn != NULL) {
		tracker_sparql_builder_subject_iri (sparql, data->urn);
	} else {
		tracker_sparql_builder_subject (sparql, "_:file");
	}

	tracker_sparql_builder_predicate (sparql, "a");
	tracker_sparql_builder_object (sparql, "nfo:FileDataObject");
	tracker_sparql_builder_object (sparql, "nie:InformationElement");

	/* No GFileInfo exists yet at this step, so ask GIO directly. Note this
	 * is a synchronous (blocking) query on data->file. */
	if (g_file_query_file_type (data->file, G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, NULL) ==
	    G_FILE_TYPE_DIRECTORY) {
		tracker_sparql_builder_object (sparql, "nfo:Folder");
	}

	parent = g_file_get_parent (data->file);

	if (parent == NULL) {
		/* A file system root has no parent to look up */
		g_simple_async_result_set_error (data->simple, G_IO_ERROR,
		                                 G_IO_ERROR_INVALID_ARGUMENT,
		                                 "File has no parent directory");

		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
		return;
	}

	/* Escape the URI and use a double-quoted literal, so that quote
	 * characters in the URI cannot break out of the query string. */
	url = g_file_get_uri (parent);
	escaped = tracker_sparql_escape_string (url);
	qry = g_strdup_printf ("select ?urn { ?urn nie:url \"%s\" }", escaped);

	tracker_sparql_connection_query_async (TRACKER_SPARQL_CONNECTION (con), qry, NULL,
	                                       on_parent_received, data);

	g_free (qry);
	g_free (escaped);
	g_free (url);
	g_object_unref (parent);
}
/*
 * on_get_connection:
 * @con: source object of the async call (unused)
 * @result: result of tracker_sparql_connection_get_async()
 * @user_data: the ExtractionData for this operation
 *
 * Stores the obtained connection in data->con and asks the store whether a
 * resource with nie:url equal to data->url already exists. On error the
 * operation is completed with that error.
 */
static void
on_get_connection (GObject *con, GAsyncResult *result, gpointer user_data)
{
	ExtractionData *data = user_data;
	GError *error = NULL;
	TrackerSparqlConnection *connection;
	gchar *escaped, *qry;

	connection = tracker_sparql_connection_get_finish (result, &error);

	if (error != NULL) {
		GSimpleAsyncResult *simple;

		g_simple_async_result_set_from_error (data->simple, error);
		g_clear_error (&error);

		simple = extraction_data_free (data);
		g_simple_async_result_complete (simple);
		g_object_unref (simple);
		return;
	}

	/* Keep the connection with the rest of the per-operation state */
	data->con = connection;

	/* Escape the URL and use a double-quoted literal, so that quote
	 * characters in the URL cannot break out of the query string. */
	escaped = tracker_sparql_escape_string (data->url);
	qry = g_strdup_printf ("select ?urn { ?urn nie:url \"%s\" }", escaped);

	tracker_sparql_connection_query_async (data->con, qry, NULL,
	                                       on_file_exists_checked, data);

	g_free (qry);
	g_free (escaped);
}
/* TODO: Question: pass the modification time and accessed time here or get the values from the tempfile? */
/*
 * get_metadata:
 * @temp_file: local path of the file to examine
 * @dest_url: URL recorded as nie:url for the file
 * @callback: called when the operation has finished
 * @user_data: data passed to @callback
 *
 * Starts the asynchronous operation: allocates the per-operation state and
 * requests a SPARQL connection. The outcome is retrieved from within
 * @callback with get_metadata_finish().
 */
void
get_metadata (const gchar *temp_file, const gchar *dest_url, GAsyncReadyCallback callback, gpointer user_data)
{
	ExtractionData *data;

	/* Both strings are used unconditionally further down the chain */
	g_return_if_fail (temp_file != NULL);
	g_return_if_fail (dest_url != NULL);

	data = g_new0 (ExtractionData, 1);
	data->file = g_file_new_for_path (temp_file);
	data->url = g_strdup (dest_url);
	data->simple = g_simple_async_result_new (NULL, callback, user_data, get_metadata);

	tracker_sparql_connection_get_async (NULL, on_get_connection, data);
}
/*
 * get_metadata_finish:
 * @none: source object passed to the callback (unused)
 * @result: the GAsyncResult passed to the callback
 * @error: return location for an error, or NULL
 *
 * Returns: a newly allocated copy of the result string stored in @result,
 * to be released with g_free(); NULL if the operation failed (in which
 * case @error is set) or if no result was stored.
 */
gchar*
get_metadata_finish (GObject *none, GAsyncResult *result, GError **error)
{
	GSimpleAsyncResult *simple;

	g_return_val_if_fail (G_IS_SIMPLE_ASYNC_RESULT (result), NULL);

	simple = G_SIMPLE_ASYNC_RESULT (result);

	if (g_simple_async_result_propagate_error (simple, error))
		return NULL;

	/* The stored pointer stays owned by the result object, so hand out a
	 * copy; the caller can then free it without risking a double free. */
	return g_strdup (g_simple_async_result_get_op_res_gpointer (simple));
}

/*
 * on_finished:
 * @none: source object of the operation (unused)
 * @result: result to hand to get_metadata_finish()
 * @user_data: a GMainLoop to quit once the outcome is reported, or NULL
 *
 * Prints the resulting SPARQL to stdout, or the error message to stderr.
 */
static void
on_finished (GObject *none, GAsyncResult *result, gpointer user_data)
{
	GError *error = NULL;
	gchar *sparql = get_metadata_finish (none, result, &error);

	if (error == NULL) {
		if (sparql != NULL) {
			g_print ("%s", sparql);
		}
	} else {
		g_printerr ("%s\n", error->message);
	}

	g_free (sparql);
	g_clear_error (&error);

	if (user_data != NULL) {
		g_main_loop_quit (user_data);
	}
}
/*
 * Test driver: expects exactly one argument, the file to examine, starts
 * the metadata operation for it and runs a main loop.
 *
 * Returns 1 on wrong usage, 0 otherwise.
 */
int main (int argc, char **argv)
{
	GMainLoop *loop;

	/* argv[0] is the program name, so one file argument means argc == 2 */
	if (argc != 2) {
		g_print("Usage: %s filename", argv[0]);
		return 1;
	}

	g_type_init();

	loop = g_main_loop_new (NULL, FALSE);

	/* NOTE(review): the same string is used as temp file path and as
	 * destination URL, as in the draft - a path is not a URL; confirm.
	 * The loop is passed as user_data so the completion callback has what
	 * it needs to stop the loop. */
	get_metadata (argv[1], argv[1], on_finished, loop);

	g_main_loop_run(loop);
	g_main_loop_unref (loop);

	return 0;
}
_______________________________________________
tracker-list mailing list
[email protected]
https://mail.gnome.org/mailman/listinfo/tracker-list