Hi guys,
Here is a low hanging fruit patch done by the nice guys at Intel who for
some reason apparently didn't propose it to upstream that is now used by
Jolla's package:
https://build.pub.meego.com/package/files?package=tracker&project=CE%
3AMW%3AShared
So eh, let's get it upstream.
So can somebody review this for master? Martyn?
ps. I can afaik still commit and push to git-gnome.
Kind regards,
Philip
--
Philip Van Hoof
Software developer
Codeminded BVBA - http://codeminded.be
diff --git a/src/tracker-extract/tracker-extract-gstreamer.c b/src/tracker-extract/tracker-extract-gstreamer.c
index 2a2fb7b..15da85a 100644
--- a/src/tracker-extract/tracker-extract-gstreamer.c
+++ b/src/tracker-extract/tracker-extract-gstreamer.c
@@ -89,6 +89,29 @@
#define GST_TAG_FRAMERATE "framerate"
#endif
+/* These are in newer GStreamer releases, but we define them here
+ so we don't need to depend on a new release */
+#ifndef GST_TAG_SHOW_NAME
+#define GST_TAG_SHOW_NAME "show-name"
+#endif
+
+#ifndef GST_TAG_SHOW_EPISODE_NUMBER
+#define GST_TAG_SHOW_EPISODE_NUMBER "show-episode-number"
+#endif
+
+#ifndef GST_TAG_SHOW_SEASON_NUMBER
+#define GST_TAG_SHOW_SEASON_NUMBER "show-season-number"
+#endif
+
+#define TV_REGEX "(?<showname>.*)\\.(?<season>(?:\\d{1,2})|(?:[sS]\\K\\d{1,2}))(?<episode>(?:\\d{2})|(?:[eE]\\K\\d{1,2}))\\.?(?<name>.*)?"
+#define MOVIE_REGEX "(?<name>.*)\\.?[\\(\\[](?<year>[12][90]\\d{2})[\\)\\]]"
+
+typedef enum {
+ VIDEO_TYPE_UNKNOWN,
+ VIDEO_TYPE_MOVIE,
+ VIDEO_TYPE_SERIES
+} VideoType;
+
typedef enum {
EXTRACT_MIME_AUDIO,
EXTRACT_MIME_VIDEO,
@@ -1013,6 +1036,295 @@ delete_existing_tracks (TrackerSparqlBuilder *postupdate,
g_free (sparql);
}
+const gchar *blacklisted_prefix[] = {
+ "tpz-", NULL
+};
+
+/* Blacklisted are words that we ignore everything after */
+const char *blacklist[] = {
+ "720p", "1080p",
+ "ws", "WS", "proper", "PROPER",
+ "repack", "real.repack",
+ "hdtv", "HDTV", "pdtv", "PDTV", "notv", "NOTV",
+ "dsr", "DSR", "DVDRip", "divx", "DIVX", "xvid", "Xvid",
+ NULL
+};
+
+static gchar *
+sanitise_string (const gchar *str)
+{
+ int i;
+ gchar *line;
+
+ line = (gchar *) str;
+ for (i = 0; blacklisted_prefix[i]; i++) {
+ if (g_str_has_prefix (str, blacklisted_prefix[i])) {
+ int len = strlen (blacklisted_prefix[i]);
+
+ line = (gchar *) str + len;
+ }
+ }
+
+ for (i = 0; blacklist[i]; i++) {
+ gchar *end;
+
+ end = strstr (line, blacklist[i]);
+ if (end) {
+ return g_strndup (line, end - line);
+ }
+ }
+
+ return g_strdup (line);
+}
+
+/* tidies strings before we run them through the regexes */
+static gchar *
+uri_to_metadata (const gchar *uri)
+{
+ gchar *ext, *basename, *name, *whitelisted;
+
+ basename = g_path_get_basename (uri);
+ ext = strrchr (basename, '.');
+ if (ext) {
+ name = g_strndup (basename, ext - basename);
+ g_free (basename);
+ } else {
+ name = basename;
+ }
+
+ /* Replace _ <space> with . */
+ g_strdelimit (name, "_ ", '.');
+ whitelisted = sanitise_string (name);
+ g_free (name);
+
+ return whitelisted;
+}
+
+static VideoType
+parse_uri (const gchar *uri,
+ gchar **title,
+ gchar **showname,
+ GDate **date,
+ gint *season,
+ gint *episode)
+{
+ gchar *metadata;
+ GRegex *regex;
+ GMatchInfo *info;
+
+ metadata = uri_to_metadata (uri);
+
+ regex = g_regex_new (MOVIE_REGEX, 0, 0, NULL);
+ g_regex_match (regex, metadata, 0, &info);
+
+ if (g_match_info_matches (info)) {
+ if (title) {
+ *title= g_match_info_fetch_named (info, "name");
+ /* Replace "." with <space> */
+ g_strdelimit (*title, ".", ' ');
+ }
+
+ if (date) {
+ gchar *year = g_match_info_fetch_named (info, "year");
+
+ *date = g_date_new ();
+ g_date_set_year (*date, atoi (year));
+ g_free (year);
+ }
+
+ if (showname) {
+ *showname = NULL;
+ }
+
+ if (season) {
+ *season = 0;
+ }
+
+ if (episode) {
+ *episode = 0;
+ }
+
+ g_regex_unref (regex);
+ g_match_info_free (info);
+ g_free (metadata);
+
+ return VIDEO_TYPE_MOVIE;
+ }
+
+ g_regex_unref (regex);
+ g_match_info_free (info);
+
+ regex = g_regex_new (TV_REGEX, 0, 0, NULL);
+ g_regex_match (regex, metadata, 0, &info);
+
+ if (g_match_info_matches (info)) {
+ if (title) {
+ *title = g_match_info_fetch_named (info, "name");
+ g_strdelimit (*title, ".", ' ');
+ }
+
+ if (showname) {
+ *showname = g_match_info_fetch_named (info, "showname");
+ g_strdelimit (*showname, ".", ' ');
+ }
+
+ if (season) {
+ gchar *s = g_match_info_fetch_named (info, "season");
+ if (s) {
+ if (*s == 's' || *s == 'S') {
+ *season = atoi (s + 1);
+ } else {
+ *season = atoi (s);
+ }
+ } else {
+ *season = 0;
+ }
+
+ g_free (s);
+ }
+
+ if (episode) {
+ gchar *e = g_match_info_fetch_named (info, "episode");
+ if (e) {
+ if (*e == 'e' || *e == 'E') {
+ *episode = atoi (e + 1);
+ } else {
+ *episode = atoi (e);
+ }
+ } else {
+ *episode = 0;
+ }
+
+ g_free (e);
+ }
+
+ if (date) {
+ *date = NULL;
+ }
+
+ g_regex_unref (regex);
+ g_match_info_free (info);
+ g_free (metadata);
+
+ return VIDEO_TYPE_SERIES;
+ }
+
+ g_regex_unref (regex);
+ g_match_info_free (info);
+
+ /* The filename doesn't look like a movie or a TV show, just use the
+ filename without extension as the title */
+ if (title) {
+ *title = g_strdelimit (metadata, ".", ' ');
+ }
+
+ if (showname) {
+ *showname = NULL;
+ }
+
+ if (date) {
+ *date = NULL;
+ }
+
+ if (season) {
+ *season = 0;
+ }
+
+ if (episode) {
+ *episode = 0;
+ }
+
+ return VIDEO_TYPE_UNKNOWN;
+}
+
+static void
+sanity_check_video_metadata (MetadataExtractor *extractor,
+ const gchar *uri,
+ TrackerSparqlBuilder *metadata)
+{
+ VideoType type;
+ gchar *title, *showname;
+ gboolean ret;
+ GDate *date;
+ gint season, episode;
+
+ ret = gst_tag_list_get_string (extractor->tagcache,
+ GST_TAG_TITLE, &title);
+ if (ret == FALSE) {
+ title = NULL;
+ }
+
+ ret = gst_tag_list_get_string (extractor->tagcache,
+ GST_TAG_SHOW_NAME, &showname);
+ if (ret == FALSE) {
+ showname = NULL;
+ }
+
+ ret = gst_tag_list_get_int (extractor->tagcache,
+ GST_TAG_SHOW_EPISODE_NUMBER, &episode);
+ if (ret == FALSE) {
+ episode = 0;
+ }
+
+ ret = gst_tag_list_get_int (extractor->tagcache,
+ GST_TAG_SHOW_SEASON_NUMBER, &season);
+ if (ret == FALSE) {
+ season = 0;
+ }
+
+ ret = gst_tag_list_get_date (extractor->tagcache, GST_TAG_DATE, &date);
+ if (ret == FALSE) {
+ date = NULL;
+ }
+
+ type = parse_uri (uri, title ? NULL : &title,
+ showname ? NULL : &showname,
+ date ? NULL : &date,
+ season != 0 ? NULL : &season,
+ episode != 0 ? NULL : &episode);
+
+ if (title && title[0] != '\0') {
+ tracker_sparql_builder_predicate (metadata, "nie:title");
+ tracker_sparql_builder_object_unvalidated (metadata, title);
+ }
+
+ /* FIXME: Tracker doesn't have (as far as I can see) anyway to store
+ showname, so if title hasn't been set, then we set it to showname */
+ if (showname && showname != '\0') {
+ if (title == NULL || title[0] == '\0') {
+ tracker_sparql_builder_predicate (metadata, "nie:title");
+ tracker_sparql_builder_object_unvalidated (metadata, showname);
+ }
+ }
+ g_free (showname);
+ g_free (title);
+
+ if (date) {
+ gchar buf[10];
+
+ if (g_date_strftime (buf, 10, "%Y", date)) {
+ tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+ tracker_sparql_builder_object_unvalidated (metadata, buf);
+ }
+
+ g_date_free (date);
+ }
+
+ if (episode > 0) {
+ tracker_sparql_builder_predicate (metadata, "nmm:episodeNumber");
+ tracker_sparql_builder_object_int64 (metadata, episode);
+ }
+
+ if (season > 0) {
+ tracker_sparql_builder_predicate (metadata, "nmm:season");
+ tracker_sparql_builder_object_int64 (metadata, season);
+ }
+
+ if (type == VIDEO_TYPE_SERIES) {
+ /* FIXME: Set isSeries */
+ }
+}
+
static void
extract_metadata (MetadataExtractor *extractor,
const gchar *file_url,
_______________________________________________
tracker-list mailing list
[email protected]
https://mail.gnome.org/mailman/listinfo/tracker-list