This is an automated email from the ASF dual-hosted git repository. sblackmon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/streams.git
The following commit(s) were added to refs/heads/master by this push: new 7217733 STREAMS-679: add support for accessing and searching with twitter derived fields 7217733 is described below commit 7217733f205eebd63c92a9e140ca9a037a1f8a8c Author: sblackmon <sblack...@apache.org> AuthorDate: Tue Oct 20 13:34:11 2020 -0500 STREAMS-679: add support for accessing and searching with twitter derived fields resolves STREAMS-679 --- .../apache/streams/twitter/search/SearchUtil.java | 13 ++++++- .../twitter/api/ThirtyDaySearchRequest.json | 5 +++ .../SevenDaySearchProviderConfiguration.json | 19 ++++++++++ .../ThirtyDaySearchProviderConfiguration.json | 7 ++++ .../org/apache/streams/twitter/pojo/User.json | 44 ++++++++++++++++++++++ .../twitter/search/ThirtyDaySearchOperator.json | 30 +++++++++++++-- 6 files changed, 112 insertions(+), 6 deletions(-) diff --git a/streams-contrib/streams-provider-twitter/src/main/java/org/apache/streams/twitter/search/SearchUtil.java b/streams-contrib/streams-provider-twitter/src/main/java/org/apache/streams/twitter/search/SearchUtil.java index 4426c80..01dd20d 100644 --- a/streams-contrib/streams-provider-twitter/src/main/java/org/apache/streams/twitter/search/SearchUtil.java +++ b/streams-contrib/streams-provider-twitter/src/main/java/org/apache/streams/twitter/search/SearchUtil.java @@ -88,6 +88,15 @@ public class SearchUtil { for( String time_zone : operator.getTimeZones()) { stringJoiner.add("time_zone:" + time_zone); } + if( operator.getProfileCountry() != null) { + stringJoiner.add("profile_country:" + operator.getProfileCountry()); + } + if( operator.getProfileRegion() != null) { + stringJoiner.add("profile_region:" + operator.getProfileRegion()); + } + if( operator.getProfileLocality() != null) { + stringJoiner.add("profile_locality:" + operator.getProfileLocality()); + } if( operator.getHasImages() ) { stringJoiner.add("has:images"); } @@ -97,8 +106,8 @@ public class SearchUtil { if( operator.getHasMedia() ) { stringJoiner.add("has:media"); } - if( operator.getHasImages() ) { - stringJoiner.add("has:image"); + if( operator.getHasProfileGeo() ) { + stringJoiner.add("has:profile_geo"); } if( operator.getHasVideos() ) { stringJoiner.add("has:video"); diff --git a/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/api/ThirtyDaySearchRequest.json b/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/api/ThirtyDaySearchRequest.json index 21f5cce..6740843 100644 --- a/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/api/ThirtyDaySearchRequest.json +++ b/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/api/ThirtyDaySearchRequest.json @@ -13,6 +13,11 @@ "required": true, "type": "string" }, + "tag": { + "description": "Tags can be used to segregate rules and their matching data into different logical groups. If a rule tag is provided, the rule tag is included in the 'matching_rules' attribute.\n\nIt is recommended to assign rule-specific UUIDs to rule tags and maintain desired mappings on the client side.", + "required": false, + "type": "string" + }, "fromDate": { "description": "The oldest UTC timestamp (back to 3/21/2006) from which the activities will be provided. Timestamp is in minute granularity and is inclusive (i.e. 12:00 includes the 00 minute).", "required": false, diff --git a/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/config/SevenDaySearchProviderConfiguration.json b/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/config/SevenDaySearchProviderConfiguration.json index bc5450b..551517b 100644 --- a/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/config/SevenDaySearchProviderConfiguration.json +++ b/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/config/SevenDaySearchProviderConfiguration.json @@ -19,6 +19,25 @@ "max_pages": { "type": "integer", "description": "Max pages to request" + }, + "page_size": { + "type": "integer", + "description": "Requested items per page" + }, + "geocode": { + "type": "string" + }, + "lang": { + "type": "string" + }, + "locale": { + "type": "string" + }, + "result_type": { + "type": "string" + }, + "include_entities": { + "type": "boolean" } } } \ No newline at end of file diff --git a/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/config/ThirtyDaySearchProviderConfiguration.json b/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/config/ThirtyDaySearchProviderConfiguration.json index 1d31bb3..c117b0f 100644 --- a/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/config/ThirtyDaySearchProviderConfiguration.json +++ b/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/config/ThirtyDaySearchProviderConfiguration.json @@ -12,6 +12,9 @@ "query": { "$ref": "../api/ThirtyDaySearchRequest.json#properties/query" }, + "tag": { + "$ref": "../api/ThirtyDaySearchRequest.json#properties/tag" + }, "max_items": { "type": "integer", "description": "Max items total" @@ -19,6 +22,10 @@ "max_pages": { "type": "integer", "description": "Max pages to request" + }, + "page_size": { + "type": "integer", + "description": "Requested items per page" } } } \ No newline at end of file diff --git a/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/pojo/User.json b/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/pojo/User.json index c6cb798..a560011 100644 --- a/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/pojo/User.json +++ b/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/pojo/User.json @@ -123,6 +123,50 @@ }, "status": { "$ref": "tweet.json" + }, + "derived": { + "type": "object", + "properties": { + "locations": { + "type": "array", + "items": { + "type": "object", + "javaType" : "org.apache.streams.twitter.pojo.DerivedLocation", + "properties": { + "country": { + "type": "string" + }, + "country_code": { + "type": "string" + }, + "locality": { + "type": "string" + }, + "region": { + "type": "string" + }, + "sub_region": { + "type": "string" + }, + "full_name": { + "type": "string" + }, + "geo": { + "type": "object", + "properties": { + "coordinates": { + "type": "array", + "items": { + "type": "number" + } + }, + "type": "string" + } + } + } + } + } + } } } diff --git a/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/search/ThirtyDaySearchOperator.json b/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/search/ThirtyDaySearchOperator.json index 2ae1657..3c78e8b 100644 --- a/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/search/ThirtyDaySearchOperator.json +++ b/streams-contrib/streams-provider-twitter/src/main/jsonschema/org/apache/streams/twitter/search/ThirtyDaySearchOperator.json @@ -121,6 +121,18 @@ "type": "string" } }, + "profile_country": { + "description": "Exact match on the “countryCode” field from the “address” object in the Profile Geo enrichment.\nUses a normalized set of two-letter country codes, based on ISO-3166-1-alpha-2 specification. This operator is provided in lieu of an operator for “country” field from the “address” object to be concise.", + "type": "string" + }, + "profile_region": { + "description": "Matches on the “region” field from the “address” object in the Profile Geo enrichment.\nThis is an exact full string match. It is not necessary to escape characters with a backslash. For example, if matching something with a slash, use “one/two”, not “one\\/two”. Use double quotes to match substrings that contain whitespace or punctuation.", + "type": "string" + }, + "profile_locality": { + "description": "Matches on the “locality” field from the “address” object in the Profile Geo enrichment.\nThis is an exact full string match. It is not necessary to escape characters with a backslash. For example, if matching something with a slash, use “one/two”, not “one\\/two”. Use double quotes to match substrings that contain whitespace or punctuation.", + "type": "string" + }, "time_zones": { "description": "Matches Tweets where the user-selected time zone specified in a user’s profile settings matches a given string.", "type": "array", @@ -135,6 +147,11 @@ "type": "string" } }, + "has_geo": { + "description": "Matches Tweets that have Tweet-specific geo location data provided from Twitter. This can be either “geo” lat-long coordinate, or a “location” in the form of a Twitter “Place”, with corresponding display name, geo polygon, and other fields.", + "type": "boolean", + "default": false + }, "has_images": { "description": "A boolean search operator that returns all Tweets that contain a native images (e.g. pic.twitter.com).", "type": "boolean", @@ -145,18 +162,23 @@ "type": "boolean", "default": false }, + "has_media": { + "description": "Matches Tweets that contain a media url classified by Twitter, e.g. pic.twitter.com.", + "type": "boolean", + "default": false + }, "has_mentions": { "description": "Matches Tweets that mention another Twitter user.", "type": "boolean", "default": false }, - "has_videos": { - "description": "A boolean search operator that returns all Tweets that contain native videos (does not include vine, periscope).", + "has_profile_geo": { + "description": "Matches Tweets that have any Profile Geo metadata, regardless of the actual value.", "type": "boolean", "default": false }, - "has_media": { - "description": "Matches Tweets that contain a media url classified by Twitter, e.g. pic.twitter.com.", + "has_videos": { + "description": "A boolean search operator that returns all Tweets that contain native videos (does not include vine, periscope).", "type": "boolean", "default": false },