Xikui Wang has submitted this change and it was merged. Change subject: [NO ISSUE] User-defined Function Documentation update ......................................................................
[NO ISSUE] User-defined Function Documentation update - user model changes: no - storage format changes: no - interface changes: no Details: 1. Updated the UDF documentation to be consisten with current master. 2. Cleaned default UDF package to remove useless UDFs. 3. Added the example in documentation as a test case for IT. 4. Reorganized the documentation to keep up with the new structure. 5. Minor changes to other documentation pages to keep style consistent. Change-Id: I17b1b4d639ca38689298ce88145257e794eb90e1 Reviewed-on: https://asterix-gerrit.ics.uci.edu/2804 Sonar-Qube: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Contrib: Jenkins <[email protected]> Reviewed-by: Taewoo Kim <[email protected]> Integration-Tests: Jenkins <[email protected]> --- M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm M 
asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm M asterixdb/asterix-doc/pom.xml R asterixdb/asterix-doc/src/main/data_ingestion/feeds.md A asterixdb/asterix-doc/src/main/data_ingestion/feeds_title.md M asterixdb/asterix-doc/src/main/installation/ansible_title.md M asterixdb/asterix-doc/src/main/installation/aws_title.md A asterixdb/asterix-doc/src/main/user-defined_function/udf.md A asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md M asterixdb/asterix-doc/src/site/markdown/ncservice.md D asterixdb/asterix-doc/src/site/markdown/udf.md M asterixdb/asterix-doc/src/site/site.xml M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java D asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java M asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java R asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddMentionedUsersFactory.java A asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/addMentionedUsersFunction.java M asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml M asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm 26 files changed, 329 insertions(+), 359 deletions(-) Approvals: Anon. E. 
Moose #1000171: Taewoo Kim: Looks good to me, approved Jenkins: Verified; No violations found; ; Verified diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp index 9402e1f..a6a1cdc 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp @@ -18,5 +18,4 @@ */ use externallibtest; -let x=testlib#mysum(3,4) -select VALUE x; +testlib#mysum(3,4); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp index 4fdc669..3bc33de 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp @@ -22,34 +22,20 @@ * Date : 4th Oct 2017 */ -drop dataverse externallibtest if exists; -create dataverse externallibtest; -use externallibtest; +drop dataverse udfs if exists; +create dataverse udfs; +use udfs; -create type TweetInputType as open { - id: string, - username : string, - location : string, - text : string, - timestamp : string -}; - -create type TweetOutputType as open { - id: string, - username : string, - location : string, - text : string, - timestamp : string, - topics : {{string}} +create type TweetType if not exists as open { + id: int64 }; create feed TweetFeed with { "adapter-name" : "localfs", - "type-name" : "TweetInputType", - "path" : 
"asterix_nc1://data/twitter/obamatweets.adm", + "type-name" : "TweetType", + "path" : "asterix_nc1://data/twitter/extrasmalltweets.txt", "format" : "adm" }; -create dataset TweetsFeedIngest(TweetOutputType) -primary key id; +create dataset ProcessedTweets(TweetType) primary key id; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp index d1e0e87..4f0c6d3 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp @@ -16,4 +16,4 @@ * specific language governing permissions and limitations * under the License. 
*/ -install externallibtest testlib target/data/externallib/asterix-external-data-testlib.zip \ No newline at end of file +install udfs testlib target/data/externallib/asterix-external-data-testlib.zip \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp index 0d46387..1407514b 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp @@ -21,10 +21,8 @@ * Expected Res : Success * Date : 4th Oct 2017 */ -use externallibtest; +use udfs; -SET `compiler.parallelism` "5"; - -connect feed TweetFeed to dataset TweetsFeedIngest apply function `testlib#parseTweet`; +connect feed TweetFeed to dataset ProcessedTweets apply function testlib#addMentionedUsers; start feed TweetFeed; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp index 607e5bd..b95294a 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp @@ -22,8 +22,8 @@ * Date : 4th Oct 2017 */ // polltimeoutsecs=5 -use externallibtest; +use udfs; -select value t from TweetsFeedIngest t 
+select value t from ProcessedTweets t ORDER BY t.id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp index 86af80f..98c334d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp @@ -16,4 +16,4 @@ * specific language governing permissions and limitations * under the License. */ -uninstall externallibtest testlib \ No newline at end of file +uninstall udfs testlib \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp index 2a7acef..128c793 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp @@ -16,4 +16,4 @@ * specific language governing permissions and limitations * under the License. 
*/ -drop dataverse externallibtest if exists; +drop dataverse udfs if exists; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm index f0ad2b2..9b2714a 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm @@ -1,5 +1,5 @@ -{ "Function": { "DataverseName": "externallibtest", "Name": "testlib#addHashTags", "Arity": "1", "Params": [ "Tweet" ], "ReturnType": "ProcessedTweet", "Definition": "org.apache.asterix.external.library.AddHashTagsFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } } { "Function": { "DataverseName": "externallibtest", "Name": "testlib#addHashTagsInPlace", "Arity": "1", "Params": [ "Tweet" ], "ReturnType": "ProcessedTweet", "Definition": "org.apache.asterix.external.library.AddHashTagsInPlaceFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } } +{ "Function": { "DataverseName": "externallibtest", "Name": "testlib#addMentionedUsers", "Arity": "1", "Params": [ "TweetType" ], "ReturnType": "TweetType", "Definition": "org.apache.asterix.external.library.AddMentionedUsersFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } } { "Function": { "DataverseName": "externallibtest", "Name": "testlib#allTypes", "Arity": "1", "Params": [ "AllType" ], "ReturnType": "AllType", "Definition": "org.apache.asterix.external.library.AllTypesFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } } { "Function": { "DataverseName": "externallibtest", "Name": "testlib#echoDelay", "Arity": "1", "Params": [ 
"TweetMessageType" ], "ReturnType": "TweetMessageType", "Definition": "org.apache.asterix.external.library.EchoDelayFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } } { "Function": { "DataverseName": "externallibtest", "Name": "testlib#fnameDetector", "Arity": "1", "Params": [ "InputRecordType" ], "ReturnType": "DetectResultType", "Definition": "org.apache.asterix.external.library.KeywordsDetectorFactory", "Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm index 1291213..0f7eb82 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm @@ -1,12 +1,21 @@ -{ "id": "nc1:1", "username": "BronsonMike", "location": "", "text": "@GottaLaff @reutersus Christie and obama just foul weather friends", "timestamp": "Thu Dec 06 16:53:06 PST 2012", "topics": {{ }} } -{ "id": "nc1:100", "username": "KidrauhlProuds", "location": "", "text": "RT @01Direclieber: A filha do Michael Jackson uma Belieber,a filha do Eminem e uma Belieber,as filhas de Obama sao Beliebers, e a filha do meu pai e Belieber", "timestamp": "Thu Dec 06 16:53:16 PST 2012", "topics": {{ }} } -{ "id": "nc1:102", "username": "jaysauce82", "location": "", "text": "Not voting for President Obama #BadDecision", "timestamp": "Thu Dec 06 16:53:16 PST 2012", "topics": {{ "#BadDecision" }} } -{ "id": "nc1:104", "username": "princeofsupras", "location": "", "text": "RT @01Direclieber: A filha do Michael Jackson e uma Belieber,a filha do Eminem e uma Belieber,as filhas de Obama sao Beliebers, e a 
filha do meu pai e Belieber", "timestamp": "Thu Dec 06 16:53:15 PST 2012", "topics": {{ }} } -{ "id": "nc1:106", "username": "GulfDogs", "location": "", "text": "Obama Admin Knew Libyan Terrorists Had US-Provided Weaponsteaparty #tcot #ccot #NewGuards #BreitbartArmy #patriotwttp://t.co/vJxzrQUE", "timestamp": "Thu Dec 06 16:53:14 PST 2012", "topics": {{ "#tcot", "#ccot", "#NewGuards", "#BreitbartArmy", "#patriotwttp://t.co/vJxzrQUE" }} } -{ "id": "nc1:108", "username": "Laugzpz", "location": "", "text": "@AlfredoJalife Maestro Obama se hace de la vista gorda, es un acuerdo de siempre creo yo.", "timestamp": "Thu Dec 06 16:53:14 PST 2012", "topics": {{ }} } -{ "id": "nc1:11", "username": "magarika", "location": "", "text": "RT @ken24xavier: Obama tells SOROS - our plan is ALMOST finished http://t.co/WvzK0GtU", "timestamp": "Thu Dec 06 16:53:05 PST 2012", "topics": {{ }} } -{ "id": "nc1:111", "username": "ToucanMall", "location": "", "text": "RT @WorldWar3Watch: Michelle Obama Gets More Grammy Nominations Than Justin ... #Obama #WW3 http://t.co/0Wv2GKij", "timestamp": "Thu Dec 06 16:53:13 PST 2012", "topics": {{ "#Obama", "#WW3" }} } -{ "id": "nc1:113", "username": "ToucanMall", "location": "", "text": "RT @ObamaPalooza: Tiffany Shared What $2,000 Meant to Her ... and the President Stopped by to Talk About It http://t.co/sgT7lsNV #Obama", "timestamp": "Thu Dec 06 16:53:12 PST 2012", "topics": {{ "#Obama" }} } -{ "id": "nc1:115", "username": "thewildpitch", "location": "", "text": "RT @RevkahJC: Dennis Miller: Obama Should Just Say He Wants To Tax Successful People http://t.co/Ihlemy9Y", "timestamp": "Thu Dec 06 16:53:11 PST 2012", "topics": {{ }} } -{ "id": "nc1:117", "username": "Rnugent24", "location": "", "text": "RT @ConservativeQuo: unemployment is above 8% again. I wonder how long it will take for Obama to start blaming Bush? 
3-2-1 #tcot #antiobama", "timestamp": "Thu Dec 06 16:53:10 PST 2012", "topics": {{ "#tcot", "#antiobama" }} } -{ "id": "nc1:119", "username": "ToucanMall", "location": "", "text": "RT @Newitrsdotcom: I hope #Obama will win re-election... Other four years without meaningless #wars", "timestamp": "Thu Dec 06 16:53:09 PST 2012", "topics": {{ "#Obama", "#wars" }} } +{ "id": 21, "tweetid": 69902639026020352, "loc": point("34.5,-100.5"), "time": datetime("2011-05-15T16:11:02.000Z"), "text": "thats that smokers cough maam <<<<<--- @x_incredibleL :: Allergies. i got that "cough" lol", "mentionedUsers": [ "@x_incredibleL" ] } +{ "id": 22, "tweetid": 69988755800465408, "loc": point("34.5,-97.5"), "time": datetime("2011-05-15T21:53:14.000Z"), "text": "Allergies fuckin over me..#damn", "mentionedUsers": [ ] } +{ "id": 23, "tweetid": 69940039605432320, "loc": point("34.5,-97.5"), "time": datetime("2011-05-15T18:39:39.000Z"), "text": "Natural Asthma Remedy - Deal With Your Asthma in a Natural Way.. Allergies", "mentionedUsers": [ ] } +{ "id": 24, "tweetid": 69834276929159169, "loc": point("25.5,-100.5"), "time": datetime("2011-05-15T11:39:23.000Z"), "text": "Damn Allergies... sneezing like crazy! >_<", "mentionedUsers": [ ] } +{ "id": 25, "tweetid": 69950146787553281, "loc": point("25.5,-97.5"), "time": datetime("2011-05-15T19:19:49.000Z"), "text": "pass me an asthma pump", "mentionedUsers": [ ] } +{ "id": 26, "tweetid": 69754524767756289, "loc": point("25.5,-97.5"), "time": datetime("2011-05-15T06:22:29.000Z"), "text": "Never knew allergies could actually keep me from sleeping", "mentionedUsers": [ ] } +{ "id": 27, "tweetid": 69999864498487297, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T22:37:22.000Z"), "text": "@ItsCrystal320 gooodd mommy! Except my allergies have been acting up :( and Im having issues with you know who. Smh nothing new. 
Lol", "mentionedUsers": [ "@ItsCrystal320" ] } +{ "id": 28, "tweetid": 69996796616777728, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T22:25:11.000Z"), "text": "My allergies act up so much while Im in this house!!! Idk why! Sneezing, now my eye is swollen!! Smh.", "mentionedUsers": [ ] } +{ "id": 29, "tweetid": 69977295351316480, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T21:07:41.000Z"), "text": "@GOLDenNote6 lmmmaaaoooo!!!! nnnnooo! ur the one that needs the asthma pump!", "mentionedUsers": [ "@GOLDenNote6" ] } +{ "id": 30, "tweetid": 69972022586912768, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T20:46:44.000Z"), "text": "@TinaLee90 hell yeah ! He snapped cause she got allergies and heavy she be snorting and coughing while he trying to study", "mentionedUsers": [ "@TinaLee90" ] } +{ "id": 31, "tweetid": 69965044678524928, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T20:19:01.000Z"), "text": "Back home and my ears begin to itch!!! Omg allergies go away please! #thingsicanlivewithout", "mentionedUsers": [ ] } +{ "id": 32, "tweetid": 69961997680246784, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T20:06:54.000Z"), "text": "@BravoAndy allergies acting up again or you just digging the glasses? Haha u rock it though!", "mentionedUsers": [ "@BravoAndy" ] } +{ "id": 33, "tweetid": 69946356248215552, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T19:04:45.000Z"), "text": "My allergies act up at the worst times -_-", "mentionedUsers": [ ] } +{ "id": 34, "tweetid": 69929466691993600, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T17:57:38.000Z"), "text": "Hate being sick!!! -_____- I hate you allergies! 
:/", "mentionedUsers": [ ] } +{ "id": 35, "tweetid": 69928014615556096, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T17:51:52.000Z"), "text": "Allergies please go away :(", "mentionedUsers": [ ] } +{ "id": 36, "tweetid": 69916338092654592, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T17:05:28.000Z"), "text": "I feel tired....i got asthma :( but it was still an awesome birthday", "mentionedUsers": [ ] } +{ "id": 37, "tweetid": 69911241975529474, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T16:45:13.000Z"), "text": "Cant stand that asthma commercial with the gold fish -__-", "mentionedUsers": [ ] } +{ "id": 38, "tweetid": 69910467233062912, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T16:42:08.000Z"), "text": "@PapisFavWave whats wrong? Got a cold? Asthma ?", "mentionedUsers": [ "@PapisFavWave" ] } +{ "id": 39, "tweetid": 69908652202536961, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T16:34:56.000Z"), "text": "My allergies are killing me!", "mentionedUsers": [ ] } +{ "id": 40, "tweetid": 69897794273546240, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T15:51:47.000Z"), "text": "and allergies", "mentionedUsers": [ ] } +{ "id": 41, "tweetid": 69893733449080832, "loc": point("25.5,-80.5"), "time": datetime("2011-05-15T15:35:39.000Z"), "text": "Repeated splashing of water about the skin, specifically following an exposure to pollution and dirt, makes sure... 
http://bit.ly/mnWnJo", "mentionedUsers": [ ] } diff --git a/asterixdb/asterix-doc/pom.xml b/asterixdb/asterix-doc/pom.xml index 8ddc1d8..0cc43e4 100644 --- a/asterixdb/asterix-doc/pom.xml +++ b/asterixdb/asterix-doc/pom.xml @@ -69,6 +69,12 @@ <concat destfile="${project.build.directory}/generated-site/markdown/aws.md"> <filelist dir="${project.basedir}/src/main/installation/" files="aws_title.md,aws.md" /> </concat> + <concat destfile="${project.build.directory}/generated-site/markdown/feeds.md"> + <filelist dir="${project.basedir}/src/main/data_ingestion/" files="feeds_title.md,feeds.md" /> + </concat> + <concat destfile="${project.build.directory}/generated-site/markdown/udf.md"> + <filelist dir="${project.basedir}/src/main/user-defined_function/" files="udf_title.md,udf.md" /> + </concat> </target> </configuration> <goals> diff --git a/asterixdb/asterix-doc/src/site/markdown/feeds/tutorial.md b/asterixdb/asterix-doc/src/main/data_ingestion/feeds.md similarity index 96% rename from asterixdb/asterix-doc/src/site/markdown/feeds/tutorial.md rename to asterixdb/asterix-doc/src/main/data_ingestion/feeds.md index f5635b8..0dd6789 100644 --- a/asterixdb/asterix-doc/src/site/markdown/feeds/tutorial.md +++ b/asterixdb/asterix-doc/src/main/data_ingestion/feeds.md @@ -17,14 +17,6 @@ ! under the License. !--> -# Support for Data Ingestion in AsterixDB # - -## <a id="#toc">Table of Contents</a> ## - -* [Introduction](#Introduction) -* [Feed Adapters](#FeedAdapters) -* [Feed Policies](#FeedPolicies) - ## <a name="Introduction">Introduction</a> ## In this document, we describe the support for data ingestion in @@ -101,7 +93,12 @@ The "push_twitter" adapter takes as configuration the above mentioned parameters. End users are required to obtain the above authentication credentials prior to using the "push_twitter" adapter. 
For further information on obtaining OAuth keys and tokens and -registering an application with Twitter, please visit http://apps.twitter.com +registering an application with Twitter, please visit http://apps.twitter.com. + +Note that AsterixDB uses the Twitter4J API for getting data from Twitter. Due to a license conflict, +Apache AsterixDB cannot ship the Twitter4J library. To use the Twitter adapter in AsterixDB, +please download the necessary dependencies (`twitter4j-core-4.0.x.jar` and `twitter4j-stream-4.0.x.jar`) and drop +them into the `repo/` directory before AsterixDB starts. Given below is an example SQL++ statement that creates a feed called "TwitterFeed" by using the "push_twitter" adapter. diff --git a/asterixdb/asterix-doc/src/main/data_ingestion/feeds_title.md b/asterixdb/asterix-doc/src/main/data_ingestion/feeds_title.md new file mode 100644 index 0000000..1b7293d --- /dev/null +++ b/asterixdb/asterix-doc/src/main/data_ingestion/feeds_title.md @@ -0,0 +1,25 @@ +<!-- + ! Licensed to the Apache Software Foundation (ASF) under one + ! or more contributor license agreements. See the NOTICE file + ! distributed with this work for additional information + ! regarding copyright ownership. The ASF licenses this file + ! to you under the Apache License, Version 2.0 (the + ! "License"); you may not use this file except in compliance + ! with the License. You may obtain a copy of the License at + ! + ! http://www.apache.org/licenses/LICENSE-2.0 + ! + ! Unless required by applicable law or agreed to in writing, + ! software distributed under the License is distributed on an + ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ! KIND, either express or implied. See the License for the + ! specific language governing permissions and limitations + ! under the License. 
+ !--> + +# Data Ingestion with Feeds # + +## <a id="#toc">Table of Contents</a> ## +* [Introduction](#Introduction) +* [Feed Adapters](#FeedAdapters) +* [Feed Policies](#FeedPolicies) \ No newline at end of file diff --git a/asterixdb/asterix-doc/src/main/installation/ansible_title.md b/asterixdb/asterix-doc/src/main/installation/ansible_title.md index 307580a..d72801f 100644 --- a/asterixdb/asterix-doc/src/main/installation/ansible_title.md +++ b/asterixdb/asterix-doc/src/main/installation/ansible_title.md @@ -16,7 +16,9 @@ ! specific language governing permissions and limitations ! under the License. !--> +# Installation using Ansible # +## <a id="#toc">Table of Contents</a> ## * [Introduction](#Introduction) * [Prerequisites](#Prerequisites) * [Cluster Configuration](#config) diff --git a/asterixdb/asterix-doc/src/main/installation/aws_title.md b/asterixdb/asterix-doc/src/main/installation/aws_title.md index abf01c9..9af36a9 100644 --- a/asterixdb/asterix-doc/src/main/installation/aws_title.md +++ b/asterixdb/asterix-doc/src/main/installation/aws_title.md @@ -16,7 +16,9 @@ ! specific language governing permissions and limitations ! under the License. !--> +# Installation using Amazon Web Services # +## <a id="#toc">Table of Contents</a> ## * [Introduction](#Introduction) * [Prerequisites](#Prerequisites) * [Cluster Configuration](#config) diff --git a/asterixdb/asterix-doc/src/main/user-defined_function/udf.md b/asterixdb/asterix-doc/src/main/user-defined_function/udf.md new file mode 100644 index 0000000..2431448 --- /dev/null +++ b/asterixdb/asterix-doc/src/main/user-defined_function/udf.md @@ -0,0 +1,147 @@ +<!-- + ! Licensed to the Apache Software Foundation (ASF) under one + ! or more contributor license agreements. See the NOTICE file + ! distributed with this work for additional information + ! regarding copyright ownership. The ASF licenses this file + ! to you under the Apache License, Version 2.0 (the + ! 
"License"); you may not use this file except in compliance + ! with the License. You may obtain a copy of the License at + ! + ! http://www.apache.org/licenses/LICENSE-2.0 + ! + ! Unless required by applicable law or agreed to in writing, + ! software distributed under the License is distributed on an + ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ! KIND, either express or implied. See the License for the + ! specific language governing permissions and limitations + ! under the License. + !--> + +## <a name="introduction">Introduction</a>## + +Apache AsterixDB supports two languages for writing user-defined functions (UDFs): SQL++ and Java. +A user can encapsulate data processing logic into a UDF and invoke it +later repeatedly. For SQL++ functions, a user can refer to [SQL++ Functions](sqlpp/manual.html#Functions) +for their usages. In this document, we +focus on how to install/invoke/uninstall a Java function library using the Ansible script that we provide. + + +## <a name="installingUDF">Installing an UDF Library</a>## + +UDFs have to be installed offline. +This section describes the process assuming that you have followed the preceding [ansible installation instructions](ansible.html) +to deploy an AsterixDB instance on your local machine or cluster. Here are the +instructions to install an UDF library: + +- Step 1: Stop the AsterixDB instance if it is ACTIVE. + + $ bin/stop.sh + +- Step 2: Deploy the UDF package. + + $ bin/udf.sh -m i -d DATAVERSE_NAME -l LIBRARY_NAME -p UDF_PACKAGE_PATH + +- Step 3: Start AsterixDB + + $ bin/start.sh + +After AsterixDB starts, you can use the following query to check whether your UDFs have been sucessfully registered with the system. + + SELECT * FROM Metadata.`Function`; + +In the AsterixDB source release, we provide several sample UDFs that you can try out. +You need to build the AsterixDB source to get the compiled UDF package. It can be found under +the `asterixdb-external` sub-project. 
Assuming that these UDFs have been installed into the `udfs` dataverse and `testlib` library, +here is an example that uses the sample UDF `mysum` to compute the sum of two input integers. + + use udfs; + + testlib#mysum(3,4); + +## <a id="UDFOnFeeds">Attaching a UDF on Data Feeds</a> ## + +In [Data Ingestion using feeds](feeds.html), we introduced an efficient way for users to get data into AsterixDB. In +some use cases, users may want to pre-process the incoming data before storing it into the dataset. To meet this need, +AsterixDB allows +the user to attach a UDF onto the ingestion pipeline. Following the example in [Data Ingestion](feeds.html), here we +show an example of how to attach a UDF that extracts the user names mentioned from the incoming Tweet text, storing the +processed Tweets into a dataset. + +We start by creating the datatype and dataset that will be used for the feed and UDF. One thing to keep in mind is that +data flows from the feed to the UDF and then to the dataset. This means that the feed's datatype +should be the same as the input type of the UDF, and the output datatype of the UDF should be the same as the dataset's +datatype. Thus, users should make sure that their datatypes are consistent in the UDF configuration. Users can also +take advantage of open datatypes in AsterixDB by creating a minimum description of the data for simplicity. +Here we use open datatypes: + + use udfs; + + create type TweetType if not exists as open { + id: int64 + }; + + create dataset ProcessedTweets(TweetType) primary key id; + +As the `TweetType` is an open datatype, processed Tweets can be stored into the dataset after they are annotated +with an extra attribute. Given the datatype and dataset above, we can create a Twitter Feed with the same datatype. +Please refer to section [Data Ingestion](feeds.html) if you have any trouble in creating feeds. 
+ + use udfs; + + create feed TwitterFeed with { + "adapter-name": "push_twitter", + "type-name": "TweetType", + "format": "twitter-status", + "consumer.key": "************", + "consumer.secret": "************", + "access.token": "**********", + "access.token.secret": "*************" + }; + +After creating the feed, we attach the UDF onto the feed pipeline and start the feed with the following statements: + + use udfs; + + connect feed TwitterFeed to dataset ProcessedTweets apply function udfs#addMentionedUsers; + + start feed TwitterFeed; + +You can check the annotated Tweets by querying the `ProcessedTweets` dataset: + + SELECT * FROM ProcessedTweets LIMIT 10; + +## <a name="udfConfiguration">A quick look of the UDF configuration</a>## + +AsterixDB uses an XML configuration file to describe the UDFs. A user can use it to define and reuse their compiled UDFs +for different purposes. Here is a snippet of the configuration used in our [previous example](#UDFOnFeeds): + + <libraryFunction> + <name>addMentionedUsers</name> + <function_type>SCALAR</function_type> + <argument_type>TweetType</argument_type> + <return_type>TweetType</return_type> + <definition>org.apache.asterix.external.library.AddMentionedUsersFactory</definition> + <parameters>text</parameters> + </libraryFunction> + +Here are the explanations of the fields in the configuration file: + + name: The proper name that is used to invoke the function. + function_type: The type of the function. + argument_type: The datatype of the arguments passed in. If there is more than one parameter, separate them with comma(s), e.g., `AINT32,AINT32`. + return_type: The datatype of the returning value. + definition: A reference to the function factory. + parameters: The parameters passed into the function. + +In our feeds example, we passed in `"text"` as a parameter to the function so it knows which field to look at to get the Tweet text. 
+If the Twitter API were to change its field names in the future, we can accommodate that change by simply modifying the configuration file +instead of recompiling the whole UDF package. This feature can be further utilized in use cases where a user has a Machine Learning +algorithm with different trained model files. If you are interested, you can find more examples [here](https://github.com/apache/asterixdb/tree/master/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library). + +## <a name="uninstall">Uninstalling a UDF Library</a>## + +If you want to uninstall the UDF library, put AsterixDB into `INACTIVE` mode and run the following command: + + $ bin/udf.sh -m u -d DATAVERSE_NAME -l LIBRARY_NAME + + diff --git a/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md b/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md new file mode 100644 index 0000000..659c13b --- /dev/null +++ b/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md @@ -0,0 +1,27 @@ +<!-- + ! Licensed to the Apache Software Foundation (ASF) under one + ! or more contributor license agreements. See the NOTICE file + ! distributed with this work for additional information + ! regarding copyright ownership. The ASF licenses this file + ! to you under the Apache License, Version 2.0 (the + ! "License"); you may not use this file except in compliance + ! with the License. You may obtain a copy of the License at + ! + ! http://www.apache.org/licenses/LICENSE-2.0 + ! + ! Unless required by applicable law or agreed to in writing, + ! software distributed under the License is distributed on an + ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ! KIND, either express or implied. See the License for the + ! specific language governing permissions and limitations + ! under the License. 
+ !--> + +# User-defined Functions # + +## <a id="toc">Table of Contents</a> ## +* [Introduction](#introduction) +* [Installing a UDF Library](#installingUDF) +* [Attaching a UDF on Data Feeds](#UDFOnFeeds) +* [A quick look of the UDF configuration](#udfConfiguration) +* [Uninstalling a UDF Library](#uninstall) \ No newline at end of file diff --git a/asterixdb/asterix-doc/src/site/markdown/ncservice.md b/asterixdb/asterix-doc/src/site/markdown/ncservice.md index 2b309ce..ef2ac9b 100644 --- a/asterixdb/asterix-doc/src/site/markdown/ncservice.md +++ b/asterixdb/asterix-doc/src/site/markdown/ncservice.md @@ -17,6 +17,8 @@ ! under the License. !--> +# Installation using NCService # + ## <a id="toc">Table of Contents</a> ## * [Quick Start](#quickstart) diff --git a/asterixdb/asterix-doc/src/site/markdown/udf.md b/asterixdb/asterix-doc/src/site/markdown/udf.md deleted file mode 100644 index b2ef2bc..0000000 --- a/asterixdb/asterix-doc/src/site/markdown/udf.md +++ /dev/null @@ -1,189 +0,0 @@ -<!-- - ! Licensed to the Apache Software Foundation (ASF) under one - ! or more contributor license agreements. See the NOTICE file - ! distributed with this work for additional information - ! regarding copyright ownership. The ASF licenses this file - ! to you under the Apache License, Version 2.0 (the - ! "License"); you may not use this file except in compliance - ! with the License. You may obtain a copy of the License at - ! - ! http://www.apache.org/licenses/LICENSE-2.0 - ! - ! Unless required by applicable law or agreed to in writing, - ! software distributed under the License is distributed on an - ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - ! KIND, either express or implied. See the License for the - ! specific language governing permissions and limitations - ! under the License. 
- !--> - -# Support for User Defined Functions in AsterixDB # - -## <a id="#toc">Table of Contents</a> ## -* [Using UDF to preprocess feed-collected data](#PreprocessingCollectedData) -* [Writing an External UDF](#WritingAnExternalUDF) -* [Creating an AsterixDB Library](#CreatingAnAsterixDBLibrary) -* [Installing an AsterixDB Library](#installingUDF) - -In this document, we describe the support for implementing, using, and installing user-defined functions (UDF) in -AsterixDB. We will explain how we can use UDFs to preprocess, e.g., data collected using feeds (see the [feeds tutorial](feeds/tutorial.html)). - - -### <a name="installingUDF">Installing an AsterixDB Library</a>### - -We assume you have followed the [installation instructions](../install.html) to set up a running AsterixDB instance. Let us refer your AsterixDB instance by the name "my_asterix". - -- Step 1: Stop the AsterixDB instance if it is in the ACTIVE state. - - $ managix stop -n my_asterix - -- Step 2: Install the library using Managix install command. Just to illustrate, we use the help command to look up the syntax - - $ managix help -cmd install - Installs a library to an asterix instance. - Options - n Name of Asterix Instance - d Name of the dataverse under which the library will be installed - l Name of the library - p Path to library zip bundle - -Above is a sample output and explains the usage and the required parameters. Each library has a name and is installed under a dataverse. Recall that we had created a dataverse by the name - "feeds" prior to creating our datatypes and dataset. We shall name our library - "testlib". - -We assume you have a library zip bundle that needs to be installed. -To install the library, use the Managix install command. An example is shown below. 
- - $ managix install -n my_asterix -d feeds -l testlib -p extlibs/asterix-external-data-0.8.7-binary-assembly.zip - -You should see the following message: - - INFO: Installed library testlib - -We shall next start our AsterixDB instance using the start command as shown below. - - $ managix start -n my_asterix - -You may now use the AsterixDB library in AQL statements and queries. To look at the installed artifacts, you may execute the following query at the AsterixDB web-console. - - for $x in dataset Metadata.Function - return $x - - for $x in dataset Metadata.Library - return $x - -Our library is now installed and is ready to be used. - - -## <a id="PreprocessingCollectedData">Preprocessing Collected Data</a> ### - -In the following we assume that you already created the `TwitterFeed` and its corresponding data types and dataset following the instruction explained in the [feeds tutorial](feeds/tutorial.html). - -A feed definition may optionally include the specification of a -user-defined function that is to be applied to each feed object prior -to persistence. Examples of pre-processing might include adding -attributes, filtering out objects, sampling, sentiment analysis, feature -extraction, etc. We can express a UDF, which can be defined in AQL or in a programming -language such as Java, to perform such pre-processing. An AQL UDF is a good fit when -pre-processing a object requires the result of a query (join or aggregate) -over data contained in AsterixDB datasets. More sophisticated -processing such as sentiment analysis of text is better handled -by providing a Java UDF. A Java UDF has an initialization phase -that allows the UDF to access any resources it may need to initialize -itself prior to being used in a data flow. It is assumed by the -AsterixDB compiler to be stateless and thus usable as an embarrassingly -parallel black box. In contrast, the AsterixDB compiler can -reason about an AQL UDF and involve the use of indexes during -its invocation. 
- -We consider an example transformation of a raw tweet into its -lightweight version called `ProcessedTweet`, which is defined next. - - use dataverse feeds; - - create type ProcessedTweet if not exists as open { - id: string, - user_name:string, - location:point, - created_at:string, - message_text:string, - country: string, - topics: {{string}} - }; - - create dataset ProcessedTweets(ProcessedTweet) - primary key id; - -The processing required in transforming a collected tweet to its lighter version of type `ProcessedTweet` involves extracting the topics or hash-tags (if any) in a tweet -and collecting them in the referred "topics" attribute for the tweet. -Additionally, the latitude and longitude values (doubles) are combined into the spatial point type. Note that spatial data types are considered as first-class citizens that come with the support for creating indexes. Next we show a revised version of our example TwitterFeed that involves the use of a UDF. We assume that the UDF that contains the transformation logic into a "ProcessedTweet" is available as a Java UDF inside an AsterixDB library named 'testlib'. We defer the writing of a Java UDF and its installation as part of an AsterixDB library to a later section of this document. - - use dataverse feeds; - - create feed ProcessedTwitterFeed if not exists - using "push_twitter" - (("type-name"="Tweet"), - ("consumer.key"="************"), - ("consumer.secret"="**************"), - ("access.token"="**********"), - ("access.token.secret"="*************")) - - apply function testlib#addHashTagsInPlace; - -Note that a feed adaptor and a UDF act as pluggable components. These -contribute towards providing a generic "plug-and-play" model where -custom implementations can be provided to cater to specific requirements. - -####Building a Cascade Network of Feeds#### -Multiple high-level applications may wish to consume the data -ingested from a data feed. 
Each such application might perceive the -feed in a different way and require the arriving data to be processed -and/or persisted differently. Building a separate flow of data from -the external source for each application is wasteful of resources as -the pre-processing or transformations required by each application -might overlap and could be done together in an incremental fashion -to avoid redundancy. A single flow of data from the external source -could provide data for multiple applications. To achieve this, we -introduce the notion of primary and secondary feeds in AsterixDB. - -A feed in AsterixDB is considered to be a primary feed if it gets -its data from an external data source. The objects contained in a -feed (subsequent to any pre-processing) are directed to a designated -AsterixDB dataset. Alternatively or additionally, these objects can -be used to derive other feeds known as secondary feeds. A secondary -feed is similar to its parent feed in every other aspect; it can -have an associated UDF to allow for any subsequent processing, -can be persisted into a dataset, and/or can be made to derive other -secondary feeds to form a cascade network. A primary feed and a -dependent secondary feed form a hierarchy. As an example, we next show an -example AQL statement that redefines the previous feed -"ProcessedTwitterFeed" in terms of their -respective parent feed (TwitterFeed). 
- - use dataverse feeds; - - drop feed ProcessedTwitterFeed if exists; - - create secondary feed ProcessedTwitterFeed from feed TwitterFeed - apply function testlib#addHashTags; - - connect feed ProcessedTwitterFeed to dataset ProcessedTweets; - -The `addHashTags` function is already provided in the example UDF.To see what objects -are being inserted into the dataset, we can perform a simple dataset scan after -allowing a few moments for the feed to start ingesting data: - - use dataverse feeds; - - for $i in dataset ProcessedTweets limit 10 return $i; - -For an example of how to write a Java UDF from scratch, the source for the example -UDF that has been used in this tutorial is available [here] (https://github.com/apache/asterixdb/tree/master/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library) - -## <a name="installingUDF">Unstalling an AsterixDB Library</a>### - -To uninstall a library, use the Managix uninstall command as follows: - - $ managix stop -n my_asterix - - $ managix uninstall -n my_asterix -d feeds -l testlib - - diff --git a/asterixdb/asterix-doc/src/site/site.xml b/asterixdb/asterix-doc/src/site/site.xml index 1167c37..6db028e 100644 --- a/asterixdb/asterix-doc/src/site/site.xml +++ b/asterixdb/asterix-doc/src/site/site.xml @@ -90,7 +90,7 @@ <menu name="Advanced Features"> <item name="Accessing External Data" href="aql/externaldata.html"/> - <item name="Support for Data Ingestion" href="feeds/tutorial.html"/> + <item name="Data Ingestion with Feeds" href="feeds.html"/> <item name="User Defined Functions" href="udf.html"/> <item name="Filter-Based LSM Index Acceleration" href="sqlpp/filters.html"/> <item name="Support of Full-text Queries" href="sqlpp/fulltext.html"/> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java index d915559..9381d09 100644 --- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java @@ -146,22 +146,4 @@ private Tweet_User() { } } - - /* - The following assumes this DDL (but ignoring the field name orders): - create type ProcessedTweet if not exists as open { - id: string, - user_name:string, - location:point, - created_at:string, - message_text:string, - country: string, - topics: [string] - }; - */ - public static final class ProcessedTweet { - public static final String USER_NAME = "user_name"; - public static final String LOCATION = "location"; - public static final String TOPICS = "topics"; - } } diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java deleted file mode 100644 index 1b5fecd..0000000 --- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.asterix.external.library; - -import org.apache.asterix.external.library.java.JBuiltinType; -import org.apache.asterix.external.library.java.base.JDouble; -import org.apache.asterix.external.library.java.base.JPoint; -import org.apache.asterix.external.library.java.base.JRecord; -import org.apache.asterix.external.library.java.base.JString; -import org.apache.asterix.external.library.java.base.JUnorderedList; -import org.apache.asterix.external.api.IExternalScalarFunction; -import org.apache.asterix.external.api.IFunctionHelper; -import org.apache.asterix.external.library.java.JTypeTag; -import org.apache.asterix.external.util.Datatypes; - -public class AddHashTagsFunction implements IExternalScalarFunction { - - private JUnorderedList list = null; - private JPoint location = null; - - @Override - public void initialize(IFunctionHelper functionHelper) { - list = new JUnorderedList(JBuiltinType.JSTRING); - location = new JPoint(0, 0); - } - - @Override - public void deinitialize() { - } - - @Override - public void evaluate(IFunctionHelper functionHelper) throws Exception { - list.clear(); - JRecord inputRecord = (JRecord) functionHelper.getArgument(0); - JString text = (JString) inputRecord.getValueByName(Datatypes.Tweet.MESSAGE); - JDouble latitude = (JDouble) inputRecord.getValueByName(Datatypes.Tweet.LATITUDE); - JDouble longitude = (JDouble) inputRecord.getValueByName(Datatypes.Tweet.LONGITUDE); - - if (latitude != null && longitude != null) { - location.setValue(latitude.getValue(), longitude.getValue()); - } else { - location.setValue(0, 0); - } - - String[] tokens = text.getValue().split(" "); - for (String tk : tokens) { - if (tk.startsWith("#")) { - JString newField = (JString) functionHelper.getObject(JTypeTag.STRING); - newField.setValue(tk); - list.add(newField); - } - } - - JRecord outputRecord = (JRecord) functionHelper.getResultObject(); - outputRecord.setField(Datatypes.Tweet.ID, 
inputRecord.getValueByName(Datatypes.Tweet.ID)); - - JRecord userRecord = (JRecord) inputRecord.getValueByName(Datatypes.Tweet.USER); - outputRecord.setField(Datatypes.ProcessedTweet.USER_NAME, - userRecord.getValueByName(Datatypes.Tweet.SCREEN_NAME)); - - outputRecord.setField(Datatypes.ProcessedTweet.LOCATION, location); - outputRecord.setField(Datatypes.Tweet.CREATED_AT, inputRecord.getValueByName(Datatypes.Tweet.CREATED_AT)); - outputRecord.setField(Datatypes.Tweet.MESSAGE, text); - outputRecord.setField(Datatypes.ProcessedTweet.TOPICS, list); - - functionHelper.setResult(outputRecord); - } - -} diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java index 7873835..ecee876 100644 --- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java +++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java @@ -54,7 +54,7 @@ list.add(newField); } } - inputRecord.addField(Datatypes.ProcessedTweet.TOPICS, list); + inputRecord.addField("topics", list); functionHelper.setResult(inputRecord); } diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddMentionedUsersFactory.java similarity index 89% rename from asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java rename to asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddMentionedUsersFactory.java index db693a1..92e8ade 100644 --- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java +++ 
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddMentionedUsersFactory.java @@ -21,11 +21,11 @@ import org.apache.asterix.external.api.IExternalScalarFunction; import org.apache.asterix.external.api.IFunctionFactory; -public class AddHashTagsFactory implements IFunctionFactory { +public class AddMentionedUsersFactory implements IFunctionFactory { @Override public IExternalScalarFunction getExternalFunction() { - return new AddHashTagsFunction(); + return new addMentionedUsersFunction(); } } diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/addMentionedUsersFunction.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/addMentionedUsersFunction.java new file mode 100644 index 0000000..981aa2b --- /dev/null +++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/addMentionedUsersFunction.java @@ -0,0 +1,63 @@ +/* 1 + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.asterix.external.library; + +import org.apache.asterix.external.library.java.JBuiltinType; +import org.apache.asterix.external.library.java.base.JRecord; +import org.apache.asterix.external.library.java.base.JString; +import org.apache.asterix.external.library.java.base.JUnorderedList; +import org.apache.asterix.external.api.IExternalScalarFunction; +import org.apache.asterix.external.api.IFunctionHelper; +import org.apache.asterix.external.library.java.JTypeTag; +import org.apache.asterix.external.util.Datatypes; + +public class addMentionedUsersFunction implements IExternalScalarFunction { + + private JUnorderedList list = null; + private String textFieldName; + + @Override + public void initialize(IFunctionHelper functionHelper) { + list = new JUnorderedList(JBuiltinType.JSTRING); + textFieldName = functionHelper.getParameters().get(0); + } + + @Override + public void deinitialize() { + } + + @Override + public void evaluate(IFunctionHelper functionHelper) throws Exception { + list.clear(); + JRecord inputRecord = (JRecord) functionHelper.getArgument(0); + JString text = (JString) inputRecord.getValueByName(textFieldName); + + String[] tokens = text.getValue().split(" "); + for (String tk : tokens) { + if (tk.startsWith("@")) { + JString newField = (JString) functionHelper.getObject(JTypeTag.STRING); + newField.setValue(tk); + list.add(newField); + } + } + inputRecord.addField("mentionedUsers", list); + functionHelper.setResult(inputRecord); + } + +} diff --git a/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml b/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml index 6b59041..de6a67f 100644 --- a/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml +++ b/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml @@ -42,16 +42,15 @@ <function_type>SCALAR</function_type> <argument_type>TweetInputType</argument_type> 
<return_type>TweetOutputType</return_type> - <definition>org.apache.asterix.external.library.ParseTweetFactory - </definition> + <definition>org.apache.asterix.external.library.ParseTweetFactory</definition> </libraryFunction> <libraryFunction> - <name>addHashTags</name> + <name>addMentionedUsers</name> <function_type>SCALAR</function_type> - <argument_type>Tweet</argument_type> - <return_type>ProcessedTweet</return_type> - <definition>org.apache.asterix.external.library.AddHashTagsFactory - </definition> + <argument_type>TweetType</argument_type> + <return_type>TweetType</return_type> + <definition>org.apache.asterix.external.library.AddMentionedUsersFactory</definition> + <parameters>text</parameters> </libraryFunction> <libraryFunction> <name>addHashTagsInPlace</name> diff --git a/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm b/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm index 20dc8c8..b1fa1d1 100644 --- a/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm +++ b/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm @@ -1,4 +1,4 @@ -{ "DataverseName": "externallibtest", "Name": "testlib#addHashTags", "Arity": "1", "Params": [ "Tweet" ], "ReturnType": "ProcessedTweet", "Definition": "org.apache.asterix.external.library.AddHashTagsFactory", "Language": "JAVA", "Kind": "SCALAR" } +{ "DataverseName": "externallibtest", "Name": "testlib#addMentionedUsers", "Arity": "1", "Params": [ "TweetType" ], "ReturnType": "TweetType", "Definition": "org.apache.asterix.external.library.AddMentionedUsersFactory", "Language": "JAVA", "Kind": "SCALAR" } { "DataverseName": "externallibtest", "Name": "testlib#addHashTagsInPlace", "Arity": "1", "Params": [ "Tweet" ], 
"ReturnType": "ProcessedTweet", "Definition": "org.apache.asterix.external.library.AddHashTagsInPlaceFactory", "Language": "JAVA", "Kind": "SCALAR" } { "DataverseName": "externallibtest", "Name": "testlib#allTypes", "Arity": "1", "Params": [ "AllType" ], "ReturnType": "AllType", "Definition": "org.apache.asterix.external.library.AllTypesFactory", "Language": "JAVA", "Kind": "SCALAR" } { "DataverseName": "externallibtest", "Name": "testlib#echoDelay", "Arity": "1", "Params": [ "TweetMessageType" ], "ReturnType": "TweetMessageType", "Definition": "org.apache.asterix.external.library.EchoDelayFactory", "Language": "JAVA", "Kind": "SCALAR" } -- To view, visit https://asterix-gerrit.ics.uci.edu/2804 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: merged Gerrit-Change-Id: I17b1b4d639ca38689298ce88145257e794eb90e1 Gerrit-PatchSet: 17 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Xikui Wang <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Ian Maxon <[email protected]> Gerrit-Reviewer: Ian2 Maxon <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Michael Carey <[email protected]> Gerrit-Reviewer: Taewoo Kim <[email protected]> Gerrit-Reviewer: Till Westmann <[email protected]> Gerrit-Reviewer: Wail Alkowaileet <[email protected]> Gerrit-Reviewer: Xikui Wang <[email protected]>
