Xikui Wang has uploaded a new change for review.
https://asterix-gerrit.ics.uci.edu/2804
Change subject: [NO ISSUE] User-defined Function Documentation update
......................................................................
[NO ISSUE] User-defined Function Documentation update
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
1. Updated the UDF documentation to be consistent with current master.
2. Cleaned default UDF package to remove useless UDFs.
3. Added the example in documentation as a test case for IT.
4. Reorganized the documentation to keep up with the new structure.
5. Minor changes to other documentation pages to keep style consistent.
Change-Id: I17b1b4d639ca38689298ce88145257e794eb90e1
---
M asterixdb/asterix-app/src/test/resources/runtimets/only_sqlpp.xml
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm
M
asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm
M asterixdb/asterix-doc/pom.xml
R asterixdb/asterix-doc/src/main/data_ingestion/feeds.md
A asterixdb/asterix-doc/src/main/data_ingestion/feeds_title.md
M asterixdb/asterix-doc/src/main/installation/ansible_title.md
M asterixdb/asterix-doc/src/main/installation/aws_title.md
A asterixdb/asterix-doc/src/main/user-defined_function/udf.md
A asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md
M asterixdb/asterix-doc/src/site/markdown/ncservice.md
D asterixdb/asterix-doc/src/site/markdown/udf.md
M asterixdb/asterix-doc/src/site/site.xml
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java
D
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java
D
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java
R
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddMentionedUsersFactory.java
R
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/addMentionedUsersFunction.java
M asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml
M
asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm
27 files changed, 281 insertions(+), 405 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/04/2804/1
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/only_sqlpp.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/only_sqlpp.xml
index 334dd52..f20c532 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/only_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/only_sqlpp.xml
@@ -19,5 +19,15 @@
!-->
<test-suite xmlns="urn:xml.testframework.asterix.apache.org"
ResultOffsetPath="results" QueryOffsetPath="queries_sqlpp"
QueryFileExtension=".sqlpp">
<test-group name="failed">
+ <test-case FilePath="external-library">
+ <compilation-unit name="mysum">
+ <output-dir compare="Text">mysum</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="feeds">
+ <compilation-unit name="feed-with-external-function">
+ <output-dir compare="Text">feed-with-external-function</output-dir>
+ </compilation-unit>
+ </test-case>
</test-group>
</test-suite>
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp
index 9402e1f..a6a1cdc 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/mysum/mysum.3.query.sqlpp
@@ -18,5 +18,4 @@
*/
use externallibtest;
-let x=testlib#mysum(3,4)
-select VALUE x;
+testlib#mysum(3,4);
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp
index 4fdc669..3bc33de 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.1.ddl.sqlpp
@@ -22,34 +22,20 @@
* Date : 4th Oct 2017
*/
-drop dataverse externallibtest if exists;
-create dataverse externallibtest;
-use externallibtest;
+drop dataverse udfs if exists;
+create dataverse udfs;
+use udfs;
-create type TweetInputType as open {
- id: string,
- username : string,
- location : string,
- text : string,
- timestamp : string
-};
-
-create type TweetOutputType as open {
- id: string,
- username : string,
- location : string,
- text : string,
- timestamp : string,
- topics : {{string}}
+create type TweetType if not exists as open {
+ id: int64
};
create feed TweetFeed with
{
"adapter-name" : "localfs",
- "type-name" : "TweetInputType",
- "path" : "asterix_nc1://data/twitter/obamatweets.adm",
+ "type-name" : "TweetType",
+ "path" : "asterix_nc1://data/twitter/extrasmalltweets.txt",
"format" : "adm"
};
-create dataset TweetsFeedIngest(TweetOutputType)
-primary key id;
+create dataset ProcessedTweets(TweetType) primary key id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp
index d1e0e87..4f0c6d3 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.2.lib.sqlpp
@@ -16,4 +16,4 @@
* specific language governing permissions and limitations
* under the License.
*/
-install externallibtest testlib
target/data/externallib/asterix-external-data-testlib.zip
\ No newline at end of file
+install udfs testlib target/data/externallib/asterix-external-data-testlib.zip
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp
index 0d46387..1407514b 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.3.update.sqlpp
@@ -21,10 +21,8 @@
* Expected Res : Success
* Date : 4th Oct 2017
*/
-use externallibtest;
+use udfs;
-SET `compiler.parallelism` "5";
-
-connect feed TweetFeed to dataset TweetsFeedIngest apply function
`testlib#parseTweet`;
+connect feed TweetFeed to dataset ProcessedTweets apply function
testlib#addMentionedUsers;
start feed TweetFeed;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp
index 607e5bd..b95294a 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.5.pollquery.sqlpp
@@ -22,8 +22,8 @@
* Date : 4th Oct 2017
*/
// polltimeoutsecs=5
-use externallibtest;
+use udfs;
-select value t from TweetsFeedIngest t
+select value t from ProcessedTweets t
ORDER BY t.id;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp
index 86af80f..98c334d 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.6.lib.sqlpp
@@ -16,4 +16,4 @@
* specific language governing permissions and limitations
* under the License.
*/
-uninstall externallibtest testlib
\ No newline at end of file
+uninstall udfs testlib
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp
index 2a7acef..128c793 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/feeds/feed-with-external-function/feed-with-external-function.7.ddl.sqlpp
@@ -16,4 +16,4 @@
* specific language governing permissions and limitations
* under the License.
*/
-drop dataverse externallibtest if exists;
+drop dataverse udfs if exists;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm
index f0ad2b2..0417754 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/validate-default-library/validate-default-library.1.adm
@@ -1,5 +1,4 @@
-{ "Function": { "DataverseName": "externallibtest", "Name":
"testlib#addHashTags", "Arity": "1", "Params": [ "Tweet" ], "ReturnType":
"ProcessedTweet", "Definition":
"org.apache.asterix.external.library.AddHashTagsFactory", "Language": "JAVA",
"Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
-{ "Function": { "DataverseName": "externallibtest", "Name":
"testlib#addHashTagsInPlace", "Arity": "1", "Params": [ "Tweet" ],
"ReturnType": "ProcessedTweet", "Definition":
"org.apache.asterix.external.library.AddHashTagsInPlaceFactory", "Language":
"JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
+{ "Function": { "DataverseName": "externallibtest", "Name":
"testlib#addMentionedUsers", "Arity": "1", "Params": [ "TweetType" ],
"ReturnType": "TweetType", "Definition":
"org.apache.asterix.external.library.AddMentionedUsersFactory", "Language":
"JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
{ "Function": { "DataverseName": "externallibtest", "Name":
"testlib#allTypes", "Arity": "1", "Params": [ "AllType" ], "ReturnType":
"AllType", "Definition": "org.apache.asterix.external.library.AllTypesFactory",
"Language": "JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
{ "Function": { "DataverseName": "externallibtest", "Name":
"testlib#echoDelay", "Arity": "1", "Params": [ "TweetMessageType" ],
"ReturnType": "TweetMessageType", "Definition":
"org.apache.asterix.external.library.EchoDelayFactory", "Language": "JAVA",
"Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
{ "Function": { "DataverseName": "externallibtest", "Name":
"testlib#fnameDetector", "Arity": "1", "Params": [ "InputRecordType" ],
"ReturnType": "DetectResultType", "Definition":
"org.apache.asterix.external.library.KeywordsDetectorFactory", "Language":
"JAVA", "Kind": "SCALAR", "Dependencies": [ [ ], [ ] ] } }
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm
index 1291213..0f7eb82 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/feeds/feed-with-external-function/feed-with-external-function.1.adm
@@ -1,12 +1,21 @@
-{ "id": "nc1:1", "username": "BronsonMike", "location": "", "text":
"@GottaLaff @reutersus Christie and obama just foul weather friends",
"timestamp": "Thu Dec 06 16:53:06 PST 2012", "topics": {{ }} }
-{ "id": "nc1:100", "username": "KidrauhlProuds", "location": "", "text": "RT
@01Direclieber: A filha do Michael Jackson uma Belieber,a filha do Eminem e
uma Belieber,as filhas de Obama sao Beliebers, e a filha do meu pai e
Belieber", "timestamp": "Thu Dec 06 16:53:16 PST 2012", "topics": {{ }} }
-{ "id": "nc1:102", "username": "jaysauce82", "location": "", "text": "Not
voting for President Obama #BadDecision", "timestamp": "Thu Dec 06 16:53:16 PST
2012", "topics": {{ "#BadDecision" }} }
-{ "id": "nc1:104", "username": "princeofsupras", "location": "", "text": "RT
@01Direclieber: A filha do Michael Jackson e uma Belieber,a filha do Eminem e
uma Belieber,as filhas de Obama sao Beliebers, e a filha do meu pai e
Belieber", "timestamp": "Thu Dec 06 16:53:15 PST 2012", "topics": {{ }} }
-{ "id": "nc1:106", "username": "GulfDogs", "location": "", "text": "Obama
Admin Knew Libyan Terrorists Had US-Provided Weaponsteaparty #tcot #ccot
#NewGuards #BreitbartArmy #patriotwttp://t.co/vJxzrQUE", "timestamp": "Thu Dec
06 16:53:14 PST 2012", "topics": {{ "#tcot", "#ccot", "#NewGuards",
"#BreitbartArmy", "#patriotwttp://t.co/vJxzrQUE" }} }
-{ "id": "nc1:108", "username": "Laugzpz", "location": "", "text":
"@AlfredoJalife Maestro Obama se hace de la vista gorda, es un acuerdo de
siempre creo yo.", "timestamp": "Thu Dec 06 16:53:14 PST 2012", "topics": {{
}} }
-{ "id": "nc1:11", "username": "magarika", "location": "", "text": "RT
@ken24xavier: Obama tells SOROS - our plan is ALMOST finished
http://t.co/WvzK0GtU", "timestamp": "Thu Dec 06 16:53:05 PST 2012", "topics":
{{ }} }
-{ "id": "nc1:111", "username": "ToucanMall", "location": "", "text": "RT
@WorldWar3Watch: Michelle Obama Gets More Grammy Nominations Than Justin ...
#Obama #WW3 http://t.co/0Wv2GKij", "timestamp": "Thu Dec 06 16:53:13 PST 2012",
"topics": {{ "#Obama", "#WW3" }} }
-{ "id": "nc1:113", "username": "ToucanMall", "location": "", "text": "RT
@ObamaPalooza: Tiffany Shared What $2,000 Meant to Her ... and the President
Stopped by to Talk About It http://t.co/sgT7lsNV #Obama", "timestamp": "Thu Dec
06 16:53:12 PST 2012", "topics": {{ "#Obama" }} }
-{ "id": "nc1:115", "username": "thewildpitch", "location": "", "text": "RT
@RevkahJC: Dennis Miller: Obama Should Just Say He Wants To Tax Successful
People http://t.co/Ihlemy9Y", "timestamp": "Thu Dec 06 16:53:11 PST 2012",
"topics": {{ }} }
-{ "id": "nc1:117", "username": "Rnugent24", "location": "", "text": "RT
@ConservativeQuo: unemployment is above 8% again. I wonder how long it will
take for Obama to start blaming Bush? 3-2-1 #tcot #antiobama", "timestamp":
"Thu Dec 06 16:53:10 PST 2012", "topics": {{ "#tcot", "#antiobama" }} }
-{ "id": "nc1:119", "username": "ToucanMall", "location": "", "text": "RT
@Newitrsdotcom: I hope #Obama will win re-election... Other four years without
meaningless #wars", "timestamp": "Thu Dec 06 16:53:09 PST 2012", "topics": {{
"#Obama", "#wars" }} }
+{ "id": 21, "tweetid": 69902639026020352, "loc": point("34.5,-100.5"), "time":
datetime("2011-05-15T16:11:02.000Z"), "text": "thats that smokers cough maam
<<<<<--- @x_incredibleL :: Allergies. i got that
"cough" lol", "mentionedUsers": [ "@x_incredibleL" ] }
+{ "id": 22, "tweetid": 69988755800465408, "loc": point("34.5,-97.5"), "time":
datetime("2011-05-15T21:53:14.000Z"), "text": "Allergies fuckin over
me..#damn", "mentionedUsers": [ ] }
+{ "id": 23, "tweetid": 69940039605432320, "loc": point("34.5,-97.5"), "time":
datetime("2011-05-15T18:39:39.000Z"), "text": "Natural Asthma Remedy - Deal
With Your Asthma in a Natural Way.. Allergies", "mentionedUsers": [ ] }
+{ "id": 24, "tweetid": 69834276929159169, "loc": point("25.5,-100.5"), "time":
datetime("2011-05-15T11:39:23.000Z"), "text": "Damn Allergies... sneezing like
crazy! >_<", "mentionedUsers": [ ] }
+{ "id": 25, "tweetid": 69950146787553281, "loc": point("25.5,-97.5"), "time":
datetime("2011-05-15T19:19:49.000Z"), "text": "pass me an asthma pump",
"mentionedUsers": [ ] }
+{ "id": 26, "tweetid": 69754524767756289, "loc": point("25.5,-97.5"), "time":
datetime("2011-05-15T06:22:29.000Z"), "text": "Never knew allergies could
actually keep me from sleeping", "mentionedUsers": [ ] }
+{ "id": 27, "tweetid": 69999864498487297, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T22:37:22.000Z"), "text": "@ItsCrystal320 gooodd mommy!
Except my allergies have been acting up :( and Im having issues with you know
who. Smh nothing new. Lol", "mentionedUsers": [ "@ItsCrystal320" ] }
+{ "id": 28, "tweetid": 69996796616777728, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T22:25:11.000Z"), "text": "My allergies act up so much
while Im in this house!!! Idk why! Sneezing, now my eye is swollen!! Smh.",
"mentionedUsers": [ ] }
+{ "id": 29, "tweetid": 69977295351316480, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T21:07:41.000Z"), "text": "@GOLDenNote6 lmmmaaaoooo!!!!
nnnnooo! ur the one that needs the asthma pump!", "mentionedUsers": [
"@GOLDenNote6" ] }
+{ "id": 30, "tweetid": 69972022586912768, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T20:46:44.000Z"), "text": "@TinaLee90 hell yeah ! He
snapped cause she got allergies and heavy she be snorting and coughing while he
trying to study", "mentionedUsers": [ "@TinaLee90" ] }
+{ "id": 31, "tweetid": 69965044678524928, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T20:19:01.000Z"), "text": "Back home and my ears begin to
itch!!! Omg allergies go away please! #thingsicanlivewithout",
"mentionedUsers": [ ] }
+{ "id": 32, "tweetid": 69961997680246784, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T20:06:54.000Z"), "text": "@BravoAndy allergies acting up
again or you just digging the glasses? Haha u rock it though!",
"mentionedUsers": [ "@BravoAndy" ] }
+{ "id": 33, "tweetid": 69946356248215552, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T19:04:45.000Z"), "text": "My allergies act up at the worst
times -_-", "mentionedUsers": [ ] }
+{ "id": 34, "tweetid": 69929466691993600, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T17:57:38.000Z"), "text": "Hate being sick!!! -_____- I
hate you allergies! :/", "mentionedUsers": [ ] }
+{ "id": 35, "tweetid": 69928014615556096, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T17:51:52.000Z"), "text": "Allergies please go away :(",
"mentionedUsers": [ ] }
+{ "id": 36, "tweetid": 69916338092654592, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T17:05:28.000Z"), "text": "I feel tired....i got asthma :(
but it was still an awesome birthday", "mentionedUsers": [ ] }
+{ "id": 37, "tweetid": 69911241975529474, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T16:45:13.000Z"), "text": "Cant stand that asthma
commercial with the gold fish -__-", "mentionedUsers": [ ] }
+{ "id": 38, "tweetid": 69910467233062912, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T16:42:08.000Z"), "text": "@PapisFavWave whats wrong? Got a
cold? Asthma ?", "mentionedUsers": [ "@PapisFavWave" ] }
+{ "id": 39, "tweetid": 69908652202536961, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T16:34:56.000Z"), "text": "My allergies are killing me!",
"mentionedUsers": [ ] }
+{ "id": 40, "tweetid": 69897794273546240, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T15:51:47.000Z"), "text": "and allergies",
"mentionedUsers": [ ] }
+{ "id": 41, "tweetid": 69893733449080832, "loc": point("25.5,-80.5"), "time":
datetime("2011-05-15T15:35:39.000Z"), "text": "Repeated splashing of water
about the skin, specifically following an exposure to pollution and dirt, makes
sure... http://bit.ly/mnWnJo", "mentionedUsers": [ ] }
diff --git a/asterixdb/asterix-doc/pom.xml b/asterixdb/asterix-doc/pom.xml
index 8ddc1d8..0cc43e4 100644
--- a/asterixdb/asterix-doc/pom.xml
+++ b/asterixdb/asterix-doc/pom.xml
@@ -69,6 +69,12 @@
<concat
destfile="${project.build.directory}/generated-site/markdown/aws.md">
<filelist dir="${project.basedir}/src/main/installation/"
files="aws_title.md,aws.md" />
</concat>
+ <concat
destfile="${project.build.directory}/generated-site/markdown/feeds.md">
+ <filelist
dir="${project.basedir}/src/main/data_ingestion/"
files="feeds_title.md,feeds.md" />
+ </concat>
+ <concat
destfile="${project.build.directory}/generated-site/markdown/udf.md">
+ <filelist
dir="${project.basedir}/src/main/user-defined_function/"
files="udf_title.md,udf.md" />
+ </concat>
</target>
</configuration>
<goals>
diff --git a/asterixdb/asterix-doc/src/site/markdown/feeds/tutorial.md
b/asterixdb/asterix-doc/src/main/data_ingestion/feeds.md
similarity index 96%
rename from asterixdb/asterix-doc/src/site/markdown/feeds/tutorial.md
rename to asterixdb/asterix-doc/src/main/data_ingestion/feeds.md
index f5635b8..23d8b8e 100644
--- a/asterixdb/asterix-doc/src/site/markdown/feeds/tutorial.md
+++ b/asterixdb/asterix-doc/src/main/data_ingestion/feeds.md
@@ -17,14 +17,6 @@
! under the License.
!-->
-# Support for Data Ingestion in AsterixDB #
-
-## <a id="#toc">Table of Contents</a> ##
-
-* [Introduction](#Introduction)
-* [Feed Adapters](#FeedAdapters)
-* [Feed Policies](#FeedPolicies)
-
## <a name="Introduction">Introduction</a> ##
In this document, we describe the support for data ingestion in
@@ -101,7 +93,12 @@
The "push_twitter" adapter takes as configuration the above mentioned
parameters. End users are required to obtain the above authentication
credentials prior to
using the "push_twitter" adapter. For further information on obtaining OAuth
keys and tokens and
-registering an application with Twitter, please visit http://apps.twitter.com
+registering an application with Twitter, please visit http://apps.twitter.com.
+
+Note that AsterixDB uses the Twitter4J API for getting data from Twitter. Due
to a license conflict,
+Apache AsterixDB cannot ship with the Twitter4J library. For using the Twitter
adapter in AsterixDB properly,
+please download the necessary dependencies (`twitter4j-core-4.0.x.jar` and
`twitter4j-stream-4.0.x.jar`) and drop
+them into the `repo/` directory before AsterixDB starts.
Given below is an example SQL++ statement that creates a feed called
"TwitterFeed" by using the
"push_twitter" adapter.
diff --git a/asterixdb/asterix-doc/src/main/data_ingestion/feeds_title.md
b/asterixdb/asterix-doc/src/main/data_ingestion/feeds_title.md
new file mode 100644
index 0000000..1b7293d
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/data_ingestion/feeds_title.md
@@ -0,0 +1,25 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements. See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership. The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License. You may obtain a copy of the License at
+ !
+ ! http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied. See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+# Data Ingestion with Feeds #
+
+## <a id="#toc">Table of Contents</a> ##
+* [Introduction](#Introduction)
+* [Feed Adapters](#FeedAdapters)
+* [Feed Policies](#FeedPolicies)
\ No newline at end of file
diff --git a/asterixdb/asterix-doc/src/main/installation/ansible_title.md
b/asterixdb/asterix-doc/src/main/installation/ansible_title.md
index 307580a..d72801f 100644
--- a/asterixdb/asterix-doc/src/main/installation/ansible_title.md
+++ b/asterixdb/asterix-doc/src/main/installation/ansible_title.md
@@ -16,7 +16,9 @@
! specific language governing permissions and limitations
! under the License.
!-->
+# Installation using Ansible #
+## <a id="#toc">Table of Contents</a> ##
* [Introduction](#Introduction)
* [Prerequisites](#Prerequisites)
* [Cluster Configuration](#config)
diff --git a/asterixdb/asterix-doc/src/main/installation/aws_title.md
b/asterixdb/asterix-doc/src/main/installation/aws_title.md
index abf01c9..9af36a9 100644
--- a/asterixdb/asterix-doc/src/main/installation/aws_title.md
+++ b/asterixdb/asterix-doc/src/main/installation/aws_title.md
@@ -16,7 +16,9 @@
! specific language governing permissions and limitations
! under the License.
!-->
+# Installation using Amazon Web Services #
+## <a id="#toc">Table of Contents</a> ##
* [Introduction](#Introduction)
* [Prerequisites](#Prerequisites)
* [Cluster Configuration](#config)
diff --git a/asterixdb/asterix-doc/src/main/user-defined_function/udf.md
b/asterixdb/asterix-doc/src/main/user-defined_function/udf.md
new file mode 100644
index 0000000..d1f5348
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/user-defined_function/udf.md
@@ -0,0 +1,147 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements. See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership. The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License. You may obtain a copy of the License at
+ !
+ ! http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied. See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+## <a name="introduction">Introduction</a>##
+
+Apache AsterixDB supports two types of user-defined functions (UDFs): SQLPP
function and Programming language based function.
+A user can encapsulate a certain data processing logic into a UDF and invoke it
+later repeatedly. For SQLPP function, a user can refer to [SQLPP
Functions](sqlpp/manual.html#Functions)
+for its usage. In this document, we
+mainly focus on how to install/use/uninstall a programming language function
library using the Ansible script that we provided.
+Currently, AsterixDB only supports Java based programming UDF. We are working
on supporting more languages. :)
+
+
+## <a name="installingUDF">Installing an UDF Library</a>##
+
+Before we proceed, we assume you have followed the [installation
instruction](ansible.html)
+to deploy an AsterixDB instance on your local machine or cluster. The UDFs
have to be installed offline. Here are the
+instructions to install an UDF library.
+
+- Step 1: Stop the AsterixDB instance if it is ACTIVE.
+
+ $ bin/stop.sh
+
+- Step 2: Deploy the UDF package.
+
+ $ bin/udf.sh -m i -d DATAVERSE_NAME -l LIBRARY_NAME -p UDF_PACKAGE_PATH
+
+- Step 3: Start the AsterixDB
+
+ $ bin/start.sh
+
+After AsterixDB starts, you can use the following query to check whether your
UDFs are successfully installed.
+
+ SELECT * FROM Metadata.`Function`;
+
+In the AsterixDB source release, we provided several sample UDFs for you to
try out. You need to build the AsterixDB source to get
+the compiled UDF package. The package can be found under
+the `asterix-external-data` sub-project. Assuming we installed the UDFs into the
`udfs` dataverse and `testlib` library,
+here is an example to use `mysum` to compute the sum of two input integers.
+
+ use udfs;
+
+ testlib#mysum(3,4);
+
+## <a id="UDFOnFeeds">Attaching an UDF on Data Feeds</a> ##
+
+In [Data Ingestion using feeds](feeds.html), we introduce an efficient way for
users to get data into AsterixDB. In
+some use cases, users may want to pre-process the incoming data before storing
it into the dataset. For such scenarios,
+AsterixDB allows
+the user to attach an UDF onto the ingestion pipeline. Following the example
in [Data Ingestion](feeds.html), here we
+show an example of attaching an UDF that extracts the user names mentioned
from the incoming Tweet text and storing the
+processed Tweets into a dataset.
+
+We start with creating the datatype and dataset that are needed for the feed
and UDF. One thing worth noticing is that
+the data flows from the feed to the UDF and then to the dataset. This means
the feed datatype
+has to be the same as the input type of the UDF, and the output datatype of
the UDF has to be the same as the dataset
+datatype. Thus, in your configuration, you have to make sure the datatypes are
consistent. If you don't want to deal with
+these complications, you can take advantage of the open types in AsterixDB by
creating a minimum description of your data.
+Here we are going to use the latter option for simplicity.
+
+ use udfs;
+
+ create type TweetType if not exists as open {
+ id: int64
+ };
+
+ create dataset ProcessedTweets(TweetType) primary key id;
+
+As we created the dataset with an open datatype, the processed Tweets can be
stored into the dataset even when they have
+an extra field containing the extracted user names. Then we create a Twitter
Feed with the same datatype. If you have
+trouble creating the feed, please refer to section [Data
Ingestion](feeds.html).
+
+ use udfs;
+
+ create feed TwitterFeed with {
+ "adapter-name": "push_twitter",
+ "type-name": "TweetType",
+ "format": "twitter-status",
+ "consumer.key": "************",
+ "consumer.secret": "************",
+ "access.token": "**********",
+ "access.token.secret": "*************"
+ };
+
+After creating the feed, we attach the UDF onto the feed pipeline and start
the feed with the following statements.
+
+ use udfs;
+
+ connect feed TwitterFeed to dataset ProcessedTweets apply function
testlib#addMentionedUsers;
+
+ start feed TwitterFeed;
+
+You will be able to check the processed Tweet by querying the
`ProcessedTweets` dataset.
+
+ SELECT * FROM ProcessedTweets LIMIT 10;
+
+## <a name="udfConfiguration">A quick look of the UDF configuration</a>##
+
+AsterixDB uses an XML configuration file to describe the UDFs. A user can use
it to define and reuse the compiled UDFs
+for different purposes. Here is a snippet of the configuration used in our
[previous example](#UDFOnFeeds).
+
+ <libraryFunction>
+ <name>addMentionedUsers</name>
+ <function_type>SCALAR</function_type>
+ <argument_type>TweetType</argument_type>
+ <return_type>TweetType</return_type>
+
<definition>org.apache.asterix.external.library.AddMentionedUsersFactory</definition>
+ <parameters>text</parameters>
+ </libraryFunction>
+
+Here are the explanations of the fields in the configuration file:
+
+ name: The proper name that is used to invoke the function.
+ function_type: The type of the function.
+ argument_type: The datatype of the arguments passed in. If there is
more than one parameter, separate them with comma(s), e.g., `AINT32,AINT32`.
+ return_type: The datatype of the returning value.
+ definition: The reference of the function factory.
+ parameters: The parameters that are passed into the function.
+
+In our feeds example, we passed in `"text"` as the parameter for the function
so it knows where to find the Tweet text.
+If the Twitter API changes the field names in the future, we can match the
changes by tweaking the configuration file only,
+instead of recompiling the whole UDF package. This feature can be further
utilized in use cases where the users have a Machine Learning
+algorithm with different model files. You can find more interesting use cases
in our
[codebase](https://github.com/apache/asterixdb/tree/master/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library)
+
+## <a name="uninstall">Uninstalling an UDF Library</a>##
+
+If you want to uninstall the UDFs, put AsterixDB into `INACTIVE` mode and run
the following command.
+
+ $ bin/udf.sh -m u -d DATAVERSE_NAME -l LIBRARY_NAME
+
+
diff --git a/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md
b/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md
new file mode 100644
index 0000000..659c13b
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md
@@ -0,0 +1,27 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements. See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership. The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License. You may obtain a copy of the License at
+ !
+ ! http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied. See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+# User-defined Functions #
+
+## <a id="toc">Table of Contents</a> ##
+* [Introduction](#introduction)
+* [Installing an UDF Library](#installingUDF)
+* [Attaching an UDF on Data Feeds](#UDFOnFeeds)
+* [A quick look of the UDF configuration](#udfConfiguration)
+* [Uninstalling an UDF Library](#uninstall)
\ No newline at end of file
diff --git a/asterixdb/asterix-doc/src/site/markdown/ncservice.md
b/asterixdb/asterix-doc/src/site/markdown/ncservice.md
index 2b309ce..ef2ac9b 100644
--- a/asterixdb/asterix-doc/src/site/markdown/ncservice.md
+++ b/asterixdb/asterix-doc/src/site/markdown/ncservice.md
@@ -17,6 +17,8 @@
! under the License.
!-->
+# Installation using NCService #
+
## <a id="toc">Table of Contents</a> ##
* [Quick Start](#quickstart)
diff --git a/asterixdb/asterix-doc/src/site/markdown/udf.md
b/asterixdb/asterix-doc/src/site/markdown/udf.md
deleted file mode 100644
index b2ef2bc..0000000
--- a/asterixdb/asterix-doc/src/site/markdown/udf.md
+++ /dev/null
@@ -1,189 +0,0 @@
-<!--
- ! Licensed to the Apache Software Foundation (ASF) under one
- ! or more contributor license agreements. See the NOTICE file
- ! distributed with this work for additional information
- ! regarding copyright ownership. The ASF licenses this file
- ! to you under the Apache License, Version 2.0 (the
- ! "License"); you may not use this file except in compliance
- ! with the License. You may obtain a copy of the License at
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing,
- ! software distributed under the License is distributed on an
- ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- ! KIND, either express or implied. See the License for the
- ! specific language governing permissions and limitations
- ! under the License.
- !-->
-
-# Support for User Defined Functions in AsterixDB #
-
-## <a id="#toc">Table of Contents</a> ##
-* [Using UDF to preprocess feed-collected data](#PreprocessingCollectedData)
-* [Writing an External UDF](#WritingAnExternalUDF)
-* [Creating an AsterixDB Library](#CreatingAnAsterixDBLibrary)
-* [Installing an AsterixDB Library](#installingUDF)
-
-In this document, we describe the support for implementing, using, and
installing user-defined functions (UDF) in
-AsterixDB. We will explain how we can use UDFs to preprocess, e.g., data
collected using feeds (see the [feeds tutorial](feeds/tutorial.html)).
-
-
-### <a name="installingUDF">Installing an AsterixDB Library</a>###
-
-We assume you have followed the [installation instructions](../install.html)
to set up a running AsterixDB instance. Let us refer your AsterixDB instance by
the name "my_asterix".
-
-- Step 1: Stop the AsterixDB instance if it is in the ACTIVE state.
-
- $ managix stop -n my_asterix
-
-- Step 2: Install the library using Managix install command. Just to
illustrate, we use the help command to look up the syntax
-
- $ managix help -cmd install
- Installs a library to an asterix instance.
- Options
- n Name of Asterix Instance
- d Name of the dataverse under which the library will be installed
- l Name of the library
- p Path to library zip bundle
-
-Above is a sample output and explains the usage and the required parameters.
Each library has a name and is installed under a dataverse. Recall that we had
created a dataverse by the name - "feeds" prior to creating our datatypes and
dataset. We shall name our library - "testlib".
-
-We assume you have a library zip bundle that needs to be installed.
-To install the library, use the Managix install command. An example is shown
below.
-
- $ managix install -n my_asterix -d feeds -l testlib -p
extlibs/asterix-external-data-0.8.7-binary-assembly.zip
-
-You should see the following message:
-
- INFO: Installed library testlib
-
-We shall next start our AsterixDB instance using the start command as shown
below.
-
- $ managix start -n my_asterix
-
-You may now use the AsterixDB library in AQL statements and queries. To look
at the installed artifacts, you may execute the following query at the
AsterixDB web-console.
-
- for $x in dataset Metadata.Function
- return $x
-
- for $x in dataset Metadata.Library
- return $x
-
-Our library is now installed and is ready to be used.
-
-
-## <a id="PreprocessingCollectedData">Preprocessing Collected Data</a> ###
-
-In the following we assume that you already created the `TwitterFeed` and its
corresponding data types and dataset following the instruction explained in the
[feeds tutorial](feeds/tutorial.html).
-
-A feed definition may optionally include the specification of a
-user-defined function that is to be applied to each feed object prior
-to persistence. Examples of pre-processing might include adding
-attributes, filtering out objects, sampling, sentiment analysis, feature
-extraction, etc. We can express a UDF, which can be defined in AQL or in a
programming
-language such as Java, to perform such pre-processing. An AQL UDF is a good
fit when
-pre-processing a object requires the result of a query (join or aggregate)
-over data contained in AsterixDB datasets. More sophisticated
-processing such as sentiment analysis of text is better handled
-by providing a Java UDF. A Java UDF has an initialization phase
-that allows the UDF to access any resources it may need to initialize
-itself prior to being used in a data flow. It is assumed by the
-AsterixDB compiler to be stateless and thus usable as an embarrassingly
-parallel black box. In contrast, the AsterixDB compiler can
-reason about an AQL UDF and involve the use of indexes during
-its invocation.
-
-We consider an example transformation of a raw tweet into its
-lightweight version called `ProcessedTweet`, which is defined next.
-
- use dataverse feeds;
-
- create type ProcessedTweet if not exists as open {
- id: string,
- user_name:string,
- location:point,
- created_at:string,
- message_text:string,
- country: string,
- topics: {{string}}
- };
-
- create dataset ProcessedTweets(ProcessedTweet)
- primary key id;
-
-The processing required in transforming a collected tweet to its lighter
version of type `ProcessedTweet` involves extracting the topics or hash-tags
(if any) in a tweet
-and collecting them in the referred "topics" attribute for the tweet.
-Additionally, the latitude and longitude values (doubles) are combined into
the spatial point type. Note that spatial data types are considered as
first-class citizens that come with the support for creating indexes. Next we
show a revised version of our example TwitterFeed that involves the use of a
UDF. We assume that the UDF that contains the transformation logic into a
"ProcessedTweet" is available as a Java UDF inside an AsterixDB library named
'testlib'. We defer the writing of a Java UDF and its installation as part of
an AsterixDB library to a later section of this document.
-
- use dataverse feeds;
-
- create feed ProcessedTwitterFeed if not exists
- using "push_twitter"
- (("type-name"="Tweet"),
- ("consumer.key"="************"),
- ("consumer.secret"="**************"),
- ("access.token"="**********"),
- ("access.token.secret"="*************"))
-
- apply function testlib#addHashTagsInPlace;
-
-Note that a feed adaptor and a UDF act as pluggable components. These
-contribute towards providing a generic "plug-and-play" model where
-custom implementations can be provided to cater to specific requirements.
-
-####Building a Cascade Network of Feeds####
-Multiple high-level applications may wish to consume the data
-ingested from a data feed. Each such application might perceive the
-feed in a different way and require the arriving data to be processed
-and/or persisted differently. Building a separate flow of data from
-the external source for each application is wasteful of resources as
-the pre-processing or transformations required by each application
-might overlap and could be done together in an incremental fashion
-to avoid redundancy. A single flow of data from the external source
-could provide data for multiple applications. To achieve this, we
-introduce the notion of primary and secondary feeds in AsterixDB.
-
-A feed in AsterixDB is considered to be a primary feed if it gets
-its data from an external data source. The objects contained in a
-feed (subsequent to any pre-processing) are directed to a designated
-AsterixDB dataset. Alternatively or additionally, these objects can
-be used to derive other feeds known as secondary feeds. A secondary
-feed is similar to its parent feed in every other aspect; it can
-have an associated UDF to allow for any subsequent processing,
-can be persisted into a dataset, and/or can be made to derive other
-secondary feeds to form a cascade network. A primary feed and a
-dependent secondary feed form a hierarchy. As an example, we next show an
-example AQL statement that redefines the previous feed
-"ProcessedTwitterFeed" in terms of their
-respective parent feed (TwitterFeed).
-
- use dataverse feeds;
-
- drop feed ProcessedTwitterFeed if exists;
-
- create secondary feed ProcessedTwitterFeed from feed TwitterFeed
- apply function testlib#addHashTags;
-
- connect feed ProcessedTwitterFeed to dataset ProcessedTweets;
-
-The `addHashTags` function is already provided in the example UDF.To see what
objects
-are being inserted into the dataset, we can perform a simple dataset scan after
-allowing a few moments for the feed to start ingesting data:
-
- use dataverse feeds;
-
- for $i in dataset ProcessedTweets limit 10 return $i;
-
-For an example of how to write a Java UDF from scratch, the source for the
example
-UDF that has been used in this tutorial is available [here]
(https://github.com/apache/asterixdb/tree/master/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library)
-
-## <a name="installingUDF">Unstalling an AsterixDB Library</a>###
-
-To uninstall a library, use the Managix uninstall command as follows:
-
- $ managix stop -n my_asterix
-
- $ managix uninstall -n my_asterix -d feeds -l testlib
-
-
diff --git a/asterixdb/asterix-doc/src/site/site.xml
b/asterixdb/asterix-doc/src/site/site.xml
index 90877a0..a201d25 100644
--- a/asterixdb/asterix-doc/src/site/site.xml
+++ b/asterixdb/asterix-doc/src/site/site.xml
@@ -99,7 +99,7 @@
<menu name="Advanced Features">
<item name="Support of Full-text Queries" href="aql/fulltext.html"/>
<item name="Accessing External Data" href="aql/externaldata.html"/>
- <item name="Support for Data Ingestion" href="feeds/tutorial.html"/>
+ <item name="Data Ingestion with Feeds" href="feeds.html"/>
<item name="User Defined Functions" href="udf.html"/>
<item name="Filter-Based LSM Index Acceleration"
href="aql/filters.html"/>
<item name="Support of Similarity Queries" href="aql/similarity.html"/>
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java
index 94d7b53..9381d09 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/Datatypes.java
@@ -18,8 +18,6 @@
*/
package org.apache.asterix.external.util;
-import org.apache.asterix.external.parser.TweetParser;
-
public class Datatypes {
/*
@@ -147,23 +145,5 @@
private Tweet_User() {
}
- }
-
- /*
- The following assumes this DDL (but ignoring the field name orders):
- create type ProcessedTweet if not exists as open {
- id: string,
- user_name:string,
- location:point,
- created_at:string,
- message_text:string,
- country: string,
- topics: [string]
- };
- */
- public static final class ProcessedTweet {
- public static final String USER_NAME = "user_name";
- public static final String LOCATION = "location";
- public static final String TOPICS = "topics";
}
}
diff --git
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java
deleted file mode 100644
index db693a1..0000000
---
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFactory.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library;
-
-import org.apache.asterix.external.api.IExternalScalarFunction;
-import org.apache.asterix.external.api.IFunctionFactory;
-
-public class AddHashTagsFactory implements IFunctionFactory {
-
- @Override
- public IExternalScalarFunction getExternalFunction() {
- return new AddHashTagsFunction();
- }
-
-}
diff --git
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java
deleted file mode 100644
index 1b5fecd..0000000
---
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsFunction.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.library;
-
-import org.apache.asterix.external.library.java.JBuiltinType;
-import org.apache.asterix.external.library.java.base.JDouble;
-import org.apache.asterix.external.library.java.base.JPoint;
-import org.apache.asterix.external.library.java.base.JRecord;
-import org.apache.asterix.external.library.java.base.JString;
-import org.apache.asterix.external.library.java.base.JUnorderedList;
-import org.apache.asterix.external.api.IExternalScalarFunction;
-import org.apache.asterix.external.api.IFunctionHelper;
-import org.apache.asterix.external.library.java.JTypeTag;
-import org.apache.asterix.external.util.Datatypes;
-
-public class AddHashTagsFunction implements IExternalScalarFunction {
-
- private JUnorderedList list = null;
- private JPoint location = null;
-
- @Override
- public void initialize(IFunctionHelper functionHelper) {
- list = new JUnorderedList(JBuiltinType.JSTRING);
- location = new JPoint(0, 0);
- }
-
- @Override
- public void deinitialize() {
- }
-
- @Override
- public void evaluate(IFunctionHelper functionHelper) throws Exception {
- list.clear();
- JRecord inputRecord = (JRecord) functionHelper.getArgument(0);
- JString text = (JString)
inputRecord.getValueByName(Datatypes.Tweet.MESSAGE);
- JDouble latitude = (JDouble)
inputRecord.getValueByName(Datatypes.Tweet.LATITUDE);
- JDouble longitude = (JDouble)
inputRecord.getValueByName(Datatypes.Tweet.LONGITUDE);
-
- if (latitude != null && longitude != null) {
- location.setValue(latitude.getValue(), longitude.getValue());
- } else {
- location.setValue(0, 0);
- }
-
- String[] tokens = text.getValue().split(" ");
- for (String tk : tokens) {
- if (tk.startsWith("#")) {
- JString newField = (JString)
functionHelper.getObject(JTypeTag.STRING);
- newField.setValue(tk);
- list.add(newField);
- }
- }
-
- JRecord outputRecord = (JRecord) functionHelper.getResultObject();
- outputRecord.setField(Datatypes.Tweet.ID,
inputRecord.getValueByName(Datatypes.Tweet.ID));
-
- JRecord userRecord = (JRecord)
inputRecord.getValueByName(Datatypes.Tweet.USER);
- outputRecord.setField(Datatypes.ProcessedTweet.USER_NAME,
- userRecord.getValueByName(Datatypes.Tweet.SCREEN_NAME));
-
- outputRecord.setField(Datatypes.ProcessedTweet.LOCATION, location);
- outputRecord.setField(Datatypes.Tweet.CREATED_AT,
inputRecord.getValueByName(Datatypes.Tweet.CREATED_AT));
- outputRecord.setField(Datatypes.Tweet.MESSAGE, text);
- outputRecord.setField(Datatypes.ProcessedTweet.TOPICS, list);
-
- functionHelper.setResult(outputRecord);
- }
-
-}
diff --git
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFactory.java
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddMentionedUsersFactory.java
similarity index 89%
rename from
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFactory.java
rename to
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddMentionedUsersFactory.java
index a13da84..92e8ade 100644
---
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFactory.java
+++
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddMentionedUsersFactory.java
@@ -21,11 +21,11 @@
import org.apache.asterix.external.api.IExternalScalarFunction;
import org.apache.asterix.external.api.IFunctionFactory;
-public class AddHashTagsInPlaceFactory implements IFunctionFactory {
+public class AddMentionedUsersFactory implements IFunctionFactory {
@Override
public IExternalScalarFunction getExternalFunction() {
- return new AddHashTagsInPlaceFunction();
+ return new addMentionedUsersFunction();
}
}
diff --git
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/addMentionedUsersFunction.java
similarity index 85%
rename from
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java
rename to
asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/addMentionedUsersFunction.java
index 7873835..981aa2b 100644
---
a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/AddHashTagsInPlaceFunction.java
+++
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/addMentionedUsersFunction.java
@@ -27,13 +27,15 @@
import org.apache.asterix.external.library.java.JTypeTag;
import org.apache.asterix.external.util.Datatypes;
-public class AddHashTagsInPlaceFunction implements IExternalScalarFunction {
+public class addMentionedUsersFunction implements IExternalScalarFunction {
private JUnorderedList list = null;
+ private String textFieldName;
@Override
public void initialize(IFunctionHelper functionHelper) {
list = new JUnorderedList(JBuiltinType.JSTRING);
+ textFieldName = functionHelper.getParameters().get(0);
}
@Override
@@ -44,17 +46,17 @@
public void evaluate(IFunctionHelper functionHelper) throws Exception {
list.clear();
JRecord inputRecord = (JRecord) functionHelper.getArgument(0);
- JString text = (JString)
inputRecord.getValueByName(Datatypes.Tweet.MESSAGE);
+ JString text = (JString) inputRecord.getValueByName(textFieldName);
String[] tokens = text.getValue().split(" ");
for (String tk : tokens) {
- if (tk.startsWith("#")) {
+ if (tk.startsWith("@")) {
JString newField = (JString)
functionHelper.getObject(JTypeTag.STRING);
newField.setValue(tk);
list.add(newField);
}
}
- inputRecord.addField(Datatypes.ProcessedTweet.TOPICS, list);
+ inputRecord.addField("mentionedUsers", list);
functionHelper.setResult(inputRecord);
}
diff --git
a/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml
b/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml
index 6b59041..b5157a2 100644
--- a/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml
+++ b/asterixdb/asterix-external-data/src/test/resources/library_descriptor.xml
@@ -42,24 +42,15 @@
<function_type>SCALAR</function_type>
<argument_type>TweetInputType</argument_type>
<return_type>TweetOutputType</return_type>
- <definition>org.apache.asterix.external.library.ParseTweetFactory
- </definition>
+
<definition>org.apache.asterix.external.library.ParseTweetFactory</definition>
</libraryFunction>
<libraryFunction>
- <name>addHashTags</name>
+ <name>addMentionedUsers</name>
<function_type>SCALAR</function_type>
- <argument_type>Tweet</argument_type>
- <return_type>ProcessedTweet</return_type>
- <definition>org.apache.asterix.external.library.AddHashTagsFactory
- </definition>
- </libraryFunction>
- <libraryFunction>
- <name>addHashTagsInPlace</name>
- <function_type>SCALAR</function_type>
- <argument_type>Tweet</argument_type>
- <return_type>ProcessedTweet</return_type>
- <definition>org.apache.asterix.external.library.AddHashTagsInPlaceFactory
- </definition>
+ <argument_type>TweetType</argument_type>
+ <return_type>TweetType</return_type>
+
<definition>org.apache.asterix.external.library.AddMentionedUsersFactory</definition>
+ <parameters>text</parameters>
</libraryFunction>
<libraryFunction>
<name>mysum</name>
diff --git
a/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm
b/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm
index 20dc8c8..52b5f16 100644
---
a/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm
+++
b/asterixdb/asterix-server/src/test/resources/integrationts/library/results/library-metadata/functionDataset/functionDataset.1.adm
@@ -1,5 +1,4 @@
-{ "DataverseName": "externallibtest", "Name": "testlib#addHashTags", "Arity":
"1", "Params": [ "Tweet" ], "ReturnType": "ProcessedTweet", "Definition":
"org.apache.asterix.external.library.AddHashTagsFactory", "Language": "JAVA",
"Kind": "SCALAR" }
-{ "DataverseName": "externallibtest", "Name": "testlib#addHashTagsInPlace",
"Arity": "1", "Params": [ "Tweet" ], "ReturnType": "ProcessedTweet",
"Definition": "org.apache.asterix.external.library.AddHashTagsInPlaceFactory",
"Language": "JAVA", "Kind": "SCALAR" }
+{ "DataverseName": "externallibtest", "Name": "testlib#addMentionedUsers",
"Arity": "1", "Params": [ "TweetType" ], "ReturnType": "TweetType",
"Definition": "org.apache.asterix.external.library.AddMentionedUsersFactory",
"Language": "JAVA", "Kind": "SCALAR" }
{ "DataverseName": "externallibtest", "Name": "testlib#allTypes", "Arity":
"1", "Params": [ "AllType" ], "ReturnType": "AllType", "Definition":
"org.apache.asterix.external.library.AllTypesFactory", "Language": "JAVA",
"Kind": "SCALAR" }
{ "DataverseName": "externallibtest", "Name": "testlib#echoDelay", "Arity":
"1", "Params": [ "TweetMessageType" ], "ReturnType": "TweetMessageType",
"Definition": "org.apache.asterix.external.library.EchoDelayFactory",
"Language": "JAVA", "Kind": "SCALAR" }
{ "DataverseName": "externallibtest", "Name": "testlib#fnameDetector",
"Arity": "1", "Params": [ "InputRecordType" ], "ReturnType":
"DetectResultType", "Definition":
"org.apache.asterix.external.library.KeywordsDetectorFactory", "Language":
"JAVA", "Kind": "SCALAR" }
--
To view, visit https://asterix-gerrit.ics.uci.edu/2804
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I17b1b4d639ca38689298ce88145257e794eb90e1
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Xikui Wang <[email protected]>