When I run the script below I get lots of '()' output. Can anyone guide me why
I get no data in B (PIg version=0.12.1 and A dumps OK)
TIA!!!!
A = load 'hdfs:///user/hduser/smsCorpus_en_2012.04.30_all.xml' using
org.apache.pig.piggybank.storage.XMLLoader('message')
as (x:chararray);
describe A;
B = foreach A GENERATE FLATTEN(REGEX_EXTRACT_ALL(x,
'<message>\\n\\s*<text>(.*)</text>\\n\\s*
<source>\\n\\s*<srcNumber>(.*)</srcNumber>\\n\\s*<phoneModel
(.*)/>\\n\\s*<userProfile>\\n\\s*<userID>(.*)</userID>\\n\\s*<age>(.*)</age>\\n\\s*<gender>(.*)</gender>\\n\\s*<nativeSpeaker>(.*)</nativeSpeaker>\\n\\s*<country>(.*)</country>\\n\\s*<city>(.*)</city>\\n\\s*<experience>(.*)</experience>\\n\\s*<frequency>(.*)</frequency>\\n\\s*<inputMethod>(.*)</inputMethod>\\n\\s*</userProfile>\\n\\s*</source>\\n\\s*<destination
(.*)>\\n\\s*<destNumber>(.*)</destNumber>\\n\\s*</destination>\\n\\s*<messageProfile
(.*)/>\\n\\s*<collectionMethod (.*)/>\\n\\s*</message>'))
as (SMStext:chararray, srcNumber:chararray, phoneModel:chararray,
userID:chararray, age:chararray, gender:chararray, nativeSpeaker:chararray,
country:chararray, city:chararray, experience:chararray,
frequency:chararray,
inputMethod:chararray, destination:chararray, destNumber:chararray,
messageProfile:chararray, collectionMethod:chararray);
describe B;
dump B;
/* EXAMPLE DATA FROM NUS SMS CORPUS
<message id="1">
<text>K</text>
<source>
<srcNumber>79780a9dbe83fd1e5dd2bd2543e7da2a</srcNumber>
<phoneModel manufactuer="Nokia" smartphone="unknown"/>
<userProfile>
<userID>79780a9dbe83fd1e5dd2bd2543e7da2a</userID>
<age>21-25</age>
<gender>unknown</gender>
<nativeSpeaker>yes</nativeSpeaker>
<country>India</country>
<city>Tiruppur</city>
<experience>3 to 5 years</experience>
<frequency>More than 50 SMS daily</frequency>
<inputMethod>Multi-tap</inputMethod>
</userProfile>
</source>
<destination country="unknown">
<destNumber>0ffc7585148560b7520931d354c00a9b</destNumber>
</destination>
<messageProfile language="en" time="2010.10.24 11:59" type="send"/>
<collectionMethod collector="Tao Chen" method="SMS Export"
time="2010/11"/>
</message>
*/