-- Extracts a handful of fields from raw Twitter JSON collected by Flume,
-- keeps only English tweets, sanitises the text so it is safe for a
-- '|'-delimited, line-oriented output, derives the account age in days and
-- stores the result with PigStorage.
--
-- Output columns (in order):
--   tweet, language, screen_name, retweet_count,
--   followers_count, friends_count, account_age
--
-- Parameters (override with -param NAME=value on the pig command line):
--   INPUT_PATH   JSON file(s) to read
--   OUTPUT_PATH  directory the result is written to

-- elephant-bird supplies the JsonLoader; json-simple is its JSON parser.
REGISTER '/home/hduser/pig/lib/elephant-bird-hadoop-compat-4.5.jar';
REGISTER '/home/hduser/pig/lib/elephant-bird-core-4.5.jar';
REGISTER '/home/hduser/pig/lib/elephant-bird-pig-4.5.jar';
REGISTER '/home/hduser/pig/lib/json-simple-1.1.jar';

-- Defaults reproduce the paths that used to be hard-coded in the script.
-- The stray leading blank of the original input path has been removed.
%default INPUT_PATH '/user/flume/TOI/15/09/11/FlumeData.1441966022367'
%default OUTPUT_PATH '/user/hduser/sqoop/ABCD-2/Reviews'

-- One map per JSON document; '-nestedLoad' keeps nested objects as maps.
raw_tweets = LOAD '$INPUT_PATH'
    USING com.twitter.elephantbird.pig.load.JsonLoader('-nestedLoad')
    AS (json:map[]);

-- Project the top-level fields that are needed. FLATTEN is intentionally
-- not used: it un-nests bags and tuples, whereas these values are scalars
-- or maps (and flattening a map yields key/value rows on newer Pig
-- releases, which is not what is wanted here).
tweets = FOREACH raw_tweets GENERATE
    json#'text'             AS tweet:chararray,
    json#'lang'             AS language:chararray,
    json#'retweeted_status' AS rt_stat:map[],
    json#'user'             AS users:map[];

-- Filter as early as possible so the string and date work below only runs
-- on rows that are kept.
english_tweets = FILTER tweets BY language == 'en';

-- Pull the scalar attributes out of the nested maps.
-- rt_stat is null for tweets that are not retweets, so retweet_count is
-- null for those rows.
tweet_fields = FOREACH english_tweets GENERATE
    tweet,
    language,
    users#'screen_name'     AS screen_name:chararray,
    rt_stat#'retweet_count' AS retweet_count:long,
    users#'followers_count' AS followers_count:int,
    users#'friends_count'   AS friends_count:int,
    users#'created_at'      AS createdate:chararray;

-- Sanitise and derive:
--   * line breaks (CR and LF) and the '|' field delimiter inside the tweet
--     text are replaced by a blank so one tweet is exactly one output record;
--   * the '+0000 ' offset is dropped from created_at before parsing it.
-- NOTE(review): with the offset removed, ToDate interprets the timestamp in
-- Pig's default time zone -- confirm that this is UTC if day-exact ages
-- matter.
reviews = FOREACH tweet_fields GENERATE
    REPLACE(tweet, '[\r\n|]', ' ') AS tweet:chararray,
    language,
    screen_name,
    retweet_count,
    followers_count,
    friends_count,
    DaysBetween(
        CurrentTime(),
        ToDate(TRIM(REPLACE(createdate, '\\+0000 ', '')), 'EEE MMM d HH:mm:ss yyyy')
    ) AS account_age;

-- '-schema' additionally writes a .pig_schema file next to the data.
STORE reviews INTO '$OUTPUT_PATH' USING PigStorage('|', '-schema');