Hi all, I need to test the performance of a query in both Hive and Spark. Can anyone convert this SQL to Spark SQL? Here is the SQL.
-- Spark SQL version of the Hive query.
-- Conversion / fix notes:
--   * SYSDATE_ORA() is an Oracle-style Hive UDF not available in Spark;
--     replaced with Spark's current_timestamp().
--   * substr(TRAN_PARTICULAR,1,3) is exactly 3 chars, so the unanchored
--     rlike '(PUR|POS).*' is equivalent to the simpler IN ('PUR','POS').
--   * Duplicate literal '512932' removed from the 6-digit prefix list.
--   * NOTE(review): LIMIT 50 without ORDER BY returns an arbitrary 50 rows;
--     add an ORDER BY if deterministic output is required.
SELECT
    split(DTD.TRAN_RMKS, '/')[0] AS TRAB_RMK1,  -- alias kept as-is (presumably a typo of TRAN_RMK1; confirm with consumers)
    split(DTD.TRAN_RMKS, '/')[1] AS ATM_ID,
    DTD.ACID,
    G.FORACID,
    DTD.REF_NUM,
    DTD.TRAN_ID,
    DTD.TRAN_DATE,
    DTD.VALUE_DATE,
    DTD.TRAN_PARTICULAR,
    DTD.TRAN_RMKS,
    DTD.TRAN_AMT,
    current_timestamp(),                        -- was SYSDATE_ORA()
    DTD.PSTD_DATE,
    DTD.PSTD_FLG,
    G.CUSTID,
    NULL AS PROC_FLG,
    DTD.PSTD_USER_ID,
    DTD.ENTRY_USER_ID,
    G.schemecode AS SCODE
FROM DAILY_TRAN_DETAIL_TABLE2 DTD
INNER JOIN ods_gam G
    ON DTD.ACID = G.ACID
WHERE
    substr(DTD.TRAN_PARTICULAR, 1, 3) IN ('PUR', 'POS')
    AND DTD.PART_TRAN_TYPE = 'D'
    AND DTD.DEL_FLG <> 'Y'
    AND DTD.PSTD_FLG = 'Y'
    AND G.schemecode IN ('SBPRV', 'SBPRS', 'WSSTF', 'BGFRN',
                         'NREPV', 'NROPV', 'BSNRE', 'BSNRO')
    AND (
        -- match on 6-digit branch/terminal prefix of the first '/'-field
        substr(split(DTD.TRAN_RMKS, '/')[0], 1, 6) IN (
            '405997', '406228', '406229', '415527', '415528', '417917',
            '417918', '418210', '421539', '421572', '432198', '435736',
            '450502', '450503', '450504', '468805', '469190', '469191',
            '469192', '474856', '478286', '478287', '486292', '490222',
            '490223', '490254', '512932', '514833', '522346', '522352',
            '524458', '526106', '526701', '527114', '527479', '529608',
            '529615', '529616', '532731', '532734', '533102', '534680',
            '536132', '536610', '536621', '539149', '539158', '549751',
            '557654', '607118', '607407', '607445', '607529', '652189',
            '652190', '652157'
        )
        -- or on the longer 8-digit prefixes
        OR substr(split(DTD.TRAN_RMKS, '/')[0], 1, 8) IN (
            '53270200', '53270201', '53270202', '60757401', '60757402'
        )
    )
LIMIT 50;
-- Best Regards, Sreeharsha Eedupuganti