Question about how to use Hive — trying to make sure what I'm doing makes sense before going too far.
I'm trying to update HBase with the results from a Hive query. What I'm doing right now is using the Hive JDBC server to run the query, then a MapReduce job, then reading the output files and using those to update HBase. This works, I just wonder if there's a better way. My questions are: 1) Hopefully I'm asking this right, but is there a way to save this as a job, where it doesn't have to build it each time? We'd probably run this exact same query a few times a day. 2) Is JDBC a good way to do this? I've seen some mention of using the CliDriver, but never able to get that working correctly, would that be a better option, or are there other ways to do this? Thanks! If anyone is interested, here's my code

/**
 * Runs a Hive query that exports labels to an HDFS directory, then runs a
 * map-only MapReduce job that reads the exported rows and writes them into
 * the HBase "urls" table.
 */
public class SetLabels extends Configured implements Tool {

    /** HDFS directory the Hive query exports to and the MR job reads from. */
    private static final String LABELS_PATH = "hdfs://localhost:8020/tmp/labels";

    public static void main(String[] args) throws Exception {
        try {
            Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
        } catch (ClassNotFoundException e) {
            System.err.println("Unable to load Hive Driver");
            System.exit(1);
        }

        // try-with-resources closes the Statement and Connection even on failure
        // (the original leaked both).
        try (Connection con =
                 DriverManager.getConnection("jdbc:hive://localhost:10000/default", "");
             Statement stmt = con.createStatement()) {
            // Fixes vs. the original:
            //  - `sql` was never declared (compile error);
            //  - the concatenation produced "...'/tmp/labels'SELECT" (missing space);
            //  - the query used an alias `t` that was never defined — the only
            //    table alias is `u` (FROM url u).
            String sql =
                "INSERT OVERWRITE DIRECTORY '" + LABELS_PATH + "' "
                + "SELECT u.key, COALESCE(ul.label, dl.label, u.description) AS label "
                + "FROM url u "
                + "LEFT OUTER JOIN default_labels dl ON u.description = dl.description "
                + "LEFT OUTER JOIN user_labels ul ON u.description = ul.description";
            // NOTE(review): standard JDBC would use execute()/executeUpdate() for DML,
            // but older Hive JDBC drivers only implemented executeQuery — confirm
            // against your driver version before changing.
            stmt.executeQuery(sql);
        } catch (SQLException e) {
            // Was silently swallowed; at least report why we are bailing out.
            e.printStackTrace();
            System.exit(1);
        }

        // Run MapReduce to read LABELS_PATH and update HBase.
        int res = ToolRunner.run(new Configuration(), new SetLabels(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        // Propagate job failure instead of unconditionally returning 0.
        return runJob() ? 0 : 1;
    }

    /**
     * Configures and runs the map-only job that loads the exported labels
     * into HBase.
     *
     * @return true if the job succeeded
     *         (source-compatible widening of the original void return)
     */
    public boolean runJob()
            throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = getConf();
        Job job = new Job(conf, "Set Labels");
        // Without this the job jar is not shipped to the cluster.
        job.setJarByClass(SetLabels.class);

        FileInputFormat.setInputPaths(job, new Path(LABELS_PATH));
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(SetLabelsMapper.class);

        // initTableReducerJob wires up TableOutputFormat for the "urls" table;
        // with zero reducers the mapper's Puts go straight to HBase (map-only).
        TableMapReduceUtil.initTableReducerJob("urls", IdentityTableReducer.class, job);
        job.setNumReduceTasks(0);

        return job.waitForCompletion(true);
    }
}

/**
 * Maps one line of Hive output (^A-delimited: key, label) to an HBase Put
 * on column details:label.
 */
class SetLabelsMapper extends Mapper<Object, Text, ImmutableBytesWritable, Put> {

    // Hive's default output field delimiter is ^A (ASCII 0x01).
    // (Also fixes the "DELIMETER" misspelling.)
    private static final String HIVE_DELIMITER = String.valueOf((char) 1);

    @Override
    public void map(Object key, Text line, Context context) throws IOException {
        String[] tokens = line.toString().split(HIVE_DELIMITER, -1);
        // Guard against malformed rows — the original threw
        // ArrayIndexOutOfBoundsException on any line with fewer than 2 fields.
        if (tokens.length < 2) {
            return;
        }

        byte[] rowKey = Bytes.toBytes(tokens[0]);
        Put put = new Put(rowKey);
        put.add(Bytes.toBytes("details"), Bytes.toBytes("label"), Bytes.toBytes(tokens[1]));

        try {
            context.write(new ImmutableBytesWritable(rowKey), put);
        } catch (InterruptedException e) {
            // Restore interrupt status and surface the failure instead of
            // swallowing it with printStackTrace().
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while writing to HBase", e);
        }
    }
}