For a slightly unrelated reason, I needed to write a quick app to test some code running on our Hadoop/HBase cluster. However, I seem to be having trouble getting consistent reads.
Here's the scenario: the application scans some directories in HDFS and reads lines of text from each file. A user ID is extracted from each line, and then HBase is checked to verify that the ID exists. In *all* cases the ID should exist in HBase. However, only about the first 100 reads (of roughly 1000) return valid results; after that, the rest return null from Result.getValue().

You can see from the code that the app takes a userID as a parameter. This is to illustrate that the data is in fact in HBase: passing *any* of the userIDs that produced null results as the parameter will result in a valid HBase read. Here is abbreviated output that illustrates this oddity.

First execution of the application:

...(many 'good' output lines, like the following 2)
bytes for user 139|754436243196115533|c: 1920
bytes for user 139|754436243113796511|c: 1059
bytes for user 141|754999187733044577|c: 0
1/171 FILE MAY HAVE LINE MISSING FROM HBASE!: hdfs://elh00/user/hadoop/events/siteID-141/2010-04-12T00-0700/fiqgvrl.events
bytes for user *141|754717712663942409|c*: 0
2/172 FILE MAY HAVE LINE MISSING FROM HBASE!: hdfs://elh00/user/hadoop/events/siteID-141/2010-04-12T00-0700/fwesvqn.events
bytes for user 141|755280633926232247|c: 0
3/173 FILE MAY HAVE LINE MISSING FROM HBASE!: hdfs://elh00/user/hadoop/events/siteID-141/2010-04-12T01-0700/wydfvn.events
bytes for user 141|754436237930862231|c: 0
4/174 FILE MAY HAVE LINE MISSING FROM HBASE!: hdfs://elh00/user/hadoop/events/siteID-141/2010-04-12T01-0700/zpjyod.events
byte

...and this continues for the remaining files.

Second execution, passing *any* of the seemingly missing userIDs as the parameter, yields the following sample:

Count bytes for commandline user 141|754717712663942409|c: 855
...(many 'good' output lines, like the following 1)
bytes for user 141|qfbvndelauretis|a: 2907001
bytes for user 141|754436240987076893|c: 0
1/208 FILE MAY HAVE LINE MISSING FROM HBASE!: hdfs://elh00/user/hadoop/events/siteID-141/2010-04-12T14-0700/hehvln.events
bytes for user 141|754436241315533944|c: 0
bytes for user 141|754436241215573999|c: 0
2/210 FILE MAY HAVE LINE MISSING FROM HBASE!: hdfs://elh00/user/hadoop/events/siteID-141/2010-04-12T15-0700/fvkeert.events
...

Notice that the zeros don't occur until file 208 this time. This is not random, either: rerunning the two executions above produces exactly the same results, all day long. It's as if selecting the initial user allows its region to be read consistently for the remainder of the run.

Three last points: no exceptions are ever thrown, all region servers are up throughout the execution, and no other reads or writes are occurring on the cluster during the run.

Any thoughts or advice? This is really causing me pain at the moment.
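In case it helps, the command-line check at the top of that second run boils down to a single standalone Get against the same table. A minimal sketch (it reuses the config and Constants from the class below, and the row key is one of the "missing" keys from the first run):

  HTable table = new HTable(config, Constants.HBASE_USER_EVENTS_TABLE);
  // Row keys have the form siteID|userID|type.
  Result row = table.get(new Get(Bytes.toBytes("141|754717712663942409|c")));
  byte[] data = row == null ? null
      : row.getValue(Constants.HBASE_USER_EVENTS_TABLE_RAW_FAMILY,
                     Constants.HBASE_USER_EVENTS_TABLE_EVENTS_COLUMN);
  System.out.println("bytes: " + (data == null ? 0 : data.length));

Run that way, the key comes back with data (855 bytes in the sample above), even though the loop saw 0 bytes for it.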
Oh, and here's the quick and dirty class that produces this:

package com.touchcommerce.data.jobs.misc.partitioning_debug;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

import com.touchcommerce.data.Constants;
import com.touchcommerce.data.services.resources.HDFSService;
import com.touchcommerce.data.services.utils.EventUtils;

public class TestIt {

  private final static HBaseConfiguration config = new HBaseConfiguration(HDFSService.newConfigInstance());
  private static String userToCheckFirst;
  private static HTable userEventsTable;

  public static void main(String[] args) throws IOException {
    FileSystem hdfs = FileSystem.get(config);
    userEventsTable = new HTable(config, Constants.HBASE_USER_EVENTS_TABLE);
    int maxLinesPerFileToRead = Integer.parseInt(args[0]);
    FileStatus[] containedSiteEntries = hdfs.listStatus(new Path(Constants.HDFS_EVENTS_ROOT_DIR));
    int good = 0;
    int bad = 0;

    /*
     * Passing in a key here that returned no data during the loop below will almost certainly
     * result in event data, meaning that hbase *does* have data for this key after all.
     * So what's wrong with the loop below??????
     */
    userToCheckFirst = args.length > 1 ? args[1] : null;
    if (userToCheckFirst != null) {
      byte[] data = fetchData(Bytes.toBytes(userToCheckFirst));
      System.out.println("Count bytes for commandline user " + userToCheckFirst + ": "
          + (data == null ? 0 : data.length));
    }

    // Walk siteID dirs, then hour dirs (for 2010-04-12 only), then the event files within.
    for (FileStatus siteStatus : containedSiteEntries) {
      if (siteStatus.isDir()) {
        FileStatus[] containedHourEntries = hdfs.listStatus(siteStatus.getPath());
        for (FileStatus hourStatus : containedHourEntries) {
          String hourStatusPath = hourStatus.getPath().toString();
          if (hourStatus.isDir()
              && hourStatusPath.indexOf(Constants.HDFS_INVALID_EVENTS_DIR) < 0
              && hourStatusPath.indexOf("2010-04-12") > 0) {
            FileStatus[] containedHourFiles = hdfs.listStatus(hourStatus.getPath());
            for (FileStatus hourFile : containedHourFiles) {
              if (hourFile.getLen() > 0) {
                Path hourFilePath = hourFile.getPath();
                boolean containedUser = false;
                BufferedReader in = new BufferedReader(new InputStreamReader(hdfs.open(hourFilePath)));
                boolean fileIsGood = false;
                String line = in.readLine();
                boolean processMoreLines = line != null;
                int linesRead = line == null ? 0 : 1;
                while (processMoreLines) {
                  byte[] data = null;
                  String siteID = EventUtils.extractField(line, "siteID");
                  String userID = EventUtils.extractCustomerID(line);
                  String type = "c";
                  if (userID == null) {
                    userID = EventUtils.extractAgentID(line);
                    type = "a";
                  }
                  if (userID != null) {
                    containedUser = true;
                    int attempts = 0;
                    // Retry the read up to 3 times; report 0 bytes if every attempt comes back empty.
                    while (data == null || data.length == 0) {
                      data = fetchData(Bytes.toBytes(siteID + "|" + userID + "|" + type));
                      if (data == null || data.length == 0) {
                        // THIS SHOULD NOT HAPPEN, BUT ONCE IT DOES, THE REST SEEM TO FOLLOW WITH THE SAME
                      } else {
                        System.out.println("bytes for user " + siteID + "|" + userID + "|" + type
                            + ": " + data.length);
                        break;
                      }
                      if (++attempts == 3) {
                        System.out.println("bytes for user " + siteID + "|" + userID + "|" + type
                            + ": " + (data == null ? 0 : data.length));
                        break;
                      }
                    }
                  }
                  if (data != null && data.length > 0) {
                    // One good read is enough to call the whole file good.
                    fileIsGood = true;
                    processMoreLines = false;
                  }
                  if (linesRead >= maxLinesPerFileToRead)
                    processMoreLines = false;
                  else {
                    line = in.readLine();
                    processMoreLines = line != null;
                    linesRead++;
                  }
                }
                in.close();
                if (fileIsGood || !containedUser)
                  good++;
                else {
                  bad++;
                  System.out.println(bad + "/" + (good + bad)
                      + " FILE MAY HAVE LINE MISSING FROM HBASE!: " + hourFilePath);
                }
              }
            }
          }
        }
      }
    }
  }

  private static byte[] fetchData(byte[] userBytes) throws IOException {
    Get userEventsQuery = new Get(userBytes);
    Result row = userEventsTable.get(userEventsQuery);
    if (row == null)
      return null;
    return row.getValue(Constants.HBASE_USER_EVENTS_TABLE_RAW_FAMILY,
        Constants.HBASE_USER_EVENTS_TABLE_EVENTS_COLUMN);
  }
}
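One variation I haven't tried yet (untested sketch; fetchDataFresh is just an illustrative name): open a fresh HTable per lookup and narrow the Get to the single column, to rule out anything held in the shared userEventsTable instance:

  // Untested sketch: same lookup as fetchData, but with a fresh HTable per call
  // and the Get narrowed to the one column we actually read.
  private static byte[] fetchDataFresh(byte[] userBytes) throws IOException {
    HTable freshTable = new HTable(config, Constants.HBASE_USER_EVENTS_TABLE);
    Get userEventsQuery = new Get(userBytes);
    userEventsQuery.addColumn(Constants.HBASE_USER_EVENTS_TABLE_RAW_FAMILY,
        Constants.HBASE_USER_EVENTS_TABLE_EVENTS_COLUMN);
    Result row = freshTable.get(userEventsQuery);
    if (row == null)
      return null;
    return row.getValue(Constants.HBASE_USER_EVENTS_TABLE_RAW_FAMILY,
        Constants.HBASE_USER_EVENTS_TABLE_EVENTS_COLUMN);
  }

If swapping that in for fetchData changes which keys come back empty, that would at least point at the client side rather than the region servers.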