Vijay Sarvepali created HBASE-10385:
---------------------------------------

             Summary: ImportTsv to parse date time from typical loader formats
                 Key: HBASE-10385
                 URL: https://issues.apache.org/jira/browse/HBASE-10385
             Project: HBase
          Issue Type: New Feature
          Components: mapreduce
    Affects Versions: 0.96.1.1
            Reporter: Vijay Sarvepali
            Priority: Minor


Simple patch to enable parsing of standard date-time fields from TSV files into 
HBase.

***************
*** 57,62 ****
--- 57,70 ----
  import com.google.common.base.Splitter;
  import com.google.common.collect.Lists;
  
+ //2013-08-19T04:39:07
+ import java.text.DateFormat;
+ import java.util.*;
+ import java.text.SimpleDateFormat;
+ import java.text.ParseException;
+ 
+ 
+ 
  /**
   * Tool to import data from a TSV file.
   *
***************
*** 220,229 ****
              getColumnOffset(timestampKeyColumnIndex),
              getColumnLength(timestampKeyColumnIndex));
          try {
!           return Long.parseLong(timeStampStr);
          } catch (NumberFormatException nfe) {
            // treat this record as bad record
!           throw new BadTsvLineException("Invalid timestamp " + timeStampStr);
          }
        }
        
--- 228,239 ----
              getColumnOffset(timestampKeyColumnIndex),
              getColumnLength(timestampKeyColumnIndex));
          try {
!           return Long.parseLong(timeStampStr);
          } catch (NumberFormatException nfe) {
+           // Try this record with string to date in mseconds long
+           return extractTimestampInput(timeStampStr);
            // treat this record as bad record
!           //throw new BadTsvLineException("Invalid timestamp " + timeStampStr);
          }
        }
        
***************
*** 243,248 ****
--- 253,274 ----
          return lineBytes;
        }
      }
+  public static long extractTimestampInput(String strDate) throws BadTsvLineException{
+     final List<String> dateFormats = Arrays.asList("yyyy-MM-dd HH:mm:ss.SSS", "yyyy-MM-dd'T'HH:mm:ss");    
+ 
+     for(String format: dateFormats){
+         SimpleDateFormat sdf = new SimpleDateFormat(format);
+         try{
+             Date d= sdf.parse(strDate);
+           long msecs = d.getTime();
+           return msecs;
+         } catch (ParseException e) {
+           //intentionally empty
+         }
+     }
+     // If we come here we have a problem with converting timestamps for this row.
+     throw new BadTsvLineException("Invalid timestamp " + strDate); 
+  } 
  
      public static class BadTsvLineException extends Exception {
        public BadTsvLineException(String err) {



--
This message was sent by Atlassian JIRA
(v6.1.5#6160)

Reply via email to