[ 
https://issues.apache.org/jira/browse/HBASE-10385?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13876885#comment-13876885
 ] 

stack commented on HBASE-10385:
-------------------------------

Thank you for the contribution, [~ericavijay].  Would you mind formatting it as a 
patch file attached to the issue, and making the code follow the conventions of 
the rest of the code base?  (See the reference guide on how to contribute if you 
need more detail.)  Also, this looks like behavior that should be optional?

> ImportTsv to parse date time from typical loader formats
> --------------------------------------------------------
>
>                 Key: HBASE-10385
>                 URL: https://issues.apache.org/jira/browse/HBASE-10385
>             Project: HBase
>          Issue Type: New Feature
>          Components: mapreduce
>    Affects Versions: 0.96.1.1
>            Reporter: Vijay Sarvepali
>            Priority: Minor
>              Labels: importtsv
>   Original Estimate: 2h
>  Remaining Estimate: 2h
>
> Simple patch to enable parsing of standard date-time fields from TSV files 
> into HBase.
> ***************
> *** 57,62 ****
> --- 57,70 ----
>   import com.google.common.base.Splitter;
>   import com.google.common.collect.Lists;
>   
> + //2013-08-19T04:39:07
> + import java.text.DateFormat;
> + import java.util.*;
> + import java.text.SimpleDateFormat;
> + import java.text.ParseException;
> + 
> + 
> + 
>   /**
>    * Tool to import data from a TSV file.
>    *
> ***************
> *** 220,229 ****
>               getColumnOffset(timestampKeyColumnIndex),
>               getColumnLength(timestampKeyColumnIndex));
>           try {
> !           return Long.parseLong(timeStampStr);
>           } catch (NumberFormatException nfe) {
>             // treat this record as bad record
> !           throw new BadTsvLineException("Invalid timestamp " + 
> timeStampStr);
>           }
>         }
>         
> --- 228,239 ----
>               getColumnOffset(timestampKeyColumnIndex),
>               getColumnLength(timestampKeyColumnIndex));
>           try {
> !         return Long.parseLong(timeStampStr);
>           } catch (NumberFormatException nfe) {
> +         // Try this record with string to date in mseconds long
> +         return extractTimestampInput(timeStampStr);
>             // treat this record as bad record
> !           //throw new BadTsvLineException("Invalid timestamp " + 
> timeStampStr);
>           }
>         }
>         
> ***************
> *** 243,248 ****
> --- 253,274 ----
>           return lineBytes;
>         }
>       }
> +  public static long extractTimestampInput(String strDate) throws 
> BadTsvLineException{
> +     final List<String> dateFormats = Arrays.asList("yyyy-MM-dd 
> HH:mm:ss.SSS", "yyyy-MM-dd'T'HH:mm:ss");    
> + 
> +     for(String format: dateFormats){
> +         SimpleDateFormat sdf = new SimpleDateFormat(format);
> +         try{
> +             Date d= sdf.parse(strDate);
> +         long msecs = d.getTime();
> +         return msecs;
> +         } catch (ParseException e) {
> +         //intentionally empty
> +         }
> +     }
> +     // If we come here we have a problem with converting timestamps for 
> this row.
> +     throw new BadTsvLineException("Invalid timestamp " + strDate); 
> +  } 
>   
>       public static class BadTsvLineException extends Exception {
>         public BadTsvLineException(String err) {



--
This message was sent by Atlassian JIRA
(v6.1.5#6160)

Reply via email to