[ 
https://issues.apache.org/jira/browse/NUTCH-650?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12803186#action_12803186
 ] 

Xiao Yang commented on NUTCH-650:
---------------------------------

Exception:

org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException: 
org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException: Column family 
mtdt: does not exist in region crawl,,1264048608430 in table {NAME => 'crawl', 
FAMILIES => [{NAME => 'bas', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'cnt', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'cnttyp', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'fchi', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'fcht', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'hdrs', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'ilnk', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'modt', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'mtdt', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'olnk', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'prsstt', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'prtstt', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'prvfch', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'prvsig', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'repr', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'rtrs', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'scr', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'sig', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'stt', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'ttl', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}, {NAME => 'txt', COMPRESSION => 'NONE', VERSIONS => '3', TTL => 
'2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 
'true'}]}
    at 
org.apache.hadoop.hbase.regionserver.HRegion.checkFamily(HRegion.java:2381)
    at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:1241)
    at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:1208)
    at 
org.apache.hadoop.hbase.regionserver.HRegionServer.put(HRegionServer.java:1834)
    at sun.reflect.GeneratedMethodAccessor12.invoke(Unknown Source)
    at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.apache.hadoop.hbase.ipc.HBaseRPC$Server.call(HBaseRPC.java:648)

    at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:915)

    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
    at 
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
    at 
org.apache.hadoop.hbase.RemoteExceptionHandler.decodeRemoteException(RemoteExceptionHandler.java:94)
    at 
org.apache.hadoop.hbase.client.HConnectionManager$TableServers.getRegionServerWithRetries(HConnectionManager.java:995)
    at 
org.apache.hadoop.hbase.client.HConnectionManager$TableServers$2.doCall(HConnectionManager.java:1193)
    at 
org.apache.hadoop.hbase.client.HConnectionManager$TableServers$Batch.process(HConnectionManager.java:1115)
    at 
org.apache.hadoop.hbase.client.HConnectionManager$TableServers.processBatchOfRows(HConnectionManager.java:1201)
    at org.apache.hadoop.hbase.client.HTable.flushCommits(HTable.java:605)
    at org.apache.hadoop.hbase.client.HTable.put(HTable.java:470)
    at org.apache.nutch.crawl.Injector$UrlMapper.map(Injector.java:92)
    at org.apache.nutch.crawl.Injector$UrlMapper.map(Injector.java:62)
    at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:583)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305)
    at org.apache.hadoop.mapred.Child.main(Child.java:170)

This may be caused by invalid column family names:
I found that some names end with a colon while others don't in the interface 
org.apache.nutch.util.hbase.WebTableColumns.
Is this a bug?
public interface WebTableColumns {
  public static final String BASE_URL_STR         = "bas";
  public static final String STATUS_STR           = "stt";
  public static final String FETCH_TIME_STR       = "fcht";
  public static final String RETRIES_STR          = "rtrs";
  public static final String FETCH_INTERVAL_STR   = "fchi";
  public static final String SCORE_STR            = "scr";
  public static final String MODIFIED_TIME_STR    = "modt";
  public static final String SIGNATURE_STR        = "sig";
  public static final String CONTENT_STR          = "cnt";
  public static final String CONTENT_TYPE_STR     = "cnttyp:";
  public static final String TITLE_STR            = "ttl:";
  public static final String OUTLINKS_STR         = "olnk:";
  public static final String INLINKS_STR          = "ilnk:";
  public static final String PARSE_STATUS_STR     = "prsstt:";
  public static final String PROTOCOL_STATUS_STR  = "prtstt:";
  public static final String TEXT_STR             = "txt:";
  public static final String REPR_URL_STR         = "repr:";
  public static final String HEADERS_STR          = "hdrs:";
  public static final String METADATA_STR         = "mtdt:";

> Hbase Integration
> -----------------
>
>                 Key: NUTCH-650
>                 URL: https://issues.apache.org/jira/browse/NUTCH-650
>             Project: Nutch
>          Issue Type: New Feature
>    Affects Versions: 1.0.0
>            Reporter: Doğacan Güney
>            Assignee: Doğacan Güney
>             Fix For: 1.1
>
>         Attachments: hbase-integration_v1.patch, hbase_v2.patch, 
> malformedurl.patch, meta.patch, meta2.patch, nofollow-hbase.patch, 
> nutch-habase.patch, searching.diff, slash.patch
>
>
> This issue will track nutch/hbase integration

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.

Reply via email to