I am sure it's a bug. The bytes of the column family and qualifier in a KeyValue do
not contain the family/qualifier delimiter ":". The key format is
like this: <rowlength> <row> <columnfamilylength> <columnfamily>
<columnqualifier> <timestamp> <keytype>.
So column "f:ts" is stored as …fts…, and column "ft:1st" is stored as …ft1st….
Column "f:ts" should sort before "ft:1st", but without the delimiter the
concatenated bytes compare the other way round ("ft1" < "fts", because '1' sorts
before 's'), so in the binary search "ft:1st" is treated as less than "f:ts".
Could anyone confirm this bug? Thanks!
在 2012-8-26,下午5:38, lin weijian 写道:
> I think this is a bug in Result.getValue(byte[], byte[]). I debugged and traced
> the code and found that the Result is
> right, but getValue is wrong.
>
> Result r = htable.get(get);
>
> byte[] res = r.getValue(Bytes.toBytes("f"), Bytes.toBytes("ts"));
>
> In the function getColumnLatest(), called by getValue, the value of kvs is like this:
>
> [com.sohu.www:http//f:fi/1345888370605/Put/vlen=4,
> com.sohu.www:http//f:ts/1345888370605/Put/vlen=8,
> com.sohu.www:http//ft:1st/1345888370605/Put/vlen=1,
> com.sohu.www:http//mk:_injmrk_/1345888370605/Put/vlen=1,
> com.sohu.www:http//s:s/1345888370605/Put/vlen=4]
>
> but getColumnLatest("f", "ts") is null;
>
> Does the binarySearch or the KeyValue.COMPARATOR have a bug?
>
> 下面是被转发的邮件:
>
>> 发件人: lin weijian <[email protected]>
>> 主题: sty wrong with Get operation of hbase client
>> 日期: 2012年8月25日格林尼治标准时间+0800下午10时03分26秒
>> 收件人: [email protected]
>>
>> Hi,
>> I use hbase client 0.92.1 to get the row, but when the Get adds all the
>> columns and qualifiers, a field (f:ts) always returns nothing. If the Get
>> does not add column family mk or ft, it works correctly. Is it a bug?
>>
>>
>> The schema is as follows:
>>
>> <table name="webpage">
>> <family name="p" maxVersions="1"/> <!-- This can also have params
>> like compression, bloom filters -->
>> <family name="f" maxVersions="1"/>
>> <family name="s" maxVersions="1"/>
>> <family name="il" maxVersions="1"/>
>> <family name="ol" maxVersions="1"/>
>> <family name="h" maxVersions="1"/>
>> <family name="mtdt" maxVersions="1"/>
>> <family name="mk" maxVersions="1"/>
>> <family name="ft" maxVersions="1"/>
>> </table>
>> <class table="webpage" keyClass="java.lang.String"
>> name="org.apache.nutch.storage.WebPage">
>>
>> <!-- fetch fields -->
>> <field name="baseUrl" family="f" qualifier="bas"/>
>> <field name="status" family="f" qualifier="st"/>
>> <field name="prevFetchTime" family="f" qualifier="pts"/>
>> <field name="fetchTime" family="f" qualifier="ts"/>
>> <field name="fetchInterval" family="f" qualifier="fi"/>
>> <field name="retriesSinceFetch" family="f" qualifier="rsf"/>
>> <field name="reprUrl" family="f" qualifier="rpr"/>
>> <field name="content" family="f" qualifier="cnt"/>
>> <field name="contentType" family="f" qualifier="typ"/>
>> <field name="protocolStatus" family="f" qualifier="prot"/>
>> <field name="modifiedTime" family="f" qualifier="mod"/>
>> <field name="pageType" family="f" qualifier="ptyp"/>
>> <field name="level" family="f" qualifier="l"/>
>> <field name="lastFetchInterval" family="f" qualifier="lfi"/>
>> <field name="newsTime" family="f" qualifier="nts"/>
>> <field name="findTime" family="f" qualifier="fts"/>
>>
>>
>> <field name="title" family="p" qualifier="t"/>
>> <field name="text" family="p" qualifier="c"/>
>> <field name="parseStatus" family="p" qualifier="st"/>
>> <field name="signature" family="p" qualifier="sig"/>
>> <field name="prevSignature" family="p" qualifier="psig"/>
>>
>> <!-- score fields -->
>> <field name="score" family="s" qualifier="s"/>
>> <field name="headers" family="h"/>
>> <field name="inlinks" family="il"/>
>> <field name="outlinks" family="ol"/>
>> <field name="metadata" family="mtdt"/>
>> <field name="markers" family="mk"/>
>>
>> <field name="features" family="ft"/>
>> </class>
>>
>> {"name": "WebPage",
>> "type": "record",
>> "namespace": "org.apache.nutch.storage",
>> "fields": [
>> {"name": "baseUrl", "type": "string"},
>> {"name": "status", "type": "int"},
>> {"name": "fetchTime", "type": "long"},
>> {"name": "prevFetchTime", "type": "long"},
>> {"name": "fetchInterval", "type": "int"},
>> {"name": "retriesSinceFetch", "type": "int"},
>> {"name": "modifiedTime", "type": "long"},
>> {"name": "protocolStatus", "type": {
>> "name": "ProtocolStatus",
>> "type": "record",
>> "namespace": "org.apache.nutch.storage",
>> "fields": [
>> {"name": "code", "type": "int"},
>> {"name": "args", "type": {"type": "array", "items":
>> "string"}},
>> {"name": "lastModified", "type": "long"}
>> ]
>> }},
>> {"name": "content", "type": "bytes"},
>> {"name": "contentType", "type": "string"},
>> {"name": "prevSignature", "type": "bytes"},
>> {"name": "signature", "type": "bytes"},
>> {"name": "title", "type": "string"},
>> {"name": "text", "type": "string"},
>> {"name": "parseStatus", "type": {
>> "name": "ParseStatus",
>> "type": "record",
>> "namespace": "org.apache.nutch.storage",
>> "fields": [
>> {"name": "majorCode", "type": "int"},
>> {"name": "minorCode", "type": "int"},
>> {"name": "args", "type": {"type": "array", "items":
>> "string"}}
>> ]
>> }},
>> {"name": "score", "type": "float"},
>> {"name": "reprUrl", "type": "string"},
>> {"name": "headers", "type": {"type": "map", "values": "string"}},
>> {"name": "outlinks", "type": {"type": "map", "values": "string"}},
>> {"name": "inlinks", "type": {"type": "map", "values": "string"}},
>> {"name": "markers", "type": {"type": "map", "values": "string"}},
>> {"name": "metadata", "type": {"type": "map", "values": "bytes"}},
>> {"name": "features", "type": {"type": "map", "values": "bytes"}},
>> {"name": "pageType", "type": "int"},
>> {"name": "newsTime", "type": "long"},
>> {"name": "level", "type": "int"},
>> {"name": "lastFetchInterval", "type":"int"},
>> {"name": "findTime", "type":"long"}
>> ]
>> }
>>
>>
>>
>>
>>
>>
>>
>>
>>
>