[ 
https://issues.apache.org/jira/browse/HBASE-30160?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18080703#comment-18080703
 ] 

Balazs Meszaros edited comment on HBASE-30160 at 5/13/26 6:11 PM:
------------------------------------------------------------------

And a last try with [hashclash|https://github.com/cr-marcstevens/hashclash]. In 
this test scenario, I created two collisions: the first one for the {{table1,f}} 
input, the second one for the {{table1,p}} input. This collision generation was 
relatively slow; it took around 3 hours.
{noformat}
create 'table1', 'f'
put 'table1', 'a', 'f', 111
put 'table1', 'm', 'f', 222
put 'table1', 'z', 'f', 333

# split around 'f'
split 'table1', 
[102,61,98,132,17,1,117,211,77,235,128,147,222,49,193,217,48,69,251,190,30,113,240,10,99,117,168,48,170,152,23,202,227,162,107,142,61,68,169,143,242,14,103,150,72,0,0,0,0,5,150,198,218,160,218,87,222,25,240,161,133,156,198,0,152,167,150,187,93,139,78,129,122,113,239,204,157,137,33,213,98,241,149,163,234,96,59,9,180,104,203,1,163,13,167,33,78,242,134,110,78,71,187,20,241,71,90,72,184,173,17,225,159,19,5,76,209,2,22,238,206,61,34,103,2,198,112,134,71,75,60,124,197,221,85,223,111,121,217,175,0,156,236,46,230,179,169,71,89,119,15,142,62,239,81,37,37,90,77,83,177,45,143,10,115,129,129,184,45,44,181,137,77,88,216,229,54,86,56,104,123,173,196,68,183,161,155,255,173,191,189,119,253,150,227,192,128,111,116,159,219,250,32,238,46,120,13,94,73,171,210,71,36,170,116,183,88,10,83,147,71,150,60,124,236,248,175,8,32,28,101,64,216,58,51,90,119,133,189,204,126,43,65,34,194,228,214,252,177,83,135,42,51,225,40,58,105,137,152,204,168,18,58,223,83,81,79,66,223,104,100,107,3,204,53,49,8,235,54,162,13,227,171,37,63,4,130,203,208,53,246,153,138,178,68,242,183,19,130,111,0,238,191,241,116,27,120,174,94,128,27,150,54,196,234,83,95,197,243,110,29,48,26,107,154,164,158,91,76,14,72,40,171,196,2,139,50,106,94,44,190,86,247,251,76,110,102,108,211,3,30,92,247,136,125,8,145,34,45,169,15,119,175,171,53,43,75,240,88,78,39,94,223,113,240,84,92,194,167,213,159,215,113,215,53,49,58,30,211,76,131,105,100,134,162,3,98,101,41,22,151,220,178,61,81,173,149,10,171,191,10,198,0,217,232,152,56,74,27,240,123,32,101,251,210,247,231,173,83,139,39,171,128,124,105,73,96,13,213,53,43,184,223,100,101,20,250,141,77,231,33,135,94,246,171,148,191,95,7,134,200,19,77,48,4,182,25,50,48,70,244,0,206,136,32,183,188,226,222,224,187,200,41,242,184,55,226,69,249,67,113,198,208,77,242,66,220,164,135,215,191,83,213,84,255].pack('c*')

# wait some time (~1 min)

# split around 'p'
split 'table1', 
[112,85,215,205,134,229,95,208,131,1,155,77,85,6,97,171,136,17,138,250,77,52,179,117,89,70,86,151,239,108,74,7,144,204,254,25,215,207,111,146,3,156,145,170,165,0,0,0,0,113,185,103,122,25,215,166,111,25,240,161,133,156,198,0,152,167,150,187,93,139,78,129,122,113,239,204,157,137,33,213,98,241,149,163,234,96,59,9,180,104,203,1,163,13,167,33,78,242,134,110,78,71,188,20,241,71,90,72,184,173,17,225,159,19,5,76,209,2,22,238,206,61,34,103,2,198,112,134,71,75,60,124,197,221,85,223,111,121,217,175,0,156,236,46,230,179,169,71,89,119,15,142,62,239,81,37,37,90,77,83,177,45,143,10,115,129,129,184,29,44,181,137,77,88,216,229,54,86,56,104,123,173,196,68,183,161,155,255,173,191,189,119,253,150,227,192,128,111,116,159,219,250,32,238,46,120,13,94,73,171,210,71,36,170,116,183,88,10,83,147,71,150,60,124,236,248,175,8,32,28,101,64,232,58,51,90,119,133,189,204,126,43,65,34,194,228,214,252,177,83,135,42,51,225,40,58,105,137,152,204,168,18,58,223,83,81,79,66,223,104,100,107,3,204,53,49,8,235,54,162,13,227,171,37,63,4,130,203,208,53,246,153,138,178,68,114,184,19,130,111,0,238,191,241,116,27,120,174,94,128,27,150,54,196,234,83,95,197,243,110,29,48,26,107,154,164,158,91,76,14,72,40,171,196,2,139,50,106,94,44,190,86,247,251,76,110,102,108,211,3,30,92,247,136,125,8,145,38,45,169,15,119,175,171,53,43,75,240,88,78,39,94,223,113,240,84,92,194,167,213,159,215,113,215,53,49,58,30,211,76,131,105,100,134,162,3,98,101,41,22,151,220,178,61,81,173,149,10,171,191,10,198,0,217,232,152,56,74,27,240,123,32,105,251,210,247,231,173,83,139,39,171,128,124,105,73,96,13,213,53,43,184,223,100,101,20,250,141,77,231,33,135,94,246,171,148,191,95,7,134,200,19,77,48,4,182,25,50,48,70,244,0,206,136,32,183,188,226,222,224,187,200,41,242,184,55,226,71,249,67,113,198,208,77,242,66,220,164,135,215,191,83,213,84,255].pack('c*')

# split command hangs

# in another shell

scan 'table1'
ROW  COLUMN+CELL
 a column=f:, timestamp=2026-05-13T19:39:53.011, value=111

ERROR: No location found for 'table1', 
row='f=b\x84\x11\x01u\xD3M\xEB\x80\x93\xDE1\xC1\xD90E\xFB\xBE\x1Eq\xF0\x0Acu\xA80\xAA\x98\x17\xCA\xE3\xA2k\x8E=D\xA9\x8F\xF2\x0Eg\x96H\x00\x00\x00\x00\x05\x96\xC6\xDA\xA0\xDAW\xDE\x19\xF0\xA1\x85\x9C\xC6\x00\x98\xA7\x96\xBB]\x8BN\x81zq\xEF\xCC\x9D\x89!\xD5b\xF1\x95\xA3\xEA`;\x09\xB4h\xCB\x01\xA3\x0D\xA7!N\xF2\x86nNG\xBB\x14\xF1GZH\xB8\xAD\x11\xE1\x9F\x13\x05L\xD1\x02\x16\xEE\xCE="g\x02\xC6p\x86GK<|\xC5\xDDU\xDFoy\xD9\xAF\x00\x9C\xEC.\xE6\xB3\xA9GYw\x0F\x8E>\xEFQ%%ZMS\xB1-\x8F\x0As\x81\x81\xB8-,\xB5\x89MX\xD8\xE56V8h{\xAD\xC4D\xB7\xA1\x9B\xFF\xAD\xBF\xBDw\xFD\x96\xE3\xC0\x80ot\x9F\xDB\xFA
 \xEE.x\x0D^I\xAB\xD2G$\xAAt\xB7X\x0AS\x93G\x96<|\xEC\xF8\xAF\x08 
\x1Ce@\xD8:3Zw\x85\xBD\xCC~+A"\xC2\xE4\xD6\xFC\xB1S\x87*3\xE1(:i\x89\x98\xCC\xA8\x12:\xDFSQOB\xDFhdk\x03\xCC51\x08\xEB6\xA2\x0D\xE3\xAB%?\x04\x82\xCB\xD05\xF6\x99\x8A\xB2D\xF2\xB7\x13\x82o\x00\xEE\xBF\xF1t\x1Bx\xAE^\x80\x1B\x966\xC4\xEAS_\xC5\xF3n\x1D0\x1Ak\x9A\xA4\x9E[L\x0EH(\xAB\xC4\x02\x8B2j^,\xBEV\xF7\xFBLnfl\xD3\x03\x1E\x5C\xF7\x88}\x08\x91"-\xA9\x0Fw\xAF\xAB5+K\xF0XN'^\xDFq\xF0T\x5C\xC2\xA7\xD5\x9F\xD7q\xD751:\x1E\xD3L\x83id\x86\xA2\x03be)\x16\x97\xDC\xB2=Q\xAD\x95\x0A\xAB\xBF\x0A\xC6\x00\xD9\xE8\x988J\x1B\xF0{
 
e\xFB\xD2\xF7\xE7\xADS\x8B'\xAB\x80|iI`\x0D\xD55+\xB8\xDFde\x14\xFA\x8DM\xE7!\x87^\xF6\xAB\x94\xBF_\x07\x86\xC8\x13M0\x04\xB6\x1920F\xF4\x00\xCE\x88
 
\xB7\xBC\xE2\xDE\xE0\xBB\xC8)\xF2\xB87\xE2E\xF9Cq\xC6\xD0M\xF2B\xDC\xA4\x87\xD7\xBFS\xD5T\xFF',
 locateType=CURRENT

For usage try 'help "scan"'

Took 8.2906 seconds
{noformat}
I was able to hide the {{'m'}} and {{'z'}} rows.


was (Author: balazs.meszaros):
And a last try with [hashclash|https://github.com/cr-marcstevens/hashclash]. In 
this test scenario, I created two collisions, the first one for {{table1,f}} 
input, the second one {{table1,p}} input. This collision generation was 
relatively slow, it took around 3 hours.

{noformat}
create 'table1', 'f'
put 'table1', 'a', 'f', 111
put 'table1', 'm', 'f', 222
put 'table1', 'z', 'f', 333

# split around 'f' => b98ebcf463bef5bb0526dd39a9241d04
split 'table1', 
[102,61,98,132,17,1,117,211,77,235,128,147,222,49,193,217,48,69,251,190,30,113,240,10,99,117,168,48,170,152,23,202,227,162,107,142,61,68,169,143,242,14,103,150,72,0,0,0,0,5,150,198,218,160,218,87,222,25,240,161,133,156,198,0,152,167,150,187,93,139,78,129,122,113,239,204,157,137,33,213,98,241,149,163,234,96,59,9,180,104,203,1,163,13,167,33,78,242,134,110,78,71,187,20,241,71,90,72,184,173,17,225,159,19,5,76,209,2,22,238,206,61,34,103,2,198,112,134,71,75,60,124,197,221,85,223,111,121,217,175,0,156,236,46,230,179,169,71,89,119,15,142,62,239,81,37,37,90,77,83,177,45,143,10,115,129,129,184,45,44,181,137,77,88,216,229,54,86,56,104,123,173,196,68,183,161,155,255,173,191,189,119,253,150,227,192,128,111,116,159,219,250,32,238,46,120,13,94,73,171,210,71,36,170,116,183,88,10,83,147,71,150,60,124,236,248,175,8,32,28,101,64,216,58,51,90,119,133,189,204,126,43,65,34,194,228,214,252,177,83,135,42,51,225,40,58,105,137,152,204,168,18,58,223,83,81,79,66,223,104,100,107,3,204,53,49,8,235,54,162,13,227,171,37,63,4,130,203,208,53,246,153,138,178,68,242,183,19,130,111,0,238,191,241,116,27,120,174,94,128,27,150,54,196,234,83,95,197,243,110,29,48,26,107,154,164,158,91,76,14,72,40,171,196,2,139,50,106,94,44,190,86,247,251,76,110,102,108,211,3,30,92,247,136,125,8,145,34,45,169,15,119,175,171,53,43,75,240,88,78,39,94,223,113,240,84,92,194,167,213,159,215,113,215,53,49,58,30,211,76,131,105,100,134,162,3,98,101,41,22,151,220,178,61,81,173,149,10,171,191,10,198,0,217,232,152,56,74,27,240,123,32,101,251,210,247,231,173,83,139,39,171,128,124,105,73,96,13,213,53,43,184,223,100,101,20,250,141,77,231,33,135,94,246,171,148,191,95,7,134,200,19,77,48,4,182,25,50,48,70,244,0,206,136,32,183,188,226,222,224,187,200,41,242,184,55,226,69,249,67,113,198,208,77,242,66,220,164,135,215,191,83,213,84,255].pack('c*')

# wait some time (~1 min)

# split around 'p'
split 'table1', 
[112,85,215,205,134,229,95,208,131,1,155,77,85,6,97,171,136,17,138,250,77,52,179,117,89,70,86,151,239,108,74,7,144,204,254,25,215,207,111,146,3,156,145,170,165,0,0,0,0,113,185,103,122,25,215,166,111,25,240,161,133,156,198,0,152,167,150,187,93,139,78,129,122,113,239,204,157,137,33,213,98,241,149,163,234,96,59,9,180,104,203,1,163,13,167,33,78,242,134,110,78,71,188,20,241,71,90,72,184,173,17,225,159,19,5,76,209,2,22,238,206,61,34,103,2,198,112,134,71,75,60,124,197,221,85,223,111,121,217,175,0,156,236,46,230,179,169,71,89,119,15,142,62,239,81,37,37,90,77,83,177,45,143,10,115,129,129,184,29,44,181,137,77,88,216,229,54,86,56,104,123,173,196,68,183,161,155,255,173,191,189,119,253,150,227,192,128,111,116,159,219,250,32,238,46,120,13,94,73,171,210,71,36,170,116,183,88,10,83,147,71,150,60,124,236,248,175,8,32,28,101,64,232,58,51,90,119,133,189,204,126,43,65,34,194,228,214,252,177,83,135,42,51,225,40,58,105,137,152,204,168,18,58,223,83,81,79,66,223,104,100,107,3,204,53,49,8,235,54,162,13,227,171,37,63,4,130,203,208,53,246,153,138,178,68,114,184,19,130,111,0,238,191,241,116,27,120,174,94,128,27,150,54,196,234,83,95,197,243,110,29,48,26,107,154,164,158,91,76,14,72,40,171,196,2,139,50,106,94,44,190,86,247,251,76,110,102,108,211,3,30,92,247,136,125,8,145,38,45,169,15,119,175,171,53,43,75,240,88,78,39,94,223,113,240,84,92,194,167,213,159,215,113,215,53,49,58,30,211,76,131,105,100,134,162,3,98,101,41,22,151,220,178,61,81,173,149,10,171,191,10,198,0,217,232,152,56,74,27,240,123,32,105,251,210,247,231,173,83,139,39,171,128,124,105,73,96,13,213,53,43,184,223,100,101,20,250,141,77,231,33,135,94,246,171,148,191,95,7,134,200,19,77,48,4,182,25,50,48,70,244,0,206,136,32,183,188,226,222,224,187,200,41,242,184,55,226,71,249,67,113,198,208,77,242,66,220,164,135,215,191,83,213,84,255].pack('c*')

# split command hangs

# in another shell

scan 'table1'
ROW  COLUMN+CELL
 a column=f:, timestamp=2026-05-13T19:39:53.011, value=111

ERROR: No location found for 'table1', 
row='f=b\x84\x11\x01u\xD3M\xEB\x80\x93\xDE1\xC1\xD90E\xFB\xBE\x1Eq\xF0\x0Acu\xA80\xAA\x98\x17\xCA\xE3\xA2k\x8E=D\xA9\x8F\xF2\x0Eg\x96H\x00\x00\x00\x00\x05\x96\xC6\xDA\xA0\xDAW\xDE\x19\xF0\xA1\x85\x9C\xC6\x00\x98\xA7\x96\xBB]\x8BN\x81zq\xEF\xCC\x9D\x89!\xD5b\xF1\x95\xA3\xEA`;\x09\xB4h\xCB\x01\xA3\x0D\xA7!N\xF2\x86nNG\xBB\x14\xF1GZH\xB8\xAD\x11\xE1\x9F\x13\x05L\xD1\x02\x16\xEE\xCE="g\x02\xC6p\x86GK<|\xC5\xDDU\xDFoy\xD9\xAF\x00\x9C\xEC.\xE6\xB3\xA9GYw\x0F\x8E>\xEFQ%%ZMS\xB1-\x8F\x0As\x81\x81\xB8-,\xB5\x89MX\xD8\xE56V8h{\xAD\xC4D\xB7\xA1\x9B\xFF\xAD\xBF\xBDw\xFD\x96\xE3\xC0\x80ot\x9F\xDB\xFA
 \xEE.x\x0D^I\xAB\xD2G$\xAAt\xB7X\x0AS\x93G\x96<|\xEC\xF8\xAF\x08 
\x1Ce@\xD8:3Zw\x85\xBD\xCC~+A"\xC2\xE4\xD6\xFC\xB1S\x87*3\xE1(:i\x89\x98\xCC\xA8\x12:\xDFSQOB\xDFhdk\x03\xCC51\x08\xEB6\xA2\x0D\xE3\xAB%?\x04\x82\xCB\xD05\xF6\x99\x8A\xB2D\xF2\xB7\x13\x82o\x00\xEE\xBF\xF1t\x1Bx\xAE^\x80\x1B\x966\xC4\xEAS_\xC5\xF3n\x1D0\x1Ak\x9A\xA4\x9E[L\x0EH(\xAB\xC4\x02\x8B2j^,\xBEV\xF7\xFBLnfl\xD3\x03\x1E\x5C\xF7\x88}\x08\x91"-\xA9\x0Fw\xAF\xAB5+K\xF0XN'^\xDFq\xF0T\x5C\xC2\xA7\xD5\x9F\xD7q\xD751:\x1E\xD3L\x83id\x86\xA2\x03be)\x16\x97\xDC\xB2=Q\xAD\x95\x0A\xAB\xBF\x0A\xC6\x00\xD9\xE8\x988J\x1B\xF0{
 
e\xFB\xD2\xF7\xE7\xADS\x8B'\xAB\x80|iI`\x0D\xD55+\xB8\xDFde\x14\xFA\x8DM\xE7!\x87^\xF6\xAB\x94\xBF_\x07\x86\xC8\x13M0\x04\xB6\x1920F\xF4\x00\xCE\x88
 
\xB7\xBC\xE2\xDE\xE0\xBB\xC8)\xF2\xB87\xE2E\xF9Cq\xC6\xD0M\xF2B\xDC\xA4\x87\xD7\xBFS\xD5T\xFF',
 locateType=CURRENT

For usage try 'help "scan"'

Took 8.2906 seconds
{noformat}

I was able to hide {{'m'}} and {{'z'}} rows.

> Prevent region creation if the encoded region names are the same
> ----------------------------------------------------------------
>
>                 Key: HBASE-30160
>                 URL: https://issues.apache.org/jira/browse/HBASE-30160
>             Project: HBase
>          Issue Type: Sub-task
>            Reporter: Balazs Meszaros
>            Priority: Major
>
> HBase region names are hashed like this: MD5(tableName,startKey,...). With a 
> special startKey we can create collisions easily, like this:
> {noformat}
> hbase:001:0> create 'table1', 'f', SPLITS => 
> ["\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00^B\xb9\x99\xdb\xb7\x98W\xfa\xa1\xe0\xf1\xbc\x09h]1S[&u*\x93\xa1&RzF\x87\x9e\x970\x84\xe5\xb9\xe3ln*l\x07\x0c\xef\x03\x96Q\xbdC!\xb1\xdec-\xfb+\x11\x83h\xc1\xbe$\x1f\xae\x95\xaf\xd3W\x07\x8a\x01\xfa\xf1\xba\x83\x8c}\xa5A1\x83\xae\xae\xf8\xe6\xf9\xe5F\xa7\xc9\x1a\xfeM\xec\x07\xdem\x0em\x9e\x97\xf4\x16\x08\x94\xa8\x8a87\x07\xb5v\xac\xe7\x07\x10\x22\xfc\xb9\x1fm\xbd\x13V\xa9\xedX\xf0\xb1",
>  
> "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00^B\xb9\x99\xdb\xb7\x98W\xfa\xa1\xe0\xf1\xbc\x09h]1S[\xa6u*\x93\xa1&RzF\x87\x9e\x970\x84\xe5\xb9\xe3ln*l\x07\x0c\xef\x03\x96\xd1\xbcC!\xb1\xdec-\xfb+\x11\x83h\xc1>$\x1f\xae\x95\xaf\xd3W\x07\x8a\x01\xfa\xf1\xba\x83\x8c}\xa5A1\x83\xae\xae\xf8f\xf9\xe5F\xa7\xc9\x1a\xfeM\xec\x07\xdem\x0em\x9e\x97\xf4\x16\x08\x94\xa8\x8a87\x075w\xac\xe7\x07\x10\x22\xfc\xb9\x1fm\xbd\x13V)\xedX\xf0\xb1"]
> ERROR: The procedure 9 is still running
> For usage try 'help "create"'
> Took 608.8101 seconds
> {noformat}
> The table creation fails because the hashes are the same:
> {noformat}
> 2026-05-13 09:34:23,762 INFO  org.apache.hadoop.hbase.regionserver.HRegion: 
> [RegionOpenAndInit-table1-pool-2]: creating {ENCODED => 
> 647314dfe2b7e604e08fd7fd3fec44fc, NAME => 'table1,...
> 2026-05-13 09:34:23,764 INFO  org.apache.hadoop.hbase.regionserver.HRegion: 
> [RegionOpenAndInit-table1-pool-1]: creating {ENCODED => 
> 647314dfe2b7e604e08fd7fd3fec44fc, NAME => 'table1,...
> 2026-05-13 09:34:23,772 WARN  org.apache.hadoop.hdfs.DataStreamer: 
> [Thread-140]: DataStreamer Exception
> java.io.FileNotFoundException: File does not exist: 
> /hbase/data/default/table1/647314dfe2b7e604e08fd7fd3fec44fc/.regioninfo 
> (inode 16653) [Lease.  Holder: DFSClient_NONMAPREDUCE_1353520776_1, pending 
> creates: 3]
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:3194)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.analyzeFileState(FSDirWriteFileOp.java:609)
> ...
> {noformat}
> The procedure never finishes and prohibits further creation of {{table1}}.
> This issue should also be triggerable by splitting the table twice:
> {noformat}
> split 'table1', 'malicious-key1'
> split 'table1', 'malicious-key2'
> {noformat}
> It would be hard to change MD5 to something else, but we should handle these 
> collisions better. We should check whether the region hashes are the same and 
> fail immediately. Under normal circumstances, the chance of a collision with 
> automatic splitting is very, very low.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to