Xiao Liu created HBASE-30266:
--------------------------------
Summary: Do not drop visibility delete markers as redundant during
minor compaction
Key: HBASE-30266
URL: https://issues.apache.org/jira/browse/HBASE-30266
Project: HBase
Issue Type: Bug
Reporter: Xiao Liu
Assignee: Xiao Liu
Fix For: 2.7.0, 3.0.0-beta-2, 2.5.16, 2.6.7
HBASE-30036 added an optimization to skip redundant delete markers during minor
compaction.
For tables using VisibilityController, VisibilityScanDeleteTracker inherits the
generic ScanDeleteTracker#isRedundantDelete implementation. This check is
visibility-label blind, so a newer DeleteColumn marker with one visibility
expression can make an older DeleteColumn marker for the same qualifier but a
different visibility expression look redundant.
Example:
{code}
Put f:q@40 with visibility A
DeleteColumn f:q@50 with visibility A
DeleteColumn f:q@100 with visibility B
{code}
During minor compaction, the A@50 delete marker may be dropped after seeing
B@100. If the old A@40 put is in another HFile not included in that compaction,
it can become visible again.
Fix by making VisibilityScanDeleteTracker never report visibility delete
markers as redundant.
----
Steps to reproduce:
1. Add the following properties to hbase-site.xml:
{code}
<property>
<name>hbase.security.authorization</name>
<value>true</value>
</property>
<property>
<name>hbase.coprocessor.master.classes</name>
<value>org.apache.hadoop.hbase.security.visibility.VisibilityController</value>
</property>
<property>
<name>hbase.coprocessor.region.classes</name>
<value>org.apache.hadoop.hbase.security.visibility.VisibilityController</value>
</property>
{code}
2. Run the following commands in the HBase shell:
{code}
# Prepare labels and authorizations.
add_labels ['A', 'B'] rescue nil
set_auths 'liuxiao132', ['A', 'B']
# Clean up the old test table if it exists.
disable 'vis_del' rescue nil
drop 'vis_del' rescue nil
# Create the table.
# Raise compaction.min so no compaction is selected yet and exactly three HFiles remain.
create 'vis_del',
{NAME => 'f',
VERSIONS => 5,
NEW_VERSION_BEHAVIOR => 'false',
COMPRESSION => 'NONE',
CONFIGURATION => {
'hbase.hstore.compaction.min' => '1000',
'hbase.hstore.compaction.max' => '2',
'hbase.hstore.compaction.max.size' => '65536'
}}
# Write a large A-visible cell. The large value makes this HFile too large
# to be selected by the later minor compaction.
big = 'a-v40-' + ('x' * 200000)
put 'vis_del', 'r1', 'f:q', big, 40, {VISIBILITY => 'A'}
flush 'vis_del'
# Write an A-visible DeleteColumn marker. This marker should delete A@40.
deleteall 'vis_del', 'r1', 'f:q', 50, {VISIBILITY => 'A'}
flush 'vis_del'
# Write a newer B-visible DeleteColumn marker. It must not make the A marker
# redundant.
deleteall 'vis_del', 'r1', 'f:q', 100, {VISIBILITY => 'B'}
flush 'vis_del'
# Before compaction, A@40 should be hidden.
scan 'vis_del', {AUTHORIZATIONS => ['A'], VERSIONS => 5, MAXLENGTH => 80}
# Before compaction, RAW scan should show B@100 delete, A@50 delete, and A@40
# put.
scan 'vis_del', {RAW => true, VERSIONS => 10, AUTHORIZATIONS => ['A', 'B'],
MAXLENGTH => 80}
# Re-enable minor compaction for exactly two small files.
disable 'vis_del'
alter 'vis_del',
{NAME => 'f',
CONFIGURATION => {
'hbase.hstore.compaction.min' => '2',
'hbase.hstore.compaction.max' => '2',
'hbase.hstore.compaction.max.size' => '65536'
}}
enable 'vis_del'
# Trigger minor compaction and wait until it finishes.
compact 'vis_del'
while compaction_state('vis_del') != 'NONE'
sleep 1
end
# On the buggy version, A@50 DeleteColumn disappears.
# On the fixed version, A@50 DeleteColumn should still be present.
scan 'vis_del', {RAW => true, VERSIONS => 10, AUTHORIZATIONS => ['A', 'B'],
MAXLENGTH => 80}
# On the buggy version, A@40 becomes visible again.
# On the fixed version, this should return 0 rows.
scan 'vis_del', {AUTHORIZATIONS => ['A'], VERSIONS => 5, MAXLENGTH => 80}
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)