Repository: kudu Updated Branches: refs/heads/master 8613415b2 -> 4923a74d7
KUDU-1508: script for testing presence of bug and finding upper bounds This patch introduces a script that tests for the presence of KUDU-1508 and establishes a "safe" upper bound on the number of extents an ext4 file can have before it becomes vulnerable to the bug. The upper bound is actually a function of the filesystem's block size, since that's the granularity on which hole punching operates. When run on my laptop (Ubuntu 16.04): This kernel is not vulnerable to KUDU-1508, skipping test When run on ve0518 (an el6.6 box): This kernel is vulnerable to KUDU-1508, finding per-block size upper bounds Block size 1024: searching for block number upper bound (MIN=0,MAX=16384) Block size 1024: 8192 bad (MIN=0,MAX=16384) Block size 1024: 4095 bad (MIN=0,MAX=8191) Block size 1024: 2047 bad (MIN=0,MAX=4094) Block size 1024: 1023 bad (MIN=0,MAX=2046) Block size 1024: 511 good (MIN=0,MAX=1022) Block size 1024: 767 bad (MIN=512,MAX=1022) Block size 1024: 639 good (MIN=512,MAX=766) Block size 1024: 703 bad (MIN=640,MAX=766) Block size 1024: 671 good (MIN=640,MAX=702) Block size 1024: 687 bad (MIN=672,MAX=702) Block size 1024: 679 bad (MIN=672,MAX=686) Block size 1024: 675 bad (MIN=672,MAX=678) Block size 1024: 673 good (MIN=672,MAX=674) Block size 1024: upper bound found at 673 Block size 2048: searching for block number upper bound (MIN=0,MAX=16384) Block size 2048: 8192 bad (MIN=0,MAX=16384) Block size 2048: 4095 bad (MIN=0,MAX=8191) Block size 2048: 2047 bad (MIN=0,MAX=4094) Block size 2048: 1023 good (MIN=0,MAX=2046) Block size 2048: 1535 bad (MIN=1024,MAX=2046) Block size 2048: 1279 good (MIN=1024,MAX=1534) Block size 2048: 1407 bad (MIN=1280,MAX=1534) Block size 2048: 1343 good (MIN=1280,MAX=1406) Block size 2048: 1375 bad (MIN=1344,MAX=1406) Block size 2048: 1359 bad (MIN=1344,MAX=1374) Block size 2048: 1351 good (MIN=1344,MAX=1358) Block size 2048: 1355 bad (MIN=1352,MAX=1358) Block size 2048: 1353 good (MIN=1352,MAX=1354) Block size 2048: upper bound found at 1353 
Block size 4096: searching for block number upper bound (MIN=0,MAX=16384) Block size 4096: 8192 bad (MIN=0,MAX=16384) Block size 4096: 4095 bad (MIN=0,MAX=8191) Block size 4096: 2047 good (MIN=0,MAX=4094) Block size 4096: 3071 bad (MIN=2048,MAX=4094) Block size 4096: 2559 good (MIN=2048,MAX=3070) Block size 4096: 2815 bad (MIN=2560,MAX=3070) Block size 4096: 2687 good (MIN=2560,MAX=2814) Block size 4096: 2751 bad (MIN=2688,MAX=2814) Block size 4096: 2719 good (MIN=2688,MAX=2750) Block size 4096: 2735 bad (MIN=2720,MAX=2750) Block size 4096: 2727 bad (MIN=2720,MAX=2734) Block size 4096: 2723 bad (MIN=2720,MAX=2726) Block size 4096: 2721 good (MIN=2720,MAX=2722) Block size 4096: upper bound found at 2721 Change-Id: I710918a153a9e8e05e989fe63281891c9ebc7178 Reviewed-on: http://gerrit.cloudera.org:8080/4730 Tested-by: Kudu Jenkins Reviewed-by: Todd Lipcon <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4923a74d Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4923a74d Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4923a74d Branch: refs/heads/master Commit: 4923a74d768e903761ebf96cd5ef5f1286dafca4 Parents: 8613415 Author: Adar Dembo <[email protected]> Authored: Fri Oct 14 18:51:29 2016 -0700 Committer: Adar Dembo <[email protected]> Committed: Tue Oct 18 01:04:21 2016 +0000 ---------------------------------------------------------------------- .../experiments/KUDU-1508/hole_punch_range.c | 74 +++++++++ src/kudu/experiments/KUDU-1508/run_test.sh | 160 +++++++++++++++++++ 2 files changed, 234 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/4923a74d/src/kudu/experiments/KUDU-1508/hole_punch_range.c ---------------------------------------------------------------------- diff --git a/src/kudu/experiments/KUDU-1508/hole_punch_range.c b/src/kudu/experiments/KUDU-1508/hole_punch_range.c new 
file mode 100644 index 0000000..442ad61 --- /dev/null +++ b/src/kudu/experiments/KUDU-1508/hole_punch_range.c @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// For fallocate(2) +#define _GNU_SOURCE + +#include <fcntl.h> +#include <linux/falloc.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +int main(int argc, char** argv) { + if (argc != 5) { + fprintf(stderr, "usage: %s <path> <start block> <end block> <stride>\n", argv[0]); + fprintf(stderr, "\n"); + fprintf(stderr, "Punches holes in an existing file designated by <path>.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Holes are punched on a per-filesystem block basis, " + "beginning at <start block> and ending at <end block> " + "(exclusive). 
Blocks can be skipped with <stride>; a value " + "of 1 means every block in the range will be punched.\n"); + exit(1); + } + + int start_block = atoi(argv[2]); + int end_block = atoi(argv[3]); + int stride = atoi(argv[4]); + + int fd = open(argv[1], O_WRONLY, 0644); + if (fd < 0) { + perror("open"); + return fd; + } + + struct stat sbuf; + int ret = fstat(fd, &sbuf); + if (ret < 0) { + perror("fstat"); + return ret; + } + + int block_num; + for (block_num = start_block; block_num < end_block; block_num += stride) { + ret = fallocate(fd, + FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + block_num * sbuf.st_blksize, + sbuf.st_blksize); + if (ret < 0) { + perror("fallocate"); + return ret; + } + } + + fsync(fd); + close(fd); +} http://git-wip-us.apache.org/repos/asf/kudu/blob/4923a74d/src/kudu/experiments/KUDU-1508/run_test.sh ---------------------------------------------------------------------- diff --git a/src/kudu/experiments/KUDU-1508/run_test.sh b/src/kudu/experiments/KUDU-1508/run_test.sh new file mode 100755 index 0000000..df77759 --- /dev/null +++ b/src/kudu/experiments/KUDU-1508/run_test.sh @@ -0,0 +1,160 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+# This script tests for KUDU-1508 in the local kernel's implementation of ext4.
+# To use it, you must compile hole_punch_range.c and make it available in the
+# current working directory.
+#
+# This script must either be run as root, or as a user with sudo.
+
+# Configurable options.
+BLOCK_DEVICE_FILE=block
+MOUNT_DIR=mnt
+TEST_FILE=test
+BLOCK_SIZES="1024 2048 4096"
+
+# Run a command given in the first argument.
+#
+# If the second argument is set and the command fails, the exit code is
+# returned and the command output is squelched.
+#
+# If the second argument is unset and the command fails, the command output is
+# emitted and the script exits.
+function run() {
+  local CMD="$1"
+  local FAIL_OK="$2"
+  local OUT RETVAL
+
+  OUT=$($CMD 2>&1)
+  RETVAL=$?
+  if [ $RETVAL -ne 0 ]; then
+    if [ -n "$FAIL_OK" ]; then
+      return $RETVAL
+    fi
+
+    echo "Command failed: $CMD"
+    echo "Result: $OUT"
+    exit $RETVAL
+  fi
+}
+
+# Run an iteration of the test in a local test-only filesystem.
+#
+# Returns the fsck.ext4 exit status: zero if clean, non-zero if corrupted.
+function run_test() {
+  local BLOCK_SIZE=$1
+  local BLOCK_NUM=$2
+
+  local FILESYSTEM_SIZE=$(($BLOCK_SIZE * $BLOCK_NUM * 4))
+  local TEST_PATH=$MOUNT_DIR/$TEST_FILE
+  local FSCK_RESULT
+  local INTERIOR_NODE_LAST_BLOCK
+
+  # Clean up in preparation.
+  run "sudo umount $MOUNT_DIR" fail_ok
+  run "mkdir -p $MOUNT_DIR"
+  run "rm -f $BLOCK_DEVICE_FILE"
+
+  # Create the test filesystem and file.
+  #
+  # The 'sync' at the end speeds up subsequent hole punching.
+  run "fallocate --length $FILESYSTEM_SIZE $BLOCK_DEVICE_FILE"
+  run "mkfs -F -t ext4 -b $BLOCK_SIZE $BLOCK_DEVICE_FILE"
+  run "sudo mount -o loop $BLOCK_DEVICE_FILE $MOUNT_DIR"
+  run "sudo chown $EUID $MOUNT_DIR"
+  run "dd if=/dev/zero of=$TEST_PATH conv=notrunc bs=$BLOCK_SIZE count=$BLOCK_NUM"
+  run "sync"
+
+  # Maximize the number of extents in the file by punching holes in every other
+  # block, across all $BLOCK_NUM blocks that were just written to the file.
+  #
+  # The 'sync' at the end makes the extent tree visible to debugfs.
+  run "./hole_punch_range $TEST_PATH 0 $BLOCK_NUM 2"
+  run "sync"
+
+  # Determine the block number of the last block in the first level 1 interior
+  # node in the file's extent tree.
+  while read LINE; do
+    # Find the node. Its debugfs line will look something like this:
+    #
+    # 1/ 2 1/340 1 - 680 163969 680
+    #
+    # The last number in the line is the number of blocks pointed to by the
+    # node, and since we're looking at the first level 1 interior node, it's
+    # also the last block number pointed to by that node.
+    if echo $LINE | grep -q "[[:space:]]*1/[[:space:]]*[[:digit:]]*[[:space:]]*1/"; then
+      INTERIOR_NODE_LAST_BLOCK=$(echo $LINE | awk '{ print $NF }')
+      break
+    fi
+  done < <(debugfs -R "dump_extents -n $TEST_FILE" $BLOCK_DEVICE_FILE 2> /dev/null)
+
+  # If we can't find the last block number, the file does not have an extent
+  # tree with at least two interior levels, and the subsequent corruption test
+  # can be skipped.
+  if [ -n "$INTERIOR_NODE_LAST_BLOCK" ]; then
+    # Try to corrupt the filesystem by punching out all of the remaining blocks
+    # belonging to this interior node.
+    #
+    # If the filesystem is vulnerable to KUDU-1508, it'll fail to update the
+    # interior node's parent, corrupting the filesystem until the next fsck.
+    run "./hole_punch_range $TEST_PATH 1 $INTERIOR_NODE_LAST_BLOCK 2"
+  fi
+
+  run "sudo umount $MOUNT_DIR"
+
+  # Test the filesystem for corruption and return the result.
+  run "fsck.ext4 $BLOCK_DEVICE_FILE -f -n" fail_ok
+  FSCK_RESULT=$?
+
+  # Clean up after the test.
+  run "rmdir $MOUNT_DIR"
+  run "rm -f $BLOCK_DEVICE_FILE"
+
+  return $FSCK_RESULT
+}
+
+# Run the test with known bad parameters to see if this kernel is worth testing.
+run_test 4096 $((16 * 1024))
+if [[ $? -eq 0 ]]; then
+  echo "This kernel is not vulnerable to KUDU-1508, skipping test"
+  exit 0
+fi
+
+echo "This kernel is vulnerable to KUDU-1508, finding per-block size upper bounds"
+
+# Now figure out, for each block size, what the max number of blocks is.
+for BLOCK_SIZE in $BLOCK_SIZES; do
+  MIN=0
+  MAX=$((1024 * 16))
+  LAST_GOOD=  # Largest block count that passed; stays empty if every probe fails.
+  echo "Block size $BLOCK_SIZE: searching for block number upper bound (MIN=$MIN,MAX=$MAX)"
+  while [[ $MIN -lt $MAX ]] ; do
+    CUR=$((($MAX + $MIN) / 2))
+    if run_test $BLOCK_SIZE $CUR; then
+      echo "Block size $BLOCK_SIZE: $CUR good (MIN=$MIN,MAX=$MAX)"
+      LAST_GOOD=$CUR
+      MIN=$(($CUR + 1))
+    else
+      echo "Block size $BLOCK_SIZE: $CUR bad (MIN=$MIN,MAX=$MAX)"
+      MAX=$(($CUR - 1))
+    fi
+  done
+  # Report the last passing probe rather than $CUR, which may have just failed.
+  echo "Block size $BLOCK_SIZE: upper bound found at ${LAST_GOOD:-none}"
+done
