One of the build.conf changes applies to __ALL_TESTS__, not to libs as stated in the log message. (There is no such section.)
I’m not sure why we add tools that are not used by the test tools to the __ALL_TESTS__ target. Do you intend to run it from the test suite? (On which platforms?) Bert *From:* [email protected] *Sent:* December 25, 2012 11:22 PM *To:* [email protected] *Subject:* svn commit: r1425778 - in /subversion/trunk: build.conf tools/dev/fsfs-access-map.c Author: stefan2 Date: Tue Dec 25 22:22:07 2012 New Revision: 1425778 URL: http://svn.apache.org/viewvc?rev=1425778&view=rev Log: A new developer tool: take an strace from some fsfs-based operation and create a read access heat map for it. The latter gets written as BMP file along with a few stats. This allows to visualize the efficiency (or lack thereof) of our FS usage. * build.conf (libs): add new dependency (fsfs-access-map): new tool * tools/dev/fsfs-access-map.c (): new tool Added: subversion/trunk/tools/dev/fsfs-access-map.c Modified: subversion/trunk/build.conf Modified: subversion/trunk/build.conf URL: http://svn.apache.org/viewvc/subversion/trunk/build.conf?rev=1425778&r1=1425777&r2=1425778&view=diff ============================================================================== --- subversion/trunk/build.conf (original) +++ subversion/trunk/build.conf Tue Dec 25 22:22:07 2012 @@ -1224,7 +1224,7 @@ libs = __ALL__ ra-local-test svndiff-test vdelta-test entries-dump atomic-ra-revprop-change wc-lock-tester wc-incomplete-tester - diff diff3 diff4 fsfs-reorg fsfs-stats svn-bench + diff diff3 diff4 fsfs-reorg fsfs-stats fsfs-access-map svn-bench client-test conflict-data-test db-test pristine-store-test entries-compat-test op-depth-test dirent_uri-test wc-queries-test wc-test @@ -1287,6 +1287,13 @@ sources = fsfs-stats.c install = tools libs = libsvn_delta libsvn_subr apr +[fsfs-access-map] +type = exe +path = tools/dev +sources = fsfs-access-map.c +install = tools +libs = libsvn_subr apr + [diff] type = exe path = tools/diff Added: subversion/trunk/tools/dev/fsfs-access-map.c URL: 
http://svn.apache.org/viewvc/subversion/trunk/tools/dev/fsfs-access-map.c?rev=1425778&view=auto ============================================================================== --- subversion/trunk/tools/dev/fsfs-access-map.c (added) +++ subversion/trunk/tools/dev/fsfs-access-map.c Tue Dec 25 22:22:07 2012 @@ -0,0 +1,604 @@ +/* fsfs-access-map.c -- convert strace output into FSFS access bitmap + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include "svn_pools.h" +#include "svn_string.h" +#include "svn_io.h" + +#include "private/svn_string_private.h" + +/* The information we gather for each file. There will be one instance + * per file name - even if the file got deleted and re-created. + */ +typedef struct file_stats_t +{ + /* file name as found in the open() call */ + const char *name; + + /* file size as determined during the this tool run. Will be 0 for + * files that no longer exist. However, there may still be entries + * in the read_map. */ + apr_int64_t size; + + /* for rev files (packed or non-packed), this will be the first revision + * that file. 
-1 for non-rev files. */ + apr_int64_t rev_num; + + /* number of times this file got opened */ + apr_int64_t open_count; + + /* number of lseek counts */ + apr_int64_t seek_count; + + /* number of lseek calls to clusters not previously read */ + apr_int64_t uncached_seek_count; + + /* number of read() calls */ + apr_int64_t read_count; + + /* total number of bytes returned by those reads */ + apr_int64_t read_size; + + /* number of clusters read */ + apr_int64_t clusters_read; + + /* number of different clusters read + * (i.e. number of non-zero entries in read_map). */ + apr_int64_t unique_clusters_read; + + /* cluster -> read count mapping (1 byte per cluster, saturated at 255) */ + apr_array_header_t *read_map; + +} file_stats_t; + +/* Represents an open file handle. It refers to a file and concatenates + * consecutive reads such that we don't artificially hit the same cluster + * multiple times. Instances of this type will be reused to limit the + * allocation load on the lookup map. + */ +typedef struct handle_info_t +{ + /* the open file */ + file_stats_t *file; + + /* file offset at which the current series of reads started (default: 0) */ + apr_int64_t last_read_start; + + /* bytes read so far in the current series of reads started (default: 0) */ + apr_int64_t last_read_size; + +} handle_info_t; + +/* useful typedef */ +typedef unsigned char byte; + +/* global const char * file name -> *file_info_t map */ +static apr_hash_t *files = NULL; + +/* global int handle -> *handle_info_t map. Entries don't get removed + * by close(). Instead, we simply recycle (and re-initilize) existing + * instances. */ +static apr_hash_t *handles = NULL; + +/* assume cluster size. 64 and 128kB are typical values for RAIDs. */ +static apr_int64_t cluster_size = 64 * 1024; + +/* Call this after a sequence of reads has been ended by either close() + * or lseek() for this HANDLE_INFO. 
This will update the read_map and + * unique_clusters_read members of the underlying file_info_t structure. + */ +static void +store_read_info(handle_info_t *handle_info) +{ + if (handle_info->last_read_size) + { + apr_size_t i; + apr_size_t first_cluster + = (apr_size_t)(handle_info->last_read_start / cluster_size); + apr_size_t last_cluster + = (apr_size_t)(( handle_info->last_read_start + + handle_info->last_read_size + - 1) / cluster_size); + + /* auto-expand access map in case the file later shrunk or got deleted */ + while (handle_info->file->read_map->nelts <= last_cluster) + APR_ARRAY_PUSH(handle_info->file->read_map, byte) = 0; + + /* accumulate the accesses per cluster. Saturate and count first + * (i.e. disjoint) accesses clusters */ + handle_info->file->clusters_read += last_cluster - first_cluster + 1; + for (i = first_cluster; i <= last_cluster; ++i) + { + byte *count = &APR_ARRAY_IDX(handle_info->file->read_map, i, byte); + if (*count == 0) + handle_info->file->unique_clusters_read++; + if (*count < 255) + ++*count; + } + } +} + +/* Handle a open() call. Ensures that a file_info_t for the given NAME + * exists. Auto-create and initialize a handle_info_t for it linked to + * HANDLE. 
+ */ +static void +open_file(const char *name, int handle) +{ + file_stats_t *file = apr_hash_get(files, name, APR_HASH_KEY_STRING); + handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle)); + + /* auto-create file info */ + if (!file) + { + apr_pool_t *pool = apr_hash_pool_get(files); + apr_pool_t *sub_pool = svn_pool_create(pool); + + apr_file_t *apr_file = NULL; + apr_finfo_t finfo = { 0 }; + apr_size_t cluster_count = 0; + + /* determine file size (if file still exists) */ + apr_file_open(&apr_file, name, + APR_READ | APR_BUFFERED, APR_OS_DEFAULT, sub_pool); + if (apr_file) + apr_file_info_get(&finfo, APR_FINFO_SIZE, apr_file); + apr_pool_destroy(sub_pool); + + file = apr_pcalloc(pool, sizeof(*file)); + file->name = apr_pstrdup(pool, name); + file->size = finfo.size; + + /* pre-allocate cluster map accordingly + * (will be auto-expanded later if necessary) */ + cluster_count = (apr_size_t)(1 + (file->size - 1) / cluster_size); + file->read_map = apr_array_make(pool, file->size + ? 
cluster_count + : 1, sizeof(byte)); + + while (file->read_map->nelts < cluster_count) + APR_ARRAY_PUSH(file->read_map, byte) = 0; + + /* determine first revision of rev / packed rev files */ + if (strstr(name, "/db/revs/") != NULL && strstr(name, "manifest") == NULL) + if (strstr(name, ".pack/pack") != NULL) + file->rev_num = atoi(strstr(name, "/db/revs/") + 9); + else + file->rev_num = atoi(strrchr(name, '/') + 1); + else + file->rev_num = -1; + + apr_hash_set(files, file->name, APR_HASH_KEY_STRING, file); + } + + file->open_count++; + + /* auto-create handle instance */ + if (!handle_info) + { + apr_pool_t *pool = apr_hash_pool_get(handles); + int *key = apr_palloc(pool, sizeof(*key)); + *key = handle; + + handle_info = apr_pcalloc(pool, sizeof(*handle_info)); + apr_hash_set(handles, key, sizeof(handle), handle_info); + } + + /* link handle to file */ + handle_info->file = file; + handle_info->last_read_start = 0; + handle_info->last_read_size = 0; +} + +/* COUNT bytes have been read from file with the given HANDLE. + */ +static void +read_file(int handle, apr_int64_t count) +{ + handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle)); + if (handle_info) + { + /* known file handle -> expand current read sequence */ + + handle_info->last_read_size += count; + handle_info->file->read_count++; + handle_info->file->read_size += count; + } +} + +/* Seek to offset LOCATION in file given by HANDLE. 
+ */ +static void +seek_file(int handle, apr_int64_t location) +{ + handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle)); + if (handle_info) + { + /* known file handle -> end current read sequence and start a new one */ + + apr_size_t cluster = (apr_size_t)(location / cluster_size); + + store_read_info(handle_info); + + handle_info->last_read_size = 0; + handle_info->last_read_start = location; + handle_info->file->seek_count++; + + /* if we seek to a location that had not been read from before, + * there will probably be a real I/O seek on the following read. + */ + if ( handle_info->file->read_map->nelts <= cluster + || APR_ARRAY_IDX(handle_info->file->read_map, cluster, byte) == 0) + handle_info->file->uncached_seek_count++; + } +} + +/* The given file HANDLE has been closed. + */ +static void +close_file(int handle) +{ + /* for known file handles, end current read sequence */ + + handle_info_t *handle_info = apr_hash_get(handles, &handle, sizeof(handle)); + if (handle_info) + store_read_info(handle_info); +} + +/* Parse / process non-empty the LINE from an strace output. 
+ */ +static void +parse_line(svn_stringbuf_t *line) +{ + /* determine function name, first parameter and return value */ + char *func_end = strchr(line->data, '('); + char *return_value = strrchr(line->data, ' '); + + char *first_param_end = strchr(func_end, ','); + if (first_param_end == NULL) + first_param_end = strchr(func_end, ')'); + + *func_end++ = 0; + *first_param_end = 0; + ++return_value; + + /* process those operations that we care about */ + if (strcmp(line->data, "open") == 0) + { + /* remove double quotes from file name parameter */ + *func_end++ = 0; + *--first_param_end = 0; + + open_file(func_end, atoi(return_value)); + } + else if (strcmp(line->data, "read") == 0) + read_file(atoi(func_end), atoi(return_value)); + else if (strcmp(line->data, "lseek") == 0) + seek_file(atoi(func_end), atoi(return_value)); + else if (strcmp(line->data, "close") == 0) + close_file(atoi(func_end)); +} + +/* Process the strace output stored in FILE. + */ +static void +parse_file(apr_file_t *file) +{ + apr_pool_t *pool = svn_pool_create(NULL); + apr_pool_t *iter_pool = svn_pool_create(pool); + + /* limit lines to 4k (usually, we need less than 200 bytes) */ + svn_stringbuf_t *line = svn_stringbuf_create_ensure(4096, pool); + + do + { + svn_error_t *err = NULL; + + line->len = line->blocksize-1; + err = svn_io_read_length_line(file, line->data, &line->len, iter_pool); + svn_error_clear(err); + if (err) + break; + + parse_line(line); + apr_pool_clear(iter_pool); + } + while (line->len > 0); +} + +/* qsort() callback. Sort files by revision number. + */ +static int +compare_files(file_stats_t **lhs, file_stats_t **rhs) +{ + return (*lhs)->rev_num < (*rhs)->rev_num; +} + +/* Return all rev (and packed rev) files sorted by revision number. + * Allocate the result in POOL. 
+ */ +static apr_array_header_t * +get_rev_files(apr_pool_t *pool) +{ + apr_hash_index_t *hi; + apr_array_header_t *result = apr_array_make(pool, + apr_hash_count(files), + sizeof(file_stats_t *)); + + /* select all files that have a rev number */ + for (hi = apr_hash_first(pool, files); hi; hi = apr_hash_next(hi)) + { + const char *name = NULL; + apr_ssize_t len = 0; + file_stats_t *file = NULL; + + apr_hash_this(hi, (const void **)&name, &len, (void**)&file); + if (file->rev_num >= 0) + APR_ARRAY_PUSH(result, file_stats_t *) = file; + } + + /* sort them */ + qsort(result->elts, result->nelts, result->elt_size, + (int (*)(const void *, const void *))compare_files); + + /* return the result */ + return result; +} + +/* store VALUE to DEST in little-endian format. Assume that the target + * buffer is filled with 0. + */ +static void +write_number(byte *dest, int value) +{ + while (value) + { + *dest = (byte)(value % 256); + value /= 256; + ++dest; + } +} + +/* Return a linearly interpolated y value for X with X0 <= X <= X1 and + * the corresponding Y0 and Y1 values. + */ +static int +interpolate(int y0, int x0, int y1, int x1, int x) +{ + return y0 + ((y1 - y0) * (x - x0)) / (x1 - x0); +} + +/* Return the BMP-encoded 24 bit COLOR for the given value. + */ +static void +select_color(byte color[3], byte value) +{ + /* value -> color table. Missing values get interpolated. + * { count, B - G - R } */ + byte table[7][4] = + { + { 0, 255, 255, 255 }, /* unread -> white */ + { 1, 128, 128, 0 }, /* read once -> turquoise */ + { 2, 0, 128, 0 }, /* twice -> green */ + { 4, 0, 192, 192 }, /* 4x -> yellow */ + { 16, 0, 0, 192 }, /* 16x -> red */ + { 64, 192, 0, 128 }, /* 64x -> purple */ + { 255, 0, 0, 0 } /* max -> black */ + }; + + /* find upper limit entry for value */ + int i; + for (i = 0; i < 7; ++i) + if (table[i][0] >= value) + break; + + /* exact match? 
*/ + if (table[i][0] == value) + { + color[0] = table[i][1]; + color[1] = table[i][2]; + color[2] = table[i][3]; + } + else + { + /* interpolate */ + color[0] = (byte)interpolate(table[i-1][1], table[i-1][0], + table[i][1], table[i][0], + value); + color[1] = (byte)interpolate(table[i-1][2], table[i-1][0], + table[i][2], table[i][0], + value); + color[2] = (byte)interpolate(table[i-1][3], table[i-1][0], + table[i][3], table[i][0], + value); + } +} + +/* write the cluster read map for all files in INFO as BMP image to FILE. + */ +static void +write_bitmap(apr_array_header_t *info, apr_file_t *file) +{ + /* BMP file header (some values need to filled in later)*/ + byte header[54] = + { + 'B', 'M', /* magic */ + 0, 0, 0, 0, /* file size (to be written later) */ + 0, 0, 0, 0, /* reserved, unused */ + 54, 0, 0, 0, /* pixel map starts at offset 54dec */ + + 40, 0, 0, 0, /* DIB header has 40 bytes */ + 0, 0, 0, 0, /* x size in pixel */ + 0, 0, 0, 0, /* y size in pixel */ + 1, 0, /* 1 color plane */ + 24, 0, /* 24 bits / pixel */ + 0, 0, 0, 0, /* no pixel compression used */ + 0, 0, 0, 0, /* size of pixel array (to be written later) */ + 0xe8, 3, 0, 0, /* 1 pixel / mm */ + 0xe8, 3, 0, 0, /* 1 pixel / mm */ + 0, 0, 0, 0, /* no colors in palette */ + 0, 0, 0, 0 /* no colors to import */ + }; + + int ysize = info->nelts; + int xsize = 0; + int x, y; + int row_size; + int padding; + apr_size_t written; + + /* xsize = max cluster number */ + for (y = 0; y < ysize; ++y) + if (xsize < APR_ARRAY_IDX(info, y, file_stats_t *)->read_map->nelts) + xsize = APR_ARRAY_IDX(info, y, file_stats_t *)->read_map->nelts; + + /* rows in BMP files must be aligned to 4 bytes */ + row_size = APR_ALIGN(xsize * 3, 4); + padding = row_size - xsize * 3; + + /* write numbers to header */ + write_number(header + 2, ysize * row_size + 54); + write_number(header + 18, xsize); + write_number(header + 22, ysize); + write_number(header + 38, ysize * row_size); + + /* write header to file */ + written = 
sizeof(header); + apr_file_write(file, header, &written); + + /* write all rows */ + for (y = 0; y < ysize; ++y) + { + file_stats_t *file_info = APR_ARRAY_IDX(info, y, file_stats_t *); + for (x = 0; x < xsize; ++x) + { + byte color[3] = { 128, 128, 128 }; + if (x < file_info->read_map->nelts) + { + byte count = APR_ARRAY_IDX(file_info->read_map, x, byte); + select_color(color, count); + } + + written = sizeof(color); + apr_file_write(file, color, &written); + } + + if (padding) + { + char pad[3] = { 0 }; + written = padding; + apr_file_write(file, pad, &written); + } + } +} + +/* Write a summary of the I/O ops to stdout. + * Use POOL for temporaries. + */ +static void +print_stats(apr_pool_t *pool) +{ + apr_int64_t open_count = 0; + apr_int64_t seek_count = 0; + apr_int64_t read_count = 0; + apr_int64_t read_size = 0; + apr_int64_t clusters_read = 0; + apr_int64_t unique_clusters_read = 0; + apr_int64_t uncached_seek_count = 0; + + apr_hash_index_t *hi; + for (hi = apr_hash_first(pool, files); hi; hi = apr_hash_next(hi)) + { + const char *name = NULL; + apr_ssize_t len = 0; + file_stats_t *file = NULL; + + apr_hash_this(hi, (const void **)&name, &len, (void**)&file); + + open_count += file->open_count; + seek_count += file->seek_count; + read_count += file->read_count; + read_size += file->read_size; + clusters_read += file->clusters_read; + unique_clusters_read += file->unique_clusters_read; + uncached_seek_count += file->uncached_seek_count; + } + + printf("%20s files\n", svn__i64toa_sep(apr_hash_count(files), ',', pool)); + printf("%20s files opened\n", svn__i64toa_sep(open_count, ',', pool)); + printf("%20s seeks\n", svn__i64toa_sep(seek_count, ',', pool)); + printf("%20s uncached seeks\n", svn__i64toa_sep(uncached_seek_count, ',', pool)); + printf("%20s reads\n", svn__i64toa_sep(read_count, ',', pool)); + printf("%20s unique clusters read\n", svn__i64toa_sep(unique_clusters_read, ',', pool)); + printf("%20s clusters read\n", svn__i64toa_sep(clusters_read, ',', 
pool)); + printf("%20s bytes read\n", svn__i64toa_sep(read_size, ',', pool)); +} + +/* Some help output. */ +static void +print_usage() +{ + printf("fsfs-access-map <file>\n\n"); + printf("Reads strace of some FSFS-based tool from <file>, prints some stats\n"); + printf("and writes a cluster access map to 'access.bmp' the current folder.\n"); + printf("Each pixel corresponds to one 64kB cluster and every line to a rev\n"); + printf("or packed rev file in the repository. Turquoise and greed indicate\n"); + printf("1 and 2 hits, yellow to read-ish colors for up to 20, shares of\n"); + printf("for up to 100 and black for > 200 hits.\n\n"); + printf("A typical strace invocation looks like this:\n"); + printf("strace -e trace=open,close,read,lseek -o strace.txt svn log ...\n"); +} + +/* linear control flow */ +int main(int argc, const char *argv[]) +{ + apr_pool_t *pool = NULL; + apr_file_t *file = NULL; + + apr_initialize(); + atexit(apr_terminate); + + pool = svn_pool_create(NULL); + files = apr_hash_make(pool); + handles = apr_hash_make(pool); + + if (argc == 1) + apr_file_open(&file, argv[1], APR_READ | APR_BUFFERED, APR_OS_DEFAULT, + pool); + if (file == NULL) + { + print_usage(); + return 0; + } + parse_file(file); + apr_file_close(file); + + print_stats(pool); + + apr_file_open(&file, "access.bmp", + APR_WRITE | APR_CREATE | APR_TRUNCATE | APR_BUFFERED, + APR_OS_DEFAULT, pool); + write_bitmap(get_rev_files(pool), file); + apr_file_close(file); + + return 0; +} \ No newline at end of file
