Hi Yu,
Thanks.
On 07/04/2014 02:21 PM, Chao Yu wrote:

> Hi Jaegeuk, Gu, Changman
> 
>> -----Original Message-----
>> From: Jaegeuk Kim [mailto:jaeg...@kernel.org]
>> Sent: Friday, July 04, 2014 1:36 PM
>> To: Gu Zheng
>> Cc: f2fs; fsdevel; 이창만; 俞
>> Subject: Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le 
>> in, find_in_block
>>
>> Well, how about testing with many ones in the bit streams?
>> Thanks,
>>
>> On Thu, Jul 03, 2014 at 06:14:02PM +0800, Gu Zheng wrote:
>>> Hi Jaegeuk, Changman
>>>
>>> Just a simple test, not very sure it can address
>>> our qualm.
>>>
>>> Bitmap size:216(the same as f2fs dentry_bits).
>>> CPU: Intel i5 x86_64.
>>>
>>> Time counting based on tsc(the less the fast).
>>> [Index of 1]        find_next_bit_le        test_bit_le
>>> 0           20                      117
>>> 1           20                      114
>>> 2           20                      113
>>> 3           20                      139
>>> 4           22                      121
>>> 5           22                      118
>>> 6           22                      115
>>> 8           22                      112
>>> 9           22                      106
>>> 10          22                      105
>>> 11          22                      100
>>> 16          22                      98
>>> 48          22                      97
>>> 80          27                      95
>>> 104         27                      92
>>> 136         32                      95
>>> 160         32                      92
>>> 184         32                      90
>>> 200         27                      87
>>> 208         35                      84
>>>
>>> According to the result, find_next_bit_le is always
>>> better than test_bit_le, though there may be some
>>> noise, but I think the result is clear.
>>> Hope it can help us.:)
>>> ps.The sample is attached too.
>>>
>>> Thanks,
>>> Gu
> 
> I hope this could provide some help for this patch.
> 
> I modify Gu's code like this, and add few test case:
> 
> static void test_bit_search_speed(void)
> {
>       unsigned long flags;
>       uint64_t tsc_s_b1, tsc_s_e1, tsc_s_b2, tsc_s_e2;
>       int i, j, pos;
>       const void *bit_addr;
> 
>       local_irq_save(flags);
>       preempt_disable();
>       
>       printk("find_next_bit   test_bit_le\n");
> 
>       for (i = 0; i < 24; i++) {
> 
>               bit_addr = &bitmaps[i];
> 
>               tsc_s_b1 = rdtsc();
> 
>               for (j = 0, pos = 0; j < 1000; j++, pos = 0)
>                       while (pos < 216)
>                               pos = find_next_bit_le(bit_addr, 216, pos) + 1;
> 
>               mb();
>               tsc_s_e1 = rdtsc();
>               tsc_s_e1 -= tsc_s_b1;
>               do_div(tsc_s_e1, 1000);
> 
>               tsc_s_b2 = rdtsc();
> 
>               for (j = 0, pos = 0; j < 1000; j++, pos = 0)
>                       while (pos < 216)
>                               test_bit_le(pos++, bit_addr);
> 
>               mb();
>               tsc_s_e2 = rdtsc();
>               tsc_s_e2 -= tsc_s_b2;
>               do_div(tsc_s_e2, 1000);
> 
>               printk("%-16llu %-16llu\n", tsc_s_e1, tsc_s_e2);
>       }
> 
>       preempt_enable();
>       local_irq_restore(flags);
> }
> case: 11111111 11111111
>               {255, 255, 255, 255, 255, 255, 255,
>                255, 255, 255, 255, 255, 255, 255,
>                255, 255, 255, 255, 255, 255, 255,
>                255, 255, 255, 255, 255, 255},
> case: 10101010 10101010
>               {170, 170, 170, 170, 170, 170, 170,
>                170, 170, 170, 170, 170, 170, 170,
>                170, 170, 170, 170, 170, 170, 170,
>                170, 170, 170, 170, 170, 170},
> case: 11111111 00000000
>               {255, 0, 255, 0, 255, 0, 255,
>                0, 255, 0, 255, 0, 255, 0,
>                255, 0, 255, 0, 255, 0, 255,
>                0, 255, 0, 255, 0, 255},
> case: 00001111 00001111
>                {15, 15, 15, 15, 15, 15, 15,
>                15, 15, 15, 15, 15, 15, 15,
>                15, 15, 15, 15, 15, 15, 15,
>                15, 15, 15, 15, 15, 15}
> 
> and here are test result in my env. (Ubuntu vm, 768MB, i3-3220)
> It seems find_next_bit works not so bad as I thought.
> 
> find_next_bit    test_bit_le
> 73               4209
> 62               1271
> 69               1585
> 50               2031
> 67               2255
> 82               2261
> 52               4007
> 79               2159
> 50               2043
> 55               2215
> 53               2393
> 72               3784
> 76               1879
> 61               2562
> 70               2702
> 62               2489
> 56               2307
> 54               2063
> 51               2258
> 69               2712
> 4133             3989  -- case: 11111111 11111111
> 2370             3024  -- case: 10101010 10101010
> 2608             2413  -- case: 11111111 00000000
> 2457             2506  -- case: 00001111 00001111

The time cost of test_bit_le shakes violently, it should be
very smooth according to the test case, maybe it has relation
to your vm env.

To Jaegeuk,
Following test result is walking a bitmap via find_next_bit_le
and test_bit_le.
(Front 7 are random bitmaps, last four are cases from Yu, see
attached sample for detail):

find_next_bit_le    test_bit_le

     3615                3492 
     2640                3492 
     2431                3492 
     1957                3494 
     2160                3492 
     1933                3495 
     1096                3492 

     8078                3492 
     3732                3492 
     4237                3493 
     3824                3492

Thanks,
Gu 

> 
> .
> 


#include <linux/module.h>

__u8 bitmaps[11][27] = {
                {1, 123, 2, 3, 127, 200, 100,
                 123, 23, 13, 78, 42, 123, 50,
                 123, 56, 198, 0, 58, 22, 19,
                 123, 23, 13, 78, 42, 123},
                {2, 0, 0, 0, 0, 0, 0,
                 123, 23, 13, 78, 42, 123, 50,
                 123, 56, 198, 0, 58, 22, 19,
                 23, 123, 2, 34, 123, 12},
                {4, 0, 0, 0, 0, 0, 0,
                 123, 56, 198, 0, 58, 22, 19,
                 123, 23, 13, 78, 42, 123,
                 0, 0, 0, 34, 45, 29},
                {8, 0, 0, 0, 0, 0, 0,
                 1, 123, 2, 3, 127, 200, 100,
                 123, 23, 13, 78, 42, 123, 50,
                 0, 0, 0, 0, 0, 0},
                {16, 0, 0, 0, 0, 0, 0,
                 8, 78, 0, 0, 23, 0, 213,
                 0, 12, 0, 45, 0, 109, 111,
                 231, 11, 11, 88, 77, 99},
                {32, 0, 0, 0, 0, 0, 0,
                 8, 78, 0, 0, 23, 0, 213,
                 0, 12, 0, 45, 0, 109, 111,
                 231, 11, 11, 88, 77, 99},
                {64, 0, 0, 0, 0, 0, 0,
                 8, 78, 0, 0, 23, 0, 213,
                 0, 12, 0, 45, 0, 109, 111,
                 0, 0, 0, 0, 0, 0},
                {255, 255, 255, 255, 255, 255, 255,
                 255, 255, 255, 255, 255, 255, 255,
                 255, 255, 255, 255, 255, 255, 255,
                 255, 255, 255, 255, 255, 255},
                {170, 170, 170, 170, 170, 170, 170,
                 170, 170, 170, 170, 170, 170, 170,
                 170, 170, 170, 170, 170, 170, 170,
                 170, 170, 170, 170, 170, 170},
                {255, 0, 255, 0, 255, 0, 255,
                 0, 255, 0, 255, 0, 255, 0,
                 255, 0, 255, 0, 255, 0, 255,
                 0, 255, 0, 255, 0, 255},
                 {15, 15, 15, 15, 15, 15, 15,
                 15, 15, 15, 15, 15, 15, 15,
                 15, 15, 15, 15, 15, 15, 15,
                 15, 15, 15, 15, 15, 15}
                };

uint64_t rdtsc(void) {
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}

static void test_bit_search_speed(void)
{
        unsigned long flags;
        uint64_t tsc_s, tsc_diff;
        int i, j, pos;
        const void *bit_addr;

        local_irq_save(flags);
        preempt_disable();

        printk("find_next_bit_le    test_bit_le\n");

        for (i = 0; i < 11; i++) {
                bit_addr = &bitmaps[i];

                pos = 0;
                tsc_s = rdtsc();

                for (j = 0; j < 1000; j++, pos = 0)
                        while (pos < 216)
                                pos = find_next_bit_le(bit_addr, 216, pos) + 1;
                mb();
                tsc_diff = (rdtsc() - tsc_s)/1000;

                printk("%8llu", tsc_diff);

                pos = 0;
                tsc_s = rdtsc();

                for (j = 0; j < 1000; j++, pos = 0)
                        while (pos < 216)
                                test_bit_le(pos++, bit_addr);

                mb();
                tsc_diff = (rdtsc() - tsc_s)/1000;

                printk("%20llu \n", tsc_diff);
        }

        preempt_enable();
        local_irq_restore(flags);
}

static int __init start_test(void) {
        test_bit_search_speed();
        return 0;
}

static void __exit exit_test(void) {

}

module_init(start_test);
module_exit(exit_test);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Gu Zheng");
------------------------------------------------------------------------------
Open source business process management suite built on Java and Eclipse
Turn processes into business applications with Bonita BPM Community Edition
Quickly connect people, data, and systems into organized workflows
Winner of BOSSIE, CODIE, OW2 and Gartner awards
http://p.sf.net/sfu/Bonitasoft
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to