Hi Stanislav,

As you noted, vlib_buffer_validate_alloc_free() is the only place that
would change the "known state" of a buffer. All of the calls to that
function are in vlib/buffer_funcs.h in inline functions in conditional
blocks with a condition like 'if (CLIB_DEBUG > 0)'. One of those calls is
made when vlib_buffer_alloc() is called - it
calls vlib_buffer_alloc_on_numa() which calls vlib_buffer_alloc_from_pool()
which has several invocations of vlib_buffer_validate_alloc_free() which
all look like this:

      if (CLIB_DEBUG > 0)
        vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
                                         VLIB_BUFFER_KNOWN_FREE);

Since these calls occur in inline functions and are only made when
CLIB_DEBUG is defined as a value greater than 0, it seems like the issue is
probably that VPP and the router plugin are being built with CLIB_DEBUG
defined to different values - one of them is being built with CLIB_DEBUG ==
0 and one of them is built with CLIB_DEBUG > 0. Since your backtrace
shows vlib_buffer_validate_alloc_free() being called from one of the VPP
virtio nodes, that suggests that VPP was built with CLIB_DEBUG > 0. You are
probably building the router plugin without explicitly defining a value for
CLIB_DEBUG so it is ending up defined to 0. To remedy the issue, you could
either build VPP with CLIB_DEBUG set to 0 (run 'make build-release' instead
of 'make build') or explicitly define CLIB_DEBUG when you build the router
plugin.

-Matt


On Sat, Jan 16, 2021 at 5:50 AM Stanislav Zaikin <zsta...@gmail.com> wrote:

> I've dug more into this issue. And I noticed strange thing:
>
> 187   {
> (gdb) next
> 188     tap_inject_main_t * im = tap_inject_get_main ();
> (gdb)
> 190     u32 bi = ~0;
> (gdb)
> 194     sw_if_index = tap_inject_lookup_sw_if_index_from_tap_fd (fd);
> (gdb)
> 195     if (sw_if_index == ~0)
> (gdb)
> 199     if( vlib_buffer_alloc (vm, &bi, 1) != 1 )
> (gdb)
> 202     b = vlib_get_buffer (vm, bi);
> (gdb)
> 207     n_bytes = read (fd, b->data, vlib_buffer_get_default_data_size (vm));
> (gdb)
> 208     if (n_bytes < 0)
> (gdb) print b
> $31 = (vlib_buffer_t *) 0x10027db5c0
> (gdb) print *b
> $32 = {{cacheline0 = 0x10027db5c0 "", current_data = 0, current_length = 0, 
> flags = 0, flow_id = 0, ref_count = 1 '\001', buffer_pool_index = 0 '\000', 
> error = 0, next_buffer = 0, {current_config_index = 0,
>       punt_reason = 0}, opaque = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, template_end 
> = 0x10027db600 "", second_half = 0x10027db600 "", trace_handle = 0, 
> total_length_not_including_first_buffer = 0, opaque2 = {
>       0 <repeats 14 times>}, headroom = 0x10027db640 "", pre_data = '\000' 
> <repeats 126 times>, "\001", data = 0x10027db6c0 "33"}}
> (gdb) print bi
> $33 = 653015
> (gdb) call vlib_buffer_is_known (vm, bi)
> $34 = VLIB_BUFFER_KNOWN_FREE
>
> For some reason the allocated buffer was marked as free. The only function 
> that could change its state is vlib_buffer_validate_alloc_free.
>
> always_inline __clib_warn_unused_result u32
> vlib_buffer_alloc_from_pool (vlib_main_t * vm, u32 * buffers, u32
> n_buffers,
> u8 buffer_pool_index)
> {
> /* omit */
> if (CLIB_DEBUG > 0)
> vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
> VLIB_BUFFER_KNOWN_FREE);
> return n_buffers;
>
> But there's another strange issue. I've tried to debug it with gdb and
> sometimes vlib_buffer_validate_alloc_free isn't calling for some reason:
>
> Thread 1 "vpp_main" hit Breakpoint 1, vlib_buffer_alloc_from_pool 
> (buffer_pool_index=0 '\000', n_buffers=1, buffers=0x7fffafc727e0, 
> vm=0x7ffff6d67bc0 <vlib_global_main>)
>     at /usr/include/vlib/buffer_funcs.h:569
> 569     vlib_buffer_main_t *bm = vm->buffer_main;
> (gdb) next
> 585     bp = vec_elt_at_index (bm->buffer_pools, buffer_pool_index);
> (gdb)
> 586     bpt = vec_elt_at_index (bp->threads, vm->thread_index);
> (gdb)
> 588     dst = buffers;
> (gdb)
> 589     n_left = n_buffers;
> (gdb)
> 590     len = bpt->n_cached;
> (gdb)
> 593     if (len >= n_buffers)
> (gdb)
> 595         src = bpt->cached_buffers + len - n_buffers;
> (gdb)
> 596         vlib_buffer_copy_indices (dst, src, n_buffers);
> (gdb)
> 597         bpt->n_cached -= n_buffers;
> (gdb)
> 602         return n_buffers;
> (gdb) nextvlib_buffer_alloc_on_numa (numa_node=0, n_buffers=1, 
> buffers=0x7fffafc727e0, vm=0x7ffff6d67bc0 <vlib_global_main>) at 
> /usr/include/vlib/buffer_funcs.h:664
> 664     return vlib_buffer_alloc_from_pool (vm, buffers, n_buffers, index);
> (gdb) cont
> Continuing.
> [New Thread 0x7fffacc9b700 (LWP 127836)]
> [Thread 0x7fffacc9b700 (LWP 127836) exited]
>
> Thread 1 "vpp_main" hit Breakpoint 1, vlib_buffer_alloc_from_pool 
> (vm=0x7ffff6d67bc0 <vlib_global_main>, buffers=0x7fffb74e4dfc, n_buffers=64, 
> buffer_pool_index=0 '\000')
>     at /home/zstas/vpp/src/vlib/buffer_funcs.h:569
> 569     vlib_buffer_main_t *bm = vm->buffer_main;
> (gdb) next
> 585     bp = vec_elt_at_index (bm->buffer_pools, buffer_pool_index);
> (gdb)
> 586     bpt = vec_elt_at_index (bp->threads, vm->thread_index);
> (gdb)
> 588     dst = buffers;
> (gdb)
> 589     n_left = n_buffers;
> (gdb)
> 590     len = bpt->n_cached;
> (gdb)
> 593     if (len >= n_buffers)
> (gdb)
> 595         src = bpt->cached_buffers + len - n_buffers;
> (gdb)
> 596         vlib_buffer_copy_indices (dst, src, n_buffers);
> (gdb)
> 597         bpt->n_cached -= n_buffers;
> (gdb) *600            vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
> *(gdb)
> 602         return n_buffers;
> (gdb)
> 648   }
> (gdb) cont
>
> I have no idea why in the first run vlib_buffer_validate_alloc_free hadn't 
> been called (second run was ok). Because there is the same code according to 
> debug info in gdb.
>
> I hope you'll point me where I'm wrong.
>
>
> On Sat, 16 Jan 2021 at 12:07, Stanislav Zaikin via lists.fd.io <zstaseg=
> gmail....@lists.fd.io> wrote:
>
>> Hello Damjan,
>> Thanks for the response.
>>
>> Nothing special happens, I just want to send this packet to another
>> interface.
>> Interface TX is just a regular VPP node (TX for tapv2/virtio). This
>> packet went to error-node because the interface was down. But if I set
>> interface up, everything looks just the same (error on freeing this buffer):
>>
>> (gdb) bt
>> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
>> #1  0x00007ffff6936859 in __GI_abort () at abort.c:79
>> #2  0x00007ffff6bd73bf in os_panic () at 
>> /home/zstas/vpp/src/vppinfra/unix-misc.c:177
>> #3  0x00007ffff6b37ea4 in debugger () at 
>> /home/zstas/vpp/src/vppinfra/error.c:84
>> #4  0x00007ffff6b38295 in _clib_error (how_to_die=2, 
>> function_name=0x7ffff6d39c40 <__FUNCTION__.16> 
>> "vlib_buffer_validate_alloc_free", line_number=366, fmt=0x7ffff6d3974a "%s 
>> %U buffer 0x%x")
>>     at /home/zstas/vpp/src/vppinfra/error.c:143
>> #5  0x00007ffff6c644fc in vlib_buffer_validate_alloc_free (vm=0x7ffff6d67bc0 
>> <vlib_global_main>, buffers=0x7fffafc730b0, n_buffers=1, 
>> expected_state=VLIB_BUFFER_KNOWN_ALLOCATED)
>>     at /home/zstas/vpp/src/vlib/buffer.c:366
>> #6  0x00007ffff6f753ec in vlib_buffer_pool_put (vm=0x7ffff6d67bc0 
>> <vlib_global_main>, buffer_pool_index=0 '\000', buffers=0x7fffafc730b0, 
>> n_buffers=1) at /home/zstas/vpp/src/vlib/buffer_funcs.h:754
>> #7  0x00007ffff6f75c29 in vlib_buffer_free_inline (vm=0x7ffff6d67bc0 
>> <vlib_global_main>, buffers=0x7fffb74f0044, n_buffers=0, maybe_next=1) at 
>> /home/zstas/vpp/src/vlib/buffer_funcs.h:924
>> #8  0x00007ffff6f75c79 in vlib_buffer_free (vm=0x7ffff6d67bc0 
>> <vlib_global_main>, buffers=0x7fffb74f0040, n_buffers=1) at 
>> /home/zstas/vpp/src/vlib/buffer_funcs.h:943
>> #9  0x00007ffff6f75d1a in vlib_buffer_free_from_ring (vm=0x7ffff6d67bc0 
>> <vlib_global_main>, ring=0x7fffb74f0040, start=0, ring_size=256, 
>> n_buffers=1) at /home/zstas/vpp/src/vlib/buffer_funcs.h:991
>> #10 0x00007ffff6f79af4 in virtio_free_used_device_desc_split 
>> (vm=0x7ffff6d67bc0 <vlib_global_main>, vring=0x7fffb75ab540, node_index=679) 
>> at /home/zstas/vpp/src/vnet/devices/virtio/device.c:200
>> #11 0x00007ffff6f79dc2 in virtio_free_used_device_desc (vm=0x7ffff6d67bc0 
>> <vlib_global_main>, vring=0x7fffb75ab540, node_index=679, packed=0) at 
>> /home/zstas/vpp/src/vnet/devices/virtio/device.c:277
>> #12 0x00007ffff6f7baed in virtio_device_class_tx_fn_hsw (vm=0x7ffff6d67bc0 
>> <vlib_global_main>, node=0x7fffb6b73800, frame=0x7fffb96ff8c0) at 
>> /home/zstas/vpp/src/vnet/devices/virtio/device.c:1017
>> #13 0x00007ffff6cba2e2 in dispatch_node (vm=0x7ffff6d67bc0 
>> <vlib_global_main>, node=0x7fffb6b73800, type=VLIB_NODE_TYPE_INTERNAL, 
>> dispatch_state=VLIB_NODE_STATE_POLLING, frame=0x7fffb96ff8c0,
>>     last_time_stamp=1108992938029) at /home/zstas/vpp/src/vlib/main.c:1223
>> #14 0x00007ffff6cbaa19 in dispatch_pending_node (vm=0x7ffff6d67bc0 
>> <vlib_global_main>, pending_frame_index=1, last_time_stamp=1108992938029) at 
>> /home/zstas/vpp/src/vlib/main.c:1384
>> #15 0x00007ffff6cbcdd3 in vlib_main_or_worker_loop (vm=0x7ffff6d67bc0 
>> <vlib_global_main>, is_main=1) at /home/zstas/vpp/src/vlib/main.c:1912
>> #16 0x00007ffff6cbd848 in vlib_main_loop (vm=0x7ffff6d67bc0 
>> <vlib_global_main>) at /home/zstas/vpp/src/vlib/main.c:2040
>> #17 0x00007ffff6cbe66b in vlib_main (vm=0x7ffff6d67bc0 <vlib_global_main>, 
>> input=0x7fffafc73fb0) at /home/zstas/vpp/src/vlib/main.c:2286
>> #18 0x00007ffff6d29ac5 in thread0 (arg=140737334639552) at 
>> /home/zstas/vpp/src/vlib/unix/main.c:670
>> #19 0x00007ffff6b56f10 in clib_calljmp () at 
>> /home/zstas/vpp/src/vppinfra/longjmp.S:123
>> #20 0x00007fffffffd150 in ?? ()
>> #21 0x00007ffff6d2a095 in vlib_unix_main (argc=28, argv=0x555555631e50) at 
>> /home/zstas/vpp/src/vlib/unix/main.c:747
>> #22 0x000055555555cd8b in main (argc=28, argv=0x555555631e50) at 
>> /home/zstas/vpp/src/vpp/vnet/main.c:338
>>
>>
>> On Fri, 15 Jan 2021 at 19:57, Damjan Marion <dmar...@me.com> wrote:
>>
>>>
>>> This looks right (except the wrong assumption that
>>> vlib_get_frame_to_node will give you empty frame).
>>>
>>>
>>> What happens between this code and error-drop node? Is interface TX also
>>> your custom code?
>>>
>>> —
>>> Damjan
>>>
>>>
>>> On 15.01.2021., at 17:03, Stanislav Zaikin <zsta...@gmail.com> wrote:
>>>
>>> Hello,
>>>
>>> I'm trying to reincarnate router plugin. But I stuck with a pretty
>>> simple thing, when I need to allocate a buffer to store a packet from a tap
>>> device, everything is going well. But when the next node decides to free
>>> this buffer I get a clib_error.
>>>
>>> Basically, code is pretty simple and straightforward:
>>> u32 bi = ~0;
>>> vlib_buffer_t * b;
>>>
>>> if( vlib_buffer_alloc (vm, &bi, 1) != 1 )
>>> //err handling
>>>
>>> b = vlib_get_buffer (vm, bi);
>>> VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b);
>>>
>>> n_bytes = read (fd, b->data, vlib_buffer_get_default_data_size (vm));
>>> // err handling
>>>
>>> b->current_length = n_bytes;
>>> vnet_buffer (b)->sw_if_index[VLIB_RX] = sw_if_index;
>>> vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
>>>
>>> vnet_hw_interface_t * hw;
>>> vlib_frame_t * new_frame;
>>> u32 * to_next;
>>>
>>> hw = vnet_get_hw_interface (vnet_get_main (), sw_if_index);
>>>
>>> new_frame = vlib_get_frame_to_node (vm, hw->output_node_index);
>>> to_next = vlib_frame_vector_args (new_frame);
>>> to_next[0] = bi;
>>> new_frame->n_vectors = 1;
>>>
>>> vlib_put_frame_to_node (vm, hw->output_node_index, new_frame);
>>>
>>> Error looks like:
>>>
>>> gdb) bt
>>> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
>>> #1  0x00007ffff6936859 in __GI_abort () at abort.c:79
>>> #2  0x00007ffff6bd73bf in os_panic () at 
>>> /home/zstas/vpp/src/vppinfra/unix-misc.c:177
>>> #3  0x00007ffff6b37ea4 in debugger () at 
>>> /home/zstas/vpp/src/vppinfra/error.c:84
>>> #4  0x00007ffff6b38295 in _clib_error (how_to_die=2, 
>>> function_name=0x7ffff6d39c40 <__FUNCTION__.16> 
>>> "vlib_buffer_validate_alloc_free", line_number=366, fmt=0x7ffff6d3974a "%s 
>>> %U buffer 0x%x")
>>>     at /home/zstas/vpp/src/vppinfra/error.c:143
>>> #5  0x00007ffff6c644fc in vlib_buffer_validate_alloc_free 
>>> (vm=0x7ffff6d67bc0 <vlib_global_main>, buffers=0x7fffafc725f0, n_buffers=1, 
>>> expected_state=VLIB_BUFFER_KNOWN_ALLOCATED)
>>>     at /home/zstas/vpp/src/vlib/buffer.c:366
>>> #6  0x00007ffff6c4a08c in vlib_buffer_pool_put (vm=0x7ffff6d67bc0 
>>> <vlib_global_main>, buffer_pool_index=0 '\000', buffers=0x7fffafc725f0, 
>>> n_buffers=1) at /home/zstas/vpp/src/vlib/buffer_funcs.h:754
>>> #7  0x00007ffff6c4a8c9 in vlib_buffer_free_inline (vm=0x7ffff6d67bc0 
>>> <vlib_global_main>, buffers=0x7fffb6b37ed4, n_buffers=0, maybe_next=1) at 
>>> /home/zstas/vpp/src/vlib/buffer_funcs.h:924
>>> #8  0x00007ffff6c4a919 in vlib_buffer_free (vm=0x7ffff6d67bc0 
>>> <vlib_global_main>, buffers=0x7fffb6b37ed0, n_buffers=1) at 
>>> /home/zstas/vpp/src/vlib/buffer_funcs.h:943
>>> #9  0x00007ffff6c4b3b5 in process_drop_punt (vm=0x7ffff6d67bc0 
>>> <vlib_global_main>, node=0x7fffb6b772c0, frame=0x7fffb6b37ec0, 
>>> disposition=ERROR_DISPOSITION_DROP) at /home/zstas/vpp/src/vlib/drop.c:231
>>> #10 0x00007ffff6c4b4bf in error_drop_node_fn_hsw (vm=0x7ffff6d67bc0 
>>> <vlib_global_main>, node=0x7fffb6b772c0, frame=0x7fffb6b37ec0) at 
>>> /home/zstas/vpp/src/vlib/drop.c:247
>>> #11 0x00007ffff6cba2e2 in dispatch_node (vm=0x7ffff6d67bc0 
>>> <vlib_global_main>, node=0x7fffb6b772c0, type=VLIB_NODE_TYPE_INTERNAL, 
>>> dispatch_state=VLIB_NODE_STATE_POLLING, frame=0x7fffb6b37ec0,
>>>     last_time_stamp=77969722318942) at /home/zstas/vpp/src/vlib/main.c:1223
>>> #12 0x00007ffff6cbaa19 in dispatch_pending_node (vm=0x7ffff6d67bc0 
>>> <vlib_global_main>, pending_frame_index=2, last_time_stamp=77969722318942) 
>>> at /home/zstas/vpp/src/vlib/main.c:1384
>>> #13 0x00007ffff6cbcdd3 in vlib_main_or_worker_loop (vm=0x7ffff6d67bc0 
>>> <vlib_global_main>, is_main=1) at /home/zstas/vpp/src/vlib/main.c:1912
>>> #14 0x00007ffff6cbd848 in vlib_main_loop (vm=0x7ffff6d67bc0 
>>> <vlib_global_main>) at /home/zstas/vpp/src/vlib/main.c:2040
>>> #15 0x00007ffff6cbe66b in vlib_main (vm=0x7ffff6d67bc0 <vlib_global_main>, 
>>> input=0x7fffafc73fb0) at /home/zstas/vpp/src/vlib/main.c:2286
>>> #16 0x00007ffff6d29ac5 in thread0 (arg=140737334639552) at 
>>> /home/zstas/vpp/src/vlib/unix/main.c:670
>>> #17 0x00007ffff6b56f10 in clib_calljmp () at 
>>> /home/zstas/vpp/src/vppinfra/longjmp.S:123
>>> #18 0x00007fffffffd150 in ?? ()
>>> #19 0x00007ffff6d2a095 in vlib_unix_main (argc=28, argv=0x555555631e50) at 
>>> /home/zstas/vpp/src/vlib/unix/main.c:747
>>> #20 0x000055555555cd8b in main (argc=28, argv=0x555555631e50) at 
>>> /home/zstas/vpp/src/vpp/vnet/main.c:338
>>>
>>> Buffer index seems right:
>>>
>>> Thread 1 "vpp_main" hit Breakpoint 1, tap_rx (vm=0x7ffff6d67bc0 
>>> <vlib_global_main>, node=0x7fffb6a75d00, f=0x0, fd=23) at 
>>> router/tap_inject_node.c:187
>>> warning: Source file is more recent than executable.
>>> (gdb) info localsim = 0x7fffb002b7c0 <tap_inject_main>sw_if_index = 1bi = 
>>> 653093b = 0x10027dc940n_bytes = 90__FUNCTION__ = "tap_rx"
>>>
>>> /* later */
>>>
>>> Thread 1 "vpp_main" received signal SIGABRT, Aborted.__GI_raise 
>>> (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
>>> 50  ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
>>> (gdb) bt
>>> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
>>> #1  0x00007ffff6936859 in __GI_abort () at abort.c:79
>>> #2  0x00007ffff6bd73bf in os_panic () at 
>>> /home/zstas/vpp/src/vppinfra/unix-misc.c:177
>>> #3  0x00007ffff6b37ea4 in debugger () at 
>>> /home/zstas/vpp/src/vppinfra/error.c:84
>>> #4  0x00007ffff6b38295 in _clib_error (how_to_die=2, 
>>> function_name=0x7ffff6d39c40 <__FUNCTION__.16> 
>>> "vlib_buffer_validate_alloc_free", line_number=366, fmt=0x7ffff6d3974a "%s 
>>> %U buffer 0x%x")
>>>     at /home/zstas/vpp/src/vppinfra/error.c:143
>>> #5  0x00007ffff6c644fc in vlib_buffer_validate_alloc_free 
>>> (vm=0x7ffff6d67bc0 <vlib_global_main>, buffers=0x7fffafc725f0, n_buffers=1, 
>>> expected_state=VLIB_BUFFER_KNOWN_ALLOCATED)
>>>     at /home/zstas/vpp/src/vlib/buffer.c:366
>>> #6  0x00007ffff6c4a08c in vlib_buffer_pool_put (vm=0x7ffff6d67bc0 
>>> <vlib_global_main>, buffer_pool_index=0 '\000', buffers=0x7fffafc725f0, 
>>> n_buffers=1) at /home/zstas/vpp/src/vlib/buffer_funcs.h:754
>>> /* omit */
>>> (gdb) select 6
>>> (gdb) print buffers[0]
>>> $6 = 653093
>>>
>>> Maybe I need to set some flags on the allocated buffer? Or what else am I 
>>> missing?
>>>
>>> --
>>> Best regards
>>> Stanislav Zaikin
>>>
>>>
>>>
>>>
>>>
>>
>> --
>> Best regards
>> Stanislav Zaikin
>>
>>
>>
>>
>
> --
> Best regards
> Stanislav Zaikin
>
> 
>
>
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#18535): https://lists.fd.io/g/vpp-dev/message/18535
Mute This Topic: https://lists.fd.io/mt/79705076/21656
Group Owner: vpp-dev+ow...@lists.fd.io
Unsubscribe: https://lists.fd.io/g/vpp-dev/unsub [arch...@mail-archive.com]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to