Issue 178535
Summary clang hangs when building Linux kernel's rkvdec-vdpu383-h264.c for ARCH=hexagon
Labels clang
Assignees
Reporter nathanchance
    When building `ARCH=hexagon allmodconfig` in the Linux kernel's -next tree at [`next-20260128`](https://git.kernel.org/next/linux-next/l/next-20260128), there is a hang when building [`drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c`](https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c?h=next-20260128), a recently added file.

```
$ make -sj"$(nproc)" ARCH=hexagon LLVM=1 mrproper allmodconfig drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.o
...
  CC [M]  drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.o
<hangs>
```

I have not seen this with any of the other architectures that the kernel supports building with LLVM.

I bisected this down to commit [8c0483bba2d2](https://github.com/llvm/llvm-project/commit/8c0483bba2d25ae7e4b6cac150dba9447dfed59c) ("RegisterCoalescer: Fix assert on remat to copy-to-physreg with subregs (#121734)").

```
# bad: [3bd3e06f3fe418e24af65457877f40cee0544f9d] Bump version to 21.0.0git (#124870)
# good: [10c6d6349e51bb245b9deec4aafca9885971135b] Clear release notes for upcoming LLVM 20 dev cycle
git bisect start 'llvmorg-21-init' 'llvmorg-20-init'
# good: [93802815abdb1e1f326162b930a8028eaccb73d5] [NewPM][CodeGen] Port VirtRegMap to NPM (#109936)
git bisect good 93802815abdb1e1f326162b930a8028eaccb73d5
# good: [755519f7f661375be05750001ff11e106e6b7f87] [clang][driver] Use $ prefix with config file options to have them added after all of the command line options (#117573)
git bisect good 755519f7f661375be05750001ff11e106e6b7f87
# bad: [44e8ee73591bad22ae19748be825c4b66d7b3dde] [flang][doc] refine zero initialization extension documentation (#121956)
git bisect bad 44e8ee73591bad22ae19748be825c4b66d7b3dde
# good: [49fd2dde21655f95309abb17ad1d3392afe4985f] [X86] LowerShift - don't prematurely lower to x86 vector shift imm instructions (#120282)
git bisect good 49fd2dde21655f95309abb17ad1d3392afe4985f
# good: [db7123fbbc530587941ea3c78666103233282120] [LLD][COFF] Use EC symbol table for CHPE metadata (#120328)
git bisect good db7123fbbc530587941ea3c78666103233282120
# good: [7700695739d078eff01aad6f4d40c933419d08bc] [VPlan] Fix crash with EVL tail folding intrinsic with no corresponding VP (#121542)
git bisect good 7700695739d078eff01aad6f4d40c933419d08bc
# good: [97097958fdf525e8c14fcdde94231bae72ea2673] [Coverage] MCDC: Move findIndependencePairs deferred into MCDCRecord (#121188)
git bisect good 97097958fdf525e8c14fcdde94231bae72ea2673
# bad: [8557a57c4b1a228ce63f2409dd5cc4c70a25e6fc] [flang][OpenMP][NFC] Move reduction init and cleanup region gen to helper (#120761)
git bisect bad 8557a57c4b1a228ce63f2409dd5cc4c70a25e6fc
# bad: [1729e6e742ba9f6f210550000ace4bec72530c2e] [AArch64] Improve bf16 fp_extend lowering. (#118966)
git bisect bad 1729e6e742ba9f6f210550000ace4bec72530c2e
# bad: [d82d53b2e3d7fb2f44f91dc1ca9ce8bb5487da57] [flang][openmp] initialize allocatable components of firstprivate copies (#121808)
git bisect bad d82d53b2e3d7fb2f44f91dc1ca9ce8bb5487da57
# bad: [8d2e611802d5f3bdd681d308ceb293e5ace8a894] [AMDGPU] Calculate getDivNumBits' AtLeast using bitwidth (#121758)
git bisect bad 8d2e611802d5f3bdd681d308ceb293e5ace8a894
# good: [a8f3ebaf11c3745e5123054776eb71755d16f2f9] AMDGPU: Mark test as XFAIL in expensive_checks builds
git bisect good a8f3ebaf11c3745e5123054776eb71755d16f2f9
# good: [93e63460a2958c253dcbb7681faa532962a306bc] RegAllocGreedy: Un-disable test in expensive_checks builds
git bisect good 93e63460a2958c253dcbb7681faa532962a306bc
# bad: [653a54727eaa18c43447ad686c987db67f1dda74] [Darwin][Driver][clang] apple-none-macho orders the resource directory after internal-externc-isystem when nostdlibinc is used (#120507)
git bisect bad 653a54727eaa18c43447ad686c987db67f1dda74
# bad: [8c0483bba2d25ae7e4b6cac150dba9447dfed59c] RegisterCoalescer: Fix assert on remat to copy-to-physreg with subregs (#121734)
git bisect bad 8c0483bba2d25ae7e4b6cac150dba9447dfed59c
# first bad commit: [8c0483bba2d25ae7e4b6cac150dba9447dfed59c] RegisterCoalescer: Fix assert on remat to copy-to-physreg with subregs (#121734)
```

`cvise` spits out the following for a reproducer:

```c
/* Global whose value rkvdec_h264_run() stores into the SPS height member. */
int assemble_hw_pps_pic_width;
/* Decode parameters; holds a single int member in this reproducer. */
struct v4l2_ctrl_h264_decode_params {
  int bottom_field_order_cnt;
};
/* Index used by rkvdec_h264_run() to select an entry of param_set[]. */
unsigned pic_parameter_set_id;
/*
 * Packed SPS layout: four unnamed bit-fields (8, 4, 16 and 16 bits) followed
 * by the only named member, a 1-bit flag.
 */
struct rkvdec_sps {
  short : 8;
  short : 4;
  short : 16;
  short : 16;
  short direct_8x8_inference_flag : 1;
} __attribute__((__packed__));
/* Untyped globals; rkvdec_h264_run() assigns them to its typed local pointers. */
void *priv, *cpu;
/*
 * Packed PPS layout. Most members are bit-fields; unnamed bit-fields carry no
 * accessible value. constrained_intra_pred_flag1 and scaling_list_enable_flag
 * are ordinary (non-bit-field) unsigned members.
 */
struct rkvdec_pps {
  /* Narrow bit-fields written by rkvdec_h264_run(). */
  unsigned pps_seq_parameter_set_id : 5;
  int entropy_coding_mode_flag : 1;
  unsigned : 1;
  int : 5;
  int : 1;
  int : 2;
  unsigned pic_init_qp_minus26 : 7;
  int pic_init_qs_minus26 : 6;
  unsigned chroma_qp_index_offset : 5;
  unsigned constrained_intra_pred_flag1;
  int : 5;
  unsigned scaling_list_enable_flag;
  int : 1;
  int : 1;
  /* From here on every bit-field is 32 bits wide until the final 16-bit pair. */
  unsigned : 32;
  int cur_bot_field : 32;
  int top_field_order_cnt0 : 32;
  int : 32;
  int top_field_order_cnt1 : 32;
  unsigned bot_field_order_cnt1 : 32;
  unsigned top_field_order_cnt2 : 32;
  unsigned bot_field_order_cnt2 : 32;
  int : 32;
  unsigned bot_field_order_cnt3 : 32;
  unsigned top_field_order_cnt4 : 32;
  int bot_field_order_cnt4 : 32;
  int bot_field_order_cnt5 : 32;
  unsigned : 32;
  unsigned : 32;
  unsigned top_field_order_cnt7 : 32;
  unsigned : 32;
  int : 32;
  unsigned : 32;
  int : 32;
  unsigned : 32;
  unsigned : 32;
  unsigned : 32;
  int : 32;
  unsigned : 32;
  unsigned top_field_order_cnt14 : 32;
  int : 32;
  unsigned : 32;
  unsigned : 32;
  /* Two 16-bit fields that rkvdec_h264_run() updates with |= in a loop. */
  int ref_field_flags : 16;
  unsigned ref_topfield_used : 16;
} __attribute__((__packed__));
/*
 * Source PPS control values. rkvdec_h264_run() reads them through the global
 * pointer assemble_hw_pps_run_2 declared with this type.
 */
struct v4l2_ctrl_h264_pps {
  char seq_parameter_set_id;
  char pic_init_qp_minus26;
  char pic_init_qs_minus26;
  char chroma_qp_index_offset;
  short flags;
} *assemble_hw_pps_run_2;
/*
 * Source SPS control values. rkvdec_h264_run() accesses them through the
 * global pointer assemble_hw_pps_run_1 declared with this type.
 */
struct v4l2_ctrl_h264_sps {
  short pic_height_in_map_units_minus1;
  unsigned flags;
} *assemble_hw_pps_run_1;
/* Context type; the address of reflists is passed to assemble_hw_rps(). */
struct rkvdec_h264_ctx {
  int reflists;
};
/*
 * Per-run state. The same declaration also introduces, after the closing
 * brace:
 *   - lookup_ref_buf_idx(): a function returning struct rkvdec_h264_run,
 *   - assemble_hw_pps_run: a pointer to struct rkvdec_h264_run,
 *   - rkvdec_h264_run_run: an object of type struct rkvdec_h264_run.
 */
struct rkvdec_h264_run {
  struct v4l2_ctrl_h264_decode_params decode_params;
  /* Tag-only declaration with no declarator; in standard C it adds no member. */
  struct v4l2_ctrl_h264_pps;
} lookup_ref_buf_idx(int *, struct rkvdec_h264_run *),
    *assemble_hw_pps_run, rkvdec_h264_run_run;
/*
 * One parameter set: a packed SPS followed by a packed PPS. The global
 * assemble_hw_pps_hw_ps is assigned in rkvdec_h264_run().
 */
struct rkvdec_sps_pps {
  struct rkvdec_sps sps;
  struct rkvdec_pps pps;
} *assemble_hw_pps_hw_ps;
/*
 * Private table: a 4464-byte cabac_table, six parameter sets, and an rps
 * member whose address is passed to assemble_hw_rps().
 */
struct rkvdec_h264_priv_tbl {
  char cabac_table[4464];
  struct rkvdec_sps_pps param_set[6];
  int rps;
};
/*
 * Anonymous entry type with two global pointers to it:
 * set_field_order_cnt_dpb is indexed by set_field_order_cnt();
 * assemble_hw_pps_dpb is only used inside a sizeof in rkvdec_h264_run().
 */
struct {
  char fields;
  int top_field_order_cnt;
  int bottom_field_order_cnt;
} *set_field_order_cnt_dpb, *assemble_hw_pps_dpb;
/* Declared without a prototype (empty parameter list) instead of via <string.h>. */
void *memset();
/* Declaration only; no definition appears in this reproducer. */
void assemble_hw_rps(int *, struct rkvdec_h264_run *, int *, int *);
/*
 * Fill several 32-bit field-order-count bit-fields of *pps, mostly from
 * entries of the global set_field_order_cnt_dpb array.
 *
 * pps: packed PPS structure to update in place.
 *
 * Besides writing *pps, this also stores into set_field_order_cnt_dpb[7].
 */
void set_field_order_cnt(struct rkvdec_pps *pps) {
  /* Copies one pps bit-field into another; no dpb entry is involved here. */
  pps->top_field_order_cnt0 = pps->top_field_order_cnt1;
  pps->bot_field_order_cnt1 = set_field_order_cnt_dpb[1].bottom_field_order_cnt;
  pps->top_field_order_cnt2 = set_field_order_cnt_dpb[2].top_field_order_cnt;
  /* Chained assignment: cnt3 is written first, then cnt2 gets cnt3's value. */
  pps->bot_field_order_cnt2 = pps->bot_field_order_cnt3 =
      set_field_order_cnt_dpb[3].bottom_field_order_cnt;
  pps->top_field_order_cnt4 = set_field_order_cnt_dpb[4].top_field_order_cnt;
  pps->bot_field_order_cnt4 = set_field_order_cnt_dpb[4].bottom_field_order_cnt;
  pps->bot_field_order_cnt5 = set_field_order_cnt_dpb[5].bottom_field_order_cnt;
  /* Chained assignment that also overwrites dpb[7] with dpb[8]'s top count. */
  pps->top_field_order_cnt7 = set_field_order_cnt_dpb[7].top_field_order_cnt =
      set_field_order_cnt_dpb[8].top_field_order_cnt;
  /* Reads dpb[4] (the same entry used for top_field_order_cnt4 above). */
  pps->top_field_order_cnt14 = set_field_order_cnt_dpb[4].top_field_order_cnt;
}
/*
 * Entry point of the reproducer: fills one SPS/PPS parameter set from the
 * global control structures, then calls lookup_ref_buf_idx() and
 * assemble_hw_rps().
 *
 * ctx: forwarded unchanged as the first argument of lookup_ref_buf_idx().
 *
 * NOTE(review): the exact statement sequence is what triggers the reported
 * compiler hang; do not "clean up" the oddities flagged below.
 */
void rkvdec_h264_run(int *ctx) {
  /* Never initialized; only its address is passed to assemble_hw_rps(). */
  int reflist_builder;
  struct rkvdec_h264_ctx *h264_ctx = priv;
  struct rkvdec_h264_priv_tbl *tbl = cpu;
  {
    /* Nested scope; presumably an inlined helper, judging by the
     * assemble_hw_pps_* names (not verifiable from this snippet). */
    struct v4l2_ctrl_h264_sps *sps = assemble_hw_pps_run_1;
    struct v4l2_ctrl_h264_pps *pps = assemble_hw_pps_run_2;
    struct v4l2_ctrl_h264_decode_params *dec_params =
        &assemble_hw_pps_run->decode_params;
    /* Null base pointer: the param_set address below is an offset from 0. */
    struct rkvdec_h264_priv_tbl *priv_tbl = 0;
    int pic_height;
    unsigned i;
    assemble_hw_pps_hw_ps = &priv_tbl->param_set[pic_parameter_set_id];
    memset(assemble_hw_pps_hw_ps, 0, sizeof *assemble_hw_pps_hw_ps);
    /* Stores the int global into the short SPS member, then into pic_height. */
    pic_height = sps->pic_height_in_map_units_minus1 =
        assemble_hw_pps_pic_width;
    /* Expression statement with no effect. */
    pic_height;
    /* An unsigned value is assigned to a 1-bit signed bit-field. */
    assemble_hw_pps_hw_ps->sps.direct_8x8_inference_flag =
        assemble_hw_pps_run_1->flags;
    assemble_hw_pps_hw_ps->pps.pps_seq_parameter_set_id =
        pps->seq_parameter_set_id;
    /* 1 when bit 1 (mask 2) of pps->flags is clear, 0 otherwise. */
    assemble_hw_pps_hw_ps->pps.entropy_coding_mode_flag = !(pps->flags & 2);
    assemble_hw_pps_hw_ps->pps.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
    assemble_hw_pps_hw_ps->pps.pic_init_qs_minus26 = pps->pic_init_qs_minus26;
    assemble_hw_pps_hw_ps->pps.chroma_qp_index_offset =
        pps->chroma_qp_index_offset;
    /* 1 when pps->flags is zero, 0 otherwise. */
    assemble_hw_pps_hw_ps->pps.scaling_list_enable_flag = !pps->flags;
    set_field_order_cnt(&assemble_hw_pps_hw_ps->pps);
    i = 0;
    /* The bound is the byte size of one dpb entry, not an element count. */
    for (; i < sizeof(assemble_hw_pps_dpb[0]); i++) {
      assemble_hw_pps_hw_ps->pps.ref_field_flags |= i;
      assemble_hw_pps_hw_ps->pps.ref_topfield_used |= i;
    }
    assemble_hw_pps_hw_ps->pps.cur_bot_field =
        dec_params->bottom_field_order_cnt;
  }
  /* The struct returned by lookup_ref_buf_idx() is discarded. */
  lookup_ref_buf_idx(ctx, &rkvdec_h264_run_run);
  assemble_hw_rps(&reflist_builder, &rkvdec_h264_run_run, &h264_ctx->reflists,
                  &tbl->rps);
}
```

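This appears to be related to the `-ffixed-r19` flag that the kernel uses for Hexagon: as the timings below show, the hang only occurs when that flag is passed, and the same file compiles in normal time without it even at the blamed commit.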

With LLVM @ 93e63460a2958c253dcbb7681faa532962a306bc:

```
$ hyperfine -w 2 'clang --target=hexagon-linux -ffixed-r19 -O2 -c -o /dev/null rkvdec-vdpu383-h264.i'
Benchmark 1: clang --target=hexagon-linux -ffixed-r19 -O2 -c -o /dev/null rkvdec-vdpu383-h264.i
  Time (mean ± σ):     523.0 ms ±  16.1 ms    [User: 512.8 ms, System: 8.7 ms]
  Range (min … max):   505.3 ms … 557.0 ms    10 runs
```

At the blamed commit:

```
$ timeout 10s clang --target=hexagon-linux -ffixed-r19 -O2 -c -o /dev/null rkvdec-vdpu383-h264.i
...

$ echo $status
124

$ hyperfine -w 2 'clang --target=hexagon-linux -O2 -c -o /dev/null rkvdec-vdpu383-h264.i'
Benchmark 1: clang --target=hexagon-linux -O2 -c -o /dev/null rkvdec-vdpu383-h264.i
  Time (mean ± σ):     513.7 ms ±  14.1 ms    [User: 503.8 ms, System: 8.5 ms]
  Range (min … max):   500.7 ms … 540.3 ms    10 runs
```

I have tried teasing out an LLVM IR reproducer but my manual combinations of `opt` and `llc` have a harder time reproducing this.

cc @arsenm @androm3da, I am not sure how to label this for proper notifications to relevant parties.
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to