ports@,

I'd like to update our misc/llama.cpp to the latest snapshot (b6934).
It allows running the https://huggingface.co/collections/Qwen/qwen3-vl
models.  We have no GPU support, but with -t 32 I ran the Qwen3 VL 30B
model CPU-only on an AMD Ryzen 9 7950X3D at roughly 2 tokens/second,
which is more or less usable.  It does need a lot of memory, though:
120G of :datasize is enough.
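For those who want to try it, this is roughly the invocation I used (a
sketch only: the GGUF file names are placeholders and the flags may
differ between snapshots, see llama-mtmd-cli --help):

  $ llama-mtmd-cli -t 32 -m Qwen3-VL-30B.Q4_K_M.gguf \
        --mmproj Qwen3-VL-30B.mmproj.gguf \
        --image picture.png -p "explain the picture"

and this is one way to raise the memory limit, e.g. with a dedicated
login class in login.conf(5) (the class name is made up; re-login
afterwards for it to take effect):

  llama:\
        :datasize-max=120g:\
        :datasize-cur=120g:\
        :tc=default: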
It allows to run https://huggingface.co/collections/Qwen/qwen3-vl models. We don't have GPU but with -t 32 I had run Qwen3 VL 30B model on CPU only at AMD Ryzen 9 7950X3D with acceptable to use speed like 2 tokens/second which more or leass useble. But it requires memory. 120G as :datasize is enough. Because we uses libggml as dedicated port, it must to be updated to the last version, and it contains a bug which brokes large models under large number of threads: https://github.com/ggml-org/llama.cpp/issues/16960 I had included a patch to fix this issue as well. I had tested analyze of .pdf document, chat and "explain the picture" on -current/amd64. Everything works. Ok? Index: misc/llama.cpp/Makefile =================================================================== RCS file: /home/cvs/ports/misc/llama.cpp/Makefile,v diff -u -p -r1.10 Makefile --- misc/llama.cpp/Makefile 1 Oct 2025 19:44:07 -0000 1.10 +++ misc/llama.cpp/Makefile 3 Nov 2025 13:49:51 -0000 @@ -2,7 +2,7 @@ COMMENT = LLM inference system GH_ACCOUNT = ggml-org GH_PROJECT = llama.cpp -GH_TAGNAME = b6641 +GH_TAGNAME = b6934 PKGNAME = llama.cpp-0.0.${GH_TAGNAME:S/b//} SHARED_LIBS += llama 2.0 Index: misc/llama.cpp/distinfo =================================================================== RCS file: /home/cvs/ports/misc/llama.cpp/distinfo,v diff -u -p -r1.4 distinfo --- misc/llama.cpp/distinfo 1 Oct 2025 19:44:07 -0000 1.4 +++ misc/llama.cpp/distinfo 3 Nov 2025 13:50:07 -0000 @@ -1,2 +1,2 @@ -SHA256 (llama.cpp-b6641.tar.gz) = 0xJrrTblSgapCD4ujzjkobbpyI5bL6fT+4tYchCwwuA= -SIZE (llama.cpp-b6641.tar.gz) = 25867942 +SHA256 (llama.cpp-b6934.tar.gz) = qsr4P+8j/z/nK8k8/Iv1/QTdSFhqIshFYwCI7L5/a7k= +SIZE (llama.cpp-b6934.tar.gz) = 26417348 Index: devel/libggml/Makefile =================================================================== RCS file: /home/cvs/ports/devel/libggml/Makefile,v diff -u -p -r1.2 Makefile --- devel/libggml/Makefile 20 Oct 2025 17:25:51 -0000 1.2 +++ devel/libggml/Makefile 3 Nov 2025 01:44:00 -0000 @@ -2,7 +2,8 @@ COMMENT= tensor library for machine lea GH_ACCOUNT= ggml-org GH_PROJECT= ggml -GH_TAGNAME= v0.9.4 +GH_COMMIT= 09aa758381718f7731c148238574a7e169001f13 +DISTNAME= ggml-0.9.4.20251101 PKGNAME= lib${DISTNAME} SHARED_LIBS += ggml 2.0 Index: devel/libggml/distinfo =================================================================== RCS file: /home/cvs/ports/devel/libggml/distinfo,v diff -u -p -r1.1.1.1 distinfo --- devel/libggml/distinfo 1 Oct 2025 19:42:10 -0000 1.1.1.1 +++ devel/libggml/distinfo 3 Nov 2025 01:44:20 -0000 @@ -1,2 +1,2 @@ -SHA256 (ggml-0.9.4.tar.gz) = JL0VAK7ycUe5LQI8/23P/fyNqcVnUPlB9dXRtTh1xiM= -SIZE (ggml-0.9.4.tar.gz) = 2193279 +SHA256 (ggml-0.9.4.20251101-09aa7583.tar.gz) = fx+ZI4GhV5KlZlGM3QDWERyj1xXY8t5vh5ELtkwK15Y= +SIZE (ggml-0.9.4.20251101-09aa7583.tar.gz) = 2330931 Index: devel/libggml/patches/patch-src_ggml-backend-reg_cpp =================================================================== RCS file: /home/cvs/ports/devel/libggml/patches/patch-src_ggml-backend-reg_cpp,v diff -u -p -r1.1.1.1 patch-src_ggml-backend-reg_cpp --- devel/libggml/patches/patch-src_ggml-backend-reg_cpp 1 Oct 2025 19:42:10 -0000 1.1.1.1 +++ devel/libggml/patches/patch-src_ggml-backend-reg_cpp 3 Nov 2025 12:01:00 -0000 @@ -1,7 +1,7 @@ Index: src/ggml-backend-reg.cpp --- src/ggml-backend-reg.cpp.orig +++ src/ggml-backend-reg.cpp -@@ -517,7 +517,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const +@@ -524,7 +524,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const 
Because we ship libggml as a dedicated port, it has to be updated to
the latest version as well, and that version contains a bug which
breaks large models under a high thread count (the repack matmul
splits rows into per-thread chunks and rounds the boundaries up to
NB_COLS; with enough threads the chunks become smaller than NB_COLS
and can overlap or run past the end after alignment):
https://github.com/ggml-org/llama.cpp/issues/16960
I have included the upstream fix as a port patch.

I tested analyzing a .pdf document, chat, and "explain the picture"
on -current/amd64.  Everything works.

Ok?

Index: misc/llama.cpp/Makefile
===================================================================
RCS file: /home/cvs/ports/misc/llama.cpp/Makefile,v
diff -u -p -r1.10 Makefile
--- misc/llama.cpp/Makefile	1 Oct 2025 19:44:07 -0000	1.10
+++ misc/llama.cpp/Makefile	3 Nov 2025 13:49:51 -0000
@@ -2,7 +2,7 @@ COMMENT =	LLM inference system
 
 GH_ACCOUNT =	ggml-org
 GH_PROJECT =	llama.cpp
-GH_TAGNAME =	b6641
+GH_TAGNAME =	b6934
 PKGNAME =	llama.cpp-0.0.${GH_TAGNAME:S/b//}
 
 SHARED_LIBS +=	llama 2.0
Index: misc/llama.cpp/distinfo
===================================================================
RCS file: /home/cvs/ports/misc/llama.cpp/distinfo,v
diff -u -p -r1.4 distinfo
--- misc/llama.cpp/distinfo	1 Oct 2025 19:44:07 -0000	1.4
+++ misc/llama.cpp/distinfo	3 Nov 2025 13:50:07 -0000
@@ -1,2 +1,2 @@
-SHA256 (llama.cpp-b6641.tar.gz) = 0xJrrTblSgapCD4ujzjkobbpyI5bL6fT+4tYchCwwuA=
-SIZE (llama.cpp-b6641.tar.gz) = 25867942
+SHA256 (llama.cpp-b6934.tar.gz) = qsr4P+8j/z/nK8k8/Iv1/QTdSFhqIshFYwCI7L5/a7k=
+SIZE (llama.cpp-b6934.tar.gz) = 26417348
Index: devel/libggml/Makefile
===================================================================
RCS file: /home/cvs/ports/devel/libggml/Makefile,v
diff -u -p -r1.2 Makefile
--- devel/libggml/Makefile	20 Oct 2025 17:25:51 -0000	1.2
+++ devel/libggml/Makefile	3 Nov 2025 01:44:00 -0000
@@ -2,7 +2,8 @@ COMMENT=	tensor library for machine lea
 
 GH_ACCOUNT=	ggml-org
 GH_PROJECT=	ggml
-GH_TAGNAME=	v0.9.4
+GH_COMMIT=	09aa758381718f7731c148238574a7e169001f13
+DISTNAME=	ggml-0.9.4.20251101
 PKGNAME=	lib${DISTNAME}
 
 SHARED_LIBS += ggml 2.0
Index: devel/libggml/distinfo
===================================================================
RCS file: /home/cvs/ports/devel/libggml/distinfo,v
diff -u -p -r1.1.1.1 distinfo
--- devel/libggml/distinfo	1 Oct 2025 19:42:10 -0000	1.1.1.1
+++ devel/libggml/distinfo	3 Nov 2025 01:44:20 -0000
@@ -1,2 +1,2 @@
-SHA256 (ggml-0.9.4.tar.gz) = JL0VAK7ycUe5LQI8/23P/fyNqcVnUPlB9dXRtTh1xiM=
-SIZE (ggml-0.9.4.tar.gz) = 2193279
+SHA256 (ggml-0.9.4.20251101-09aa7583.tar.gz) = fx+ZI4GhV5KlZlGM3QDWERyj1xXY8t5vh5ELtkwK15Y=
+SIZE (ggml-0.9.4.20251101-09aa7583.tar.gz) = 2330931
Index: devel/libggml/patches/patch-src_ggml-backend-reg_cpp
===================================================================
RCS file: /home/cvs/ports/devel/libggml/patches/patch-src_ggml-backend-reg_cpp,v
diff -u -p -r1.1.1.1 patch-src_ggml-backend-reg_cpp
--- devel/libggml/patches/patch-src_ggml-backend-reg_cpp	1 Oct 2025 19:42:10 -0000	1.1.1.1
+++ devel/libggml/patches/patch-src_ggml-backend-reg_cpp	3 Nov 2025 12:01:00 -0000
@@ -1,7 +1,7 @@
 Index: src/ggml-backend-reg.cpp
 --- src/ggml-backend-reg.cpp.orig
 +++ src/ggml-backend-reg.cpp
-@@ -517,7 +517,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const
+@@ -524,7 +524,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const
      search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
  #endif
      // default search paths: executable directory, current directory
Index: devel/libggml/patches/patch-src_ggml-cpu_repack_cpp
===================================================================
RCS file: devel/libggml/patches/patch-src_ggml-cpu_repack_cpp
diff -N devel/libggml/patches/patch-src_ggml-cpu_repack_cpp
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ devel/libggml/patches/patch-src_ggml-cpu_repack_cpp	3 Nov 2025 12:01:17 -0000
@@ -0,0 +1,59 @@
+https://github.com/ggml-org/llama.cpp/pull/16956
+
+Index: src/ggml-cpu/repack.cpp
+--- src/ggml-cpu/repack.cpp.orig
++++ src/ggml-cpu/repack.cpp
+@@ -1678,10 +1678,24 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int6
+     int64_t chunk_size = (nr + nth_scaled - 1) / nth_scaled;
+     int64_t nchunk = (nr + chunk_size - 1) / chunk_size;
+ 
++    // Ensure minimum chunk size to avoid alignment issues with high thread counts
++    // Minimum chunk size should be at least NB_COLS to prevent overlapping chunks after alignment
++    const int64_t min_chunk_size = NB_COLS;
++    if (nchunk > 0 && (nr / nchunk) < min_chunk_size && nr >= min_chunk_size) {
++        nchunk = (nr + min_chunk_size - 1) / min_chunk_size;
++    }
++
+     if (nth == 1 || nchunk < nth || disable_chunking) {
+         nchunk = nth;
+     }
+ 
++    // Ensure nchunk doesn't exceed the number of rows divided by minimum chunk size
++    // This prevents creating too many tiny chunks that could overlap after alignment
++    const int64_t max_nchunk = (nr + min_chunk_size - 1) / min_chunk_size;
++    if (nchunk > max_nchunk) {
++        nchunk = max_nchunk;
++    }
++
+     if (ith == 0) {
+         // Every thread starts at ith, so the first unprocessed chunk is nth. This save a bit of coordination right at the start.
+         ggml_threadpool_chunk_set(params->threadpool, nth);
+@@ -1695,8 +1709,15 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int6
+     while (current_chunk < nchunk) {
+         int64_t src0_start = (current_chunk * ne01) / nchunk;
+         int64_t src0_end = ((current_chunk + 1) * ne01) / nchunk;
++
++        // Align boundaries to NB_COLS - round up to ensure all data is included
++        // The chunk size limiting above ensures chunks are large enough to prevent overlaps
+         src0_start = (src0_start % NB_COLS) ? src0_start + NB_COLS - (src0_start % NB_COLS) : src0_start;
+         src0_end = (src0_end % NB_COLS) ? src0_end + NB_COLS - (src0_end % NB_COLS) : src0_end;
++        if (src0_end > ne01) {
++            src0_end = ne01;
++        }
++
+         if (src0_start >= src0_end) {
+             break;
+         }
+@@ -1808,8 +1829,12 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int6
+     int64_t src0_cur_start = (ith * ne01) / nth;
+     int64_t src0_cur_end = ((ith + 1) * ne01) / nth;
+ 
++    // Align boundaries to NB_COLS - round up to ensure all data is included
+     src0_cur_start = (src0_cur_start % NB_COLS) ? src0_cur_start + NB_COLS - (src0_cur_start % NB_COLS) : src0_cur_start;
+     src0_cur_end = (src0_cur_end % NB_COLS) ? src0_cur_end + NB_COLS - (src0_cur_end % NB_COLS) : src0_cur_end;
++    if (src0_cur_end > ne01) {
++        src0_cur_end = ne01;
++    }
+ 
+     if (src0_cur_start >= src0_cur_end) {
+         return;
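Not part of the diff, but to illustrate what the repack patch fixes,
here is a tiny standalone C++ sketch (sizes are made up, and NB_COLS = 8
is just a plausible block width) of how the unpatched boundary rounding
can push a chunk past the last row once chunks shrink below NB_COLS:

  // Illustration only: simulate the pre-patch chunk boundary math.
  #include <cstdint>
  #include <cstdio>

  static int64_t round_up(int64_t v, int64_t m) {
      // Round v up to the next multiple of m, as repack.cpp does.
      return (v % m) ? v + m - (v % m) : v;
  }

  int main() {
      const int64_t ne01    = 1001; // rows, not a multiple of NB_COLS
      const int64_t NB_COLS = 8;    // assumed block width
      const int64_t nchunk  = 64;   // i.e. a high thread count

      for (int64_t c = 0; c < nchunk; c++) {
          int64_t start = round_up((c * ne01) / nchunk, NB_COLS);
          int64_t end   = round_up(((c + 1) * ne01) / nchunk, NB_COLS);
          if (end > ne01) // the patch clamps here; unpatched, this overruns
              printf("chunk %lld: [%lld, %lld) overruns ne01=%lld\n",
                     (long long)c, (long long)start, (long long)end,
                     (long long)ne01);
      }
      return 0;
  }

With the clamp and the minimum chunk size from the patch, every chunk
stays inside [0, ne01) and neighbouring chunks no longer overlap.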
--
wbr, Kirill