This is on Julia 0.5. Let's first look at a simple example:

    function f{N}(::Type{Val{N}}, v::Vector{Int}) # yes v is unused here
        q = ntuple(i -> 1 + i, Val{N})
    end

The generated code for this looks simple and nice: julia> @code_llvm f(Val{4}, [1,2,3,4]) define void @julia_f_50045([4 x i64]* sret, %jl_value_t*, %jl_value_t*) #0 { top: %3 = alloca [4 x i64], align 8 call void @julia_ntuple_50046([4 x i64]* nonnull sret %3, %jl_value_t* inttoptr (i64 140673751236272 to %jl_value_t*)) #0 %.fca.0.gep = getelementptr inbounds [4 x i64], [4 x i64]* %3, i64 0, i64 0 %.fca.0.load = load i64, i64* %.fca.0.gep, align 8 %.fca.0.insert = insertvalue [4 x i64] undef, i64 %.fca.0.load, 0 %.fca.1.gep = getelementptr inbounds [4 x i64], [4 x i64]* %3, i64 0, i64 1 %.fca.1.load = load i64, i64* %.fca.1.gep, align 8 %.fca.1.insert = insertvalue [4 x i64] %.fca.0.insert, i64 %.fca.1.load, 1 %.fca.2.gep = getelementptr inbounds [4 x i64], [4 x i64]* %3, i64 0, i64 2 %.fca.2.load = load i64, i64* %.fca.2.gep, align 8 %.fca.2.insert = insertvalue [4 x i64] %.fca.1.insert, i64 %.fca.2.load, 2 %.fca.3.gep = getelementptr inbounds [4 x i64], [4 x i64]* %3, i64 0, i64 3 %.fca.3.load = load i64, i64* %.fca.3.gep, align 8 %.fca.3.insert = insertvalue [4 x i64] %.fca.2.insert, i64 %.fca.3.load, 3 store [4 x i64] %.fca.3.insert, [4 x i64]* %0, align 8 ret void } On the other hand, if we change f a little so that it accesses v: function f2{N}(::Type{Val{N}}, v::Vector{Int}) q = ntuple(i -> 1 + v[i], Val{N}) end We then get: julia> @code_llvm f2(Val{4}, [1,2,3,4]) define void @julia_f2_50063([4 x i64]* sret, %jl_value_t*, %jl_value_t*) #0 { top: %3 = alloca [4 x i64], align 8 %4 = call %jl_value_t*** @jl_get_ptls_states() %5 = alloca [5 x %jl_value_t*], align 8 %.sub = getelementptr inbounds [5 x %jl_value_t*], [5 x %jl_value_t*]* %5, i64 0, i64 0 %6 = getelementptr [5 x %jl_value_t*], [5 x %jl_value_t*]* %5, i64 0, i64 2 store %jl_value_t* null, %jl_value_t** %6, align 8 %7 = getelementptr [5 x %jl_value_t*], [5 x %jl_value_t*]* %5, i64 0, i64 3 store %jl_value_t* null, %jl_value_t** %7, align 8 %8 = getelementptr [5 x %jl_value_t*], [5 x %jl_value_t*]* %5, i64 0, i64 
4 store %jl_value_t* null, %jl_value_t** %8, align 8 %9 = bitcast [5 x %jl_value_t*]* %5 to i64* store i64 6, i64* %9, align 8 %10 = getelementptr [5 x %jl_value_t*], [5 x %jl_value_t*]* %5, i64 0, i64 1 %11 = bitcast %jl_value_t*** %4 to i64* %12 = load i64, i64* %11, align 8 %13 = bitcast %jl_value_t** %10 to i64* store i64 %12, i64* %13, align 8 store %jl_value_t** %.sub, %jl_value_t*** %4, align 8 %14 = call %jl_value_t* @jl_gc_alloc_1w() store %jl_value_t* %14, %jl_value_t** %6, align 8 %15 = getelementptr inbounds %jl_value_t, %jl_value_t* %14, i64 -1, i32 0 store %jl_value_t* inttoptr (i64 140673762914912 to %jl_value_t*), %jl_value_t** %15, align 8 store %jl_value_t* %2, %jl_value_t** %7, align 8 %16 = getelementptr inbounds %jl_value_t, %jl_value_t* %14, i64 0, i32 0 store %jl_value_t* %2, %jl_value_t** %16, align 8 store %jl_value_t* %14, %jl_value_t** %8, align 8 * call void @julia_ntuple_50064([4 x i64]* nonnull sret %3, %jl_value_t* %14, %jl_value_t* inttoptr (i64 140673751236272 to %jl_value_t*)) #0* * %.fca.0.gep = getelementptr inbounds [4 x i64], [4 x i64]* %3, i64 0, i64 0* * %.fca.0.load = load i64, i64* %.fca.0.gep, align 8* * %.fca.0.insert = insertvalue [4 x i64] undef, i64 %.fca.0.load, 0* * %.fca.1.gep = getelementptr inbounds [4 x i64], [4 x i64]* %3, i64 0, i64 1* * %.fca.1.load = load i64, i64* %.fca.1.gep, align 8* * %.fca.1.insert = insertvalue [4 x i64] %.fca.0.insert, i64 %.fca.1.load, 1* * %.fca.2.gep = getelementptr inbounds [4 x i64], [4 x i64]* %3, i64 0, i64 2* * %.fca.2.load = load i64, i64* %.fca.2.gep, align 8* * %.fca.2.insert = insertvalue [4 x i64] %.fca.1.insert, i64 %.fca.2.load, 2* * %.fca.3.gep = getelementptr inbounds [4 x i64], [4 x i64]* %3, i64 0, i64 3* * %.fca.3.load = load i64, i64* %.fca.3.gep, align 8* * %.fca.3.insert = insertvalue [4 x i64] %.fca.2.insert, i64 %.fca.3.load, 3* * store [4 x i64] %.fca.3.insert, [4 x i64]* %0, align 8* * %17 = load i64, i64* %13, align 8* * store i64 %17, i64* %11, align 8* * 
ret void* }

The bold text (the lines wrapped in asterisks above) is very similar to the previous function, but there is a large chunk of allocations and stores above it, as well as a call to jl_get_ptls_states(). A generated function that does the same works well and does not allocate:

    @generated function f2{N}(::Type{Val{N}}, v::Vector{Int})
        return Expr(:tuple, [:($i+v[$i]) for i=1:N]...)
    end

but I would like to avoid @generated by using ntuple instead. Does anyone have experience with how to use ntuple like this without getting allocations?

Thanks!

// Kristoffer