Hi Steven and Daniel, thank you so much for the corrections and suggestions!


> On Sep 2, 2016, at 9:17 AM, Steven G. Johnson <stevenj....@gmail.com> wrote:
> 
> Try:
> 
> function foo_old!(a)
>     for i in 1:size(a, 2)
>         a[:, i] /= norm(a[:, i])
>     end
>     return a
> end
> 
> function foo_new!(a)
>     for i in 1:size(a, 2)
>         s = zero(eltype(a))
>         @simd for j = 1:size(a,1)
>             @inbounds s += abs2(a[j, i])
>         end
>         scale_factor = 1 / sqrt(s)
>         @simd for j = 1:size(a,1)
>             @inbounds a[j, i] *= scale_factor
>         end
>     end
>     return a
> end
> 
> a = rand(1000,10000);
> @time foo_old!(a);
> @time foo_old!(a);
> @time foo_old!(a);
> a = rand(1000,10000);
> @time foo_new!(a);
> @time foo_new!(a);
> @time foo_new!(a);
> 
> On my machine, foo_new! is at least 10x faster than foo_old!, and also avoids 
> all the allocations for the slices a[:,i].

Reply via email to