Hello community, here is the log from the commit of package ocaml-parmap for openSUSE:Factory checked in at 2019-10-08 19:57:01 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/ocaml-parmap (Old) and /work/SRC/openSUSE:Factory/.ocaml-parmap.new.2352 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ocaml-parmap" Tue Oct 8 19:57:01 2019 rev:4 rq:734221 version:20190330.8d19c66 Changes: -------- --- /work/SRC/openSUSE:Factory/ocaml-parmap/ocaml-parmap.changes 2017-08-18 15:03:27.994074563 +0200 +++ /work/SRC/openSUSE:Factory/.ocaml-parmap.new.2352/ocaml-parmap.changes 2019-10-08 19:57:04.756350411 +0200 @@ -1,0 +2,6 @@ +Mon Sep 23 16:35:39 UTC 2019 - [email protected] + +- Update to version 20190330.8d19c66 + Fix for ocaml 4.06 + +------------------------------------------------------------------- Old: ---- ocaml-parmap-20170223.c9b0ee7.tar.xz New: ---- ocaml-parmap-20190330.8d19c66.tar.xz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ ocaml-parmap.spec ++++++ --- /var/tmp/diff_new_pack.JhjWdt/_old 2019-10-08 19:57:05.356348613 +0200 +++ /var/tmp/diff_new_pack.JhjWdt/_new 2019-10-08 19:57:05.360348602 +0200 @@ -17,7 +17,7 @@ Name: ocaml-parmap -Version: 20170223.c9b0ee7 +Version: 20190330.8d19c66 Release: 0 %{?ocaml_preserve_bytecode} Summary: Exploit multicore architectures for OCaml programs with minimal modifications ++++++ ocaml-parmap-20170223.c9b0ee7.tar.xz -> ocaml-parmap-20190330.8d19c66.tar.xz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocaml-parmap-20170223.c9b0ee7/README new/ocaml-parmap-20190330.8d19c66/README --- old/ocaml-parmap-20170223.c9b0ee7/README 2017-02-24 01:29:58.000000000 +0100 +++ new/ocaml-parmap-20190330.8d19c66/README 2019-03-30 17:57:21.000000000 +0100 @@ -31,7 +31,7 @@ Parmap is not meant to run on a cluster, see one of the many available (re)implementations of the map-reduce schema for that. 
-By forking the parent process on a sigle machine, the children get access, for +By forking the parent process on a single machine, the children get access, for free, to all the data structures already built, even the imperative ones, and as far as your computation inside the map/fold does not produce side effects that need to be preserved, the final result will be the same as performing the @@ -123,4 +123,4 @@ 1 and 2 among subsequent calls to the parallel function by preallocating the result array and the shared memory buffer, and passing them as optional parameters to the array_float_parmap function: this may save a significant amount of time if the -array is very large. \ No newline at end of file +array is very large. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocaml-parmap-20170223.c9b0ee7/README.md new/ocaml-parmap-20190330.8d19c66/README.md --- old/ocaml-parmap-20170223.c9b0ee7/README.md 2017-02-24 01:29:58.000000000 +0100 +++ new/ocaml-parmap-20190330.8d19c66/README.md 2019-03-30 17:57:21.000000000 +0100 @@ -29,7 +29,7 @@ `Parmap` _is not meant_ to run on a cluster, see one of the many available (re)implementations of the map-reduce schema for that. -By forking the parent process on a sigle machine, the children get access, for +By forking the parent process on a single machine, the children get access, for free, to all the data structures already built, even the imperative ones, and as far as your computation inside the map/fold does not produce side effects that need to be preserved, the final result will be the same as performing the @@ -102,13 +102,14 @@ # let d = Unix.gettimeofday() in ignore(Array.create 10000000 0.); Unix.gettimeofday() -. d;; - : float = 0.0501301288604736328 ``` - 2. create a shared memory area - 3. possibly copy the result array to the shared memory area + 2. create a shared memory area , - 4. perform the computation in the children writing the result in the shared memory area + 3. 
possibly copy the result array to the shared memory area, - 5. possibly copy the result back to the OCaml array + 4. perform the computation in the children writing the result in the shared memory area, + + 5. possibly copy the result back to the OCaml array. All implementations need to do 1, 2 and 4; steps 3 and/or 5 may be omitted depending on what the user wants to do with the result. @@ -117,4 +118,4 @@ 1 and 2 among subsequent calls to the parallel function by preallocating the result array and the shared memory buffer, and passing them as optional parameters to the `array_float_parmap` function: this may save a significant amount of time if the -array is very large. \ No newline at end of file +array is very large. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocaml-parmap-20170223.c9b0ee7/bytearray.ml new/ocaml-parmap-20190330.8d19c66/bytearray.ml --- old/ocaml-parmap-20170223.c9b0ee7/bytearray.ml 2017-02-24 01:29:58.000000000 +0100 +++ new/ocaml-parmap-20190330.8d19c66/bytearray.ml 2019-03-30 17:57:21.000000000 +0100 @@ -37,17 +37,22 @@ external unsafe_blit_from_string : string -> int -> t -> int -> int -> unit = "ml_blit_string_to_bigarray" "noalloc" - -external unsafe_blit_to_string : t -> int -> string -> int -> int -> unit +(* +external unsafe_blit_to_bytes : t -> int -> bytes -> int -> int -> unit = "ml_blit_bigarray_to_string" "noalloc" - + *) +(* +let unsafe_sub a ofs len = + let s = Bytes.create len in + unsafe_blit_to_bytes a ofs s 0 len; + Bytes.to_string s + *) +(* let to_string a = let l = length a in if l > Sys.max_string_length then invalid_arg "Bytearray.to_string" else - let s = String.create l in - unsafe_blit_to_string a 0 s 0 l; - s - + unsafe_sub a 0 l + *) let of_string s = let l = String.length s in let a = create l in @@ -59,18 +64,15 @@ let ba = Bigarray.Array1.map_file fd Bigarray.char Bigarray.c_layout true l in unsafe_blit_from_string s 0 ba 0 l; ba - +(* let sub a ofs len = if ofs < 
0 || len < 0 || ofs > length a - len || len > Sys.max_string_length then invalid_arg "Bytearray.sub" - else begin - let s = String.create len in - unsafe_blit_to_string a ofs s 0 len; - s - end - + else + unsafe_sub a ofs len + *) let rec prefix_rec a i a' i' l = l = 0 || (a.{i} = a'.{i'} && prefix_rec a (i + 1) a' (i' + 1) (l - 1)) @@ -86,13 +88,13 @@ || j < 0 || j > length a - l then invalid_arg "Bytearray.blit_from_string" else unsafe_blit_from_string s i a j l - -let blit_to_string a i s j l = +(* +let blit_to_bytes a i s j l = if l < 0 || i < 0 || i > length a - l - || j < 0 || j > String.length s - l - then invalid_arg "Bytearray.blit_to_string" - else unsafe_blit_to_string a i s j l - + || j < 0 || j > Bytes.length s - l + then invalid_arg "Bytearray.blit_to_bytes" + else unsafe_blit_to_bytes a i s j l + *) external marshal : 'a -> Marshal.extern_flags list -> t = "ml_marshal_to_bigarray" @@ -119,6 +121,6 @@ let of_floatarray fa = let l = Array.length fa in - let a = createf l in + let a = createf l in unsafe_blit_from_floatarray fa 0 a 0 l; a diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocaml-parmap-20170223.c9b0ee7/bytearray.mli new/ocaml-parmap-20190330.8d19c66/bytearray.mli --- old/ocaml-parmap-20170223.c9b0ee7/bytearray.mli 2017-02-24 01:29:58.000000000 +0100 +++ new/ocaml-parmap-20190330.8d19c66/bytearray.mli 2019-03-30 17:57:21.000000000 +0100 @@ -18,9 +18,9 @@ val create : int -> t val length : t -> int - +(* val to_string : t -> string - + *) val of_string : string -> t val mmap_of_string : Unix.file_descr -> string -> t @@ -30,13 +30,13 @@ val to_this_floatarray : float array -> tf -> int -> float array val of_floatarray : float array -> tf - +(* val sub : t -> int -> int -> string - + *) val blit_from_string : string -> int -> t -> int -> int -> unit - -val blit_to_string : t -> int -> string -> int -> int -> unit - +(* +val blit_to_bytes : t -> int -> bytes -> int -> int -> unit + *) val prefix : t 
-> t -> int -> bool val marshal : 'a -> Marshal.extern_flags list -> t diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocaml-parmap-20170223.c9b0ee7/oasis_test.sh new/ocaml-parmap-20190330.8d19c66/oasis_test.sh --- old/ocaml-parmap-20170223.c9b0ee7/oasis_test.sh 2017-02-24 01:29:58.000000000 +0100 +++ new/ocaml-parmap-20190330.8d19c66/oasis_test.sh 2019-03-30 17:57:21.000000000 +0100 @@ -1,8 +1,8 @@ #!/bin/bash -set -x +set -x # DEBUG oasis setup -ocaml setup.ml -configure +ocaml setup.ml -configure -prefix `opam config var prefix` ocaml setup.ml -build -ocaml setup.ml -install +ocaml setup.ml -reinstall diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocaml-parmap-20170223.c9b0ee7/opam new/ocaml-parmap-20190330.8d19c66/opam --- old/ocaml-parmap-20170223.c9b0ee7/opam 2017-02-24 01:29:58.000000000 +0100 +++ new/ocaml-parmap-20190330.8d19c66/opam 2019-03-30 17:57:21.000000000 +0100 @@ -25,4 +25,5 @@ "ocamlfind" "ocamlbuild" {build} "conf-autoconf" + "conf-aclocal" ] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocaml-parmap-20170223.c9b0ee7/parmap.ml new/ocaml-parmap-20190330.8d19c66/parmap.ml --- old/ocaml-parmap-20170223.c9b0ee7/parmap.ml 2017-02-24 01:29:58.000000000 +0100 +++ new/ocaml-parmap-20190330.8d19c66/parmap.ml 2019-03-30 17:57:21.000000000 +0100 @@ -35,6 +35,20 @@ let set_ncores n = ncores := n;; let get_ncores () = !ncores +(* core mapping *) + +let no_core_pinning = ref false + +let disable_core_pinning () = + no_core_pinning := true + +let enable_core_pinning () = + no_core_pinning := false + +let core_mapping = ref None + +let set_core_mapping (m: int array) = core_mapping := Some m + (* worker process rank *) let masters_rank = -1 @@ -246,7 +260,14 @@ type msg_to_worker = Finished | Task of int let setup_children_chans oc pipedown ?fdarr i = - Setcore.setcore i; + (if !no_core_pinning then () + else match 
!core_mapping with + (* map process i to core i, or, if a core_mapping exist, + to core_mapping.(i), reusing core_mapping as many times as needed *) + | None -> Setcore.setcore i + | Some m -> + let ml = Array.length m in + Setcore.setcore m.(i mod ml)); (* close the other ends of the pipe and convert my ends to ic/oc *) Unix.close (snd pipedown.(i)); let pid = Unix.getpid() in @@ -547,7 +568,7 @@ let mapi_range lo hi (f:int -> 'a -> 'b) a = let l = hi-lo in if l < 0 then [||] else begin - let r = Array.create (l+1) (f 0 (Array.unsafe_get a lo)) in + let r = Array.create (l+1) (f lo (Array.unsafe_get a lo)) in for i = 1 to l do let idx = lo+i in Array.unsafe_set r i (f idx (Array.unsafe_get a idx)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocaml-parmap-20170223.c9b0ee7/parmap.mli new/ocaml-parmap-20190330.8d19c66/parmap.mli --- old/ocaml-parmap-20170223.c9b0ee7/parmap.mli 2017-02-24 01:29:58.000000000 +0100 +++ new/ocaml-parmap-20190330.8d19c66/parmap.mli 2019-03-30 17:57:21.000000000 +0100 @@ -11,14 +11,14 @@ (* library, see the LICENSE file for more information. *) (**************************************************************************) -(** Module [Parmap]: efficient parallel map, fold and mapfold on lists and - arrays on multicores. +(** Module [Parmap]: efficient parallel map, fold and mapfold on lists and + arrays on multicores. - All the primitives allow to control the granularity of the parallelism + All the primitives allow to control the granularity of the parallelism via an optional parameter [chunksize]: if [chunksize] is omitted, the input sequence is split evenly among the available cores; if [chunksize] is specified, the input data is split in chunks of size [chunksize] and - dispatched to the available cores using an on demand strategy that + dispatched to the available cores using an on demand strategy that ensures automatic load balancing. 
A specific primitive [array_float_parmap] is provided for fast operations on float arrays. @@ -34,6 +34,25 @@ val get_ncores : unit -> int +(** {6 Enabling/disabling processes core pinning } *) + +val disable_core_pinning: unit -> unit + (** [disable_core_pinning ()] will prevent forked out processes + from being pinned to a specific core. + WARNING: this may have a negative impact on performance, + but might be necessary on systems where several parmap computations + are running concurrently. *) + +val enable_core_pinning: unit -> unit +(** [enable_core_pinning ()] turns on core pinning (it is on by default). *) + +(** {6 Setting and getting an explicit mapping from processes to cores } *) + +val set_core_mapping: int array -> unit + (** [set_core_mapping m] installs the array [m] as the mapping to be used to pin + processes to cores. Process [i] will be pinned to core [m.(i mod Array.length m)]. + *) + (** {6 Getting the current worker rank. The master process has rank -1. Other processes have the rank at which they were forked out (a worker's rank is in [0..ncores-1]) } *) @@ -60,26 +79,26 @@ (** {6 Parallel mapfold} *) val parmapfold : ?init:(int -> unit) -> ?finalize:(unit -> unit) -> ?ncores:int -> ?chunksize:int -> ('a -> 'b) -> 'a sequence -> ('b-> 'c -> 'c) -> 'c -> ('c->'c->'c) -> 'c - (** [parmapfold ~ncores:n f (L l) op b concat ] computes [List.fold_right op (List.map f l) b] - by forking [n] processes on a multicore machine. + (** [parmapfold ~ncores:n f (L l) op b concat ] computes [List.fold_right op (List.map f l) b] + by forking [n] processes on a multicore machine. You need to provide the extra [concat] operator to combine the partial results of the - fold computed on each core. If 'b = 'c, then [concat] may be simply [op]. - The order of computation in parallel changes w.r.t. sequential execution, so this + fold computed on each core. If 'b = 'c, then [concat] may be simply [op]. + The order of computation in parallel changes w.r.t. 
sequential execution, so this function is only correct if [op] and [concat] are associative and commutative. If the optional [chunksize] parameter is specified, the processes compute the result in an on-demand fashion on blocks of size [chunksize]. - [parmapfold ~ncores:n f (A a) op b concat ] computes [Array.fold_right op (Array.map f a) b] + [parmapfold ~ncores:n f (A a) op b concat ] computes [Array.fold_right op (Array.map f a) b] *) (** {6 Parallel fold} *) val parfold: ?init:(int -> unit) -> ?finalize:(unit -> unit) -> ?ncores:int -> ?chunksize:int -> ('a -> 'b -> 'b) -> 'a sequence -> 'b -> ('b->'b->'b) -> 'b - (** [parfold ~ncores:n op (L l) b concat] computes [List.fold_right op l b] + (** [parfold ~ncores:n op (L l) b concat] computes [List.fold_right op l b] by forking [n] processes on a multicore machine. You need to provide the extra [concat] operator to combine the partial results of the - fold computed on each core. If 'b = 'c, then [concat] may be simply [op]. - The order of computation in parallel changes w.r.t. sequential execution, so this + fold computed on each core. If 'b = 'c, then [concat] may be simply [op]. + The order of computation in parallel changes w.r.t. sequential execution, so this function is only correct if [op] and [concat] are associative and commutative. If the optional [chunksize] parameter is specified, the processes compute the result in an on-demand fashion @@ -90,9 +109,9 @@ (** {6 Parallel map} *) val parmap : ?init:(int -> unit) -> ?finalize:(unit -> unit) -> ?ncores:int -> ?chunksize:int -> ('a -> 'b) -> 'a sequence -> 'b list - (** [parmap ~ncores:n f (L l) ] computes [List.map f l] + (** [parmap ~ncores:n f (L l) ] computes [List.map f l] by forking [n] processes on a multicore machine. - [parmap ~ncores:n f (A a) ] computes [Array.map f a] + [parmap ~ncores:n f (A a) ] computes [Array.map f a] by forking [n] processes on a multicore machine. 
If the optional [chunksize] parameter is specified, the processes compute the result in an on-demand fashion @@ -103,9 +122,9 @@ (** {6 Parallel iteration} *) val pariter : ?init:(int -> unit) -> ?finalize:(unit -> unit) -> ?ncores:int -> ?chunksize:int -> ('a -> unit) -> 'a sequence -> unit - (** [pariter ~ncores:n f (L l) ] computes [List.iter f l] + (** [pariter ~ncores:n f (L l) ] computes [List.iter f l] by forking [n] processes on a multicore machine. - [parmap ~ncores:n f (A a) ] computes [Array.iter f a] + [parmap ~ncores:n f (A a) ] computes [Array.iter f a] by forking [n] processes on a multicore machine. If the optional [chunksize] parameter is specified, the processes perform the computation in an on-demand fashion @@ -133,7 +152,7 @@ (** {6 Parallel map on arrays} *) val array_parmap : ?init:(int -> unit) -> ?finalize:(unit -> unit) -> ?ncores:int -> ?chunksize:int -> ('a -> 'b) -> 'a array -> 'b array - (** [array_parmap ~ncores:n f a ] computes [Array.map f a] + (** [array_parmap ~ncores:n f a ] computes [Array.map f a] by forking [n] processes on a multicore machine. If the optional [chunksize] parameter is specified, the processes compute the result in an on-demand fashion @@ -158,7 +177,7 @@ This buffer can be reused in a series of calls to [array_float_parmap], avoiding the cost of reallocating it each time. *) val array_float_parmap : ?init:(int -> unit) -> ?finalize:(unit -> unit) -> ?ncores:int -> ?chunksize:int -> ?result: float array -> ?sharedbuffer: buf -> ('a -> float) -> 'a array -> float array - (** [array_float_parmap ~ncores:n f a ] computes [Array.map f a] by forking + (** [array_float_parmap ~ncores:n f a ] computes [Array.map f a] by forking [n] processes on a multicore machine, and preallocating the resulting array as shared memory, which allows significantly more efficient computation than calling the generic array_parmap function. 
If the @@ -196,12 +215,11 @@ val redirect : ?path:string -> id:int -> unit - (** Helper function that redirects stdout and stderr to files - located in the directory [path], carrying names of the shape + (** Helper function that redirects stdout and stderr to files + located in the directory [path], carrying names of the shape stdout.NNN and stderr.NNN where NNN is the [id] of the used core. Useful when writing initialisation functions to be passed as [init] argument to the parallel combinators. The default value for [path] is /tmp/.parmap.PPPP with PPPP the process id of the main program. *) - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ocaml-parmap-20170223.c9b0ee7/setcore.ml new/ocaml-parmap-20190330.8d19c66/setcore.ml --- old/ocaml-parmap-20170223.c9b0ee7/setcore.ml 2017-02-24 01:29:58.000000000 +0100 +++ new/ocaml-parmap-20190330.8d19c66/setcore.ml 2019-03-30 17:57:21.000000000 +0100 @@ -1,4 +1,4 @@ -(* uses the native affinity interface to +(* uses the native affinity interface to declare that the current process should be attached to core number n *)
