bug#36718: uniq treats distinct Korean characters equal

2019-07-18 Thread Paul Eggert
uniq just calls strcoll, and if strcoll (A, B) returns 0 then uniq assumes the 
lines are equal. So my guess is that your problem has something to do with 
strcoll, not with coreutils per se.






Re: [PATCH 1/1] cp: leverage copy_file_range syscall for copy

2019-07-18 Thread Olga Kornievskaia
On Thu, Jul 18, 2019 at 12:21 PM Pádraig Brady  wrote:
>
> On 18/07/19 16:08, Olga Kornievskaia wrote:
> > On Thu, Jul 18, 2019 at 7:54 AM Pádraig Brady  wrote:
> >>
> >> On 15/07/19 17:37, Olga Kornievskaia wrote:
> >>> From: Olga Kornievskaia 
> >>>
> >>> Add an option of --copy-offload that instead of doing a traditional
> >>> copy will utilize a copy_file_range() system call. For local file
> >>> system this system call adds the benefit that no copy from
> >>> kernel space into user space buffers provided by the copy will be
> >>> done. Instead, all the copying happens internally in the kernel.
> >>>
> >>> For some network file system (eg., NFS, CIFS) that have copy offload
> >>> functionality, it allows utilization of that feature from the copy
> >>> command.
> >>> ---
> >>>  src/copy.c| 32 
> >>>  src/copy.h|  5 +
> >>>  src/cp.c  |  9 -
> >>>  src/install.c |  1 +
> >>>  src/mv.c  |  1 +
> >>>  5 files changed, 47 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/src/copy.c b/src/copy.c
> >>> index 65cf65895..7d518a528 100644
> >>> --- a/src/copy.c
> >>> +++ b/src/copy.c
> >>> @@ -245,6 +245,26 @@ create_hole (int fd, char const *name, bool 
> >>> punch_holes, off_t size)
> >>>  }
> >>>
> >>>
> >>> +static bool
> >>> +copy_offload (int src_fd, int dest_fd, uintmax_t max_n_read)
> >>> +{
> >>> +  loff_t in_offset = 0, out_offset = 0, ret;
> >>> +  size_t nbytes = max_n_read;
> >>> +
> >>> +  while (nbytes)
> >>> +{
> >>> +  ret = copy_file_range (src_fd, _offset, dest_fd, _offset,
> >>> + nbytes, 0);
> >>> +  if (ret < 0)
> >>> +return false;
> >>> +  else if (ret == 0)
> >>> +break;
> >>> +  nbytes -= ret;
> >>> +}
> >>> +
> >>> +  return true;
> >>> +}
> >>> +
> >>>  /* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME,
> >>> honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer
> >>> BUF for temporary storage.  Copy no more than MAX_N_READ bytes.
> >>> @@ -1252,6 +1272,18 @@ copy_reg (char const *src_name, char const 
> >>> *dst_name,
> >>>  }
> >>>  }
> >>>
> >>> +  if (data_copy_required && x->copy_offload)
> >>> +{
> >>> +  if (! copy_offload (source_desc, dest_desc, UINTMAX_MAX))
> >>> +{
> >>> +  error (0, errno, _("failed to copy_file_range %s from %s"),
> >>> +  quoteaf_n (0, dst_name), quoteaf_n (1, src_name));
> >>> +  return_val = false;
> >>> +  goto close_src_and_dst_desc;
> >>> +}
> >>> +  data_copy_required = false;
> >>> +}
> >>> +
> >>>if (data_copy_required)
> >>>  {
> >>>/* Choose a suitable buffer size; it may be adjusted later.  */
> >>> diff --git a/src/copy.h b/src/copy.h
> >>> index 102516c68..1503aba3b 100644
> >>> --- a/src/copy.h
> >>> +++ b/src/copy.h
> >>> @@ -261,6 +261,11 @@ struct cp_options
> >>>/* Control creation of COW files.  */
> >>>enum Reflink_type reflink_mode;
> >>>
> >>> +  /* If copy specified with --offload, then use copy_file_range system
> >>> +   * call to do the copy
> >>> +   */
> >>> +  bool copy_offload;
> >>> +
> >>>/* This is a set of destination name/inode/dev triples.  Each such 
> >>> triple
> >>>   represents a file we have created corresponding to a source file 
> >>> name
> >>>   that was specified on the command line.  Use it to avoid clobbering
> >>> diff --git a/src/cp.c b/src/cp.c
> >>> index 707f3984a..8eb9a4d9d 100644
> >>> --- a/src/cp.c
> >>> +++ b/src/cp.c
> >>> @@ -71,7 +71,8 @@ enum
> >>>REFLINK_OPTION,
> >>>SPARSE_OPTION,
> >>>STRIP_TRAILING_SLASHES_OPTION,
> >>> -  UNLINK_DEST_BEFORE_OPENING
> >>> +  UNLINK_DEST_BEFORE_OPENING,
> >>> +  COPY_OFFLOAD_OPTION,
> >>>  };
> >>>
> >>>  /* True if the kernel is SELinux enabled.  */
> >>> @@ -132,6 +133,7 @@ static struct option const long_opts[] =
> >>>{"target-directory", required_argument, NULL, 't'},
> >>>{"update", no_argument, NULL, 'u'},
> >>>{"verbose", no_argument, NULL, 'v'},
> >>> +  {"copy-offload", optional_argument, NULL, COPY_OFFLOAD_OPTION},
> >>>{GETOPT_SELINUX_CONTEXT_OPTION_DECL},
> >>>{GETOPT_HELP_OPTION_DECL},
> >>>{GETOPT_VERSION_OPTION_DECL},
> >>> @@ -794,6 +796,7 @@ cp_option_init (struct cp_options *x)
> >>>x->install_mode = false;
> >>>x->one_file_system = false;
> >>>x->reflink_mode = REFLINK_NEVER;
> >>> +  x->copy_offload = false;
> >>>
> >>>x->preserve_ownership = false;
> >>>x->preserve_links = false;
> >>> @@ -969,6 +972,10 @@ main (int argc, char **argv)
> >>> reflink_type_string, 
> >>> reflink_type);
> >>>break;
> >>>
> >>> + case COPY_OFFLOAD_OPTION:
> >>> +   x.copy_offload = true;
> >>> +  break;
> >>> +
> >>>  case 'a':
> >>>/* Like -dR --preserve=all with reduced failure diagnostics.  
> >>> */
> >>>

Re: [PATCH 1/1] cp: leverage copy_file_range syscall for copy

2019-07-18 Thread Pádraig Brady
On 18/07/19 16:08, Olga Kornievskaia wrote:
> On Thu, Jul 18, 2019 at 7:54 AM Pádraig Brady  wrote:
>>
>> On 15/07/19 17:37, Olga Kornievskaia wrote:
>>> From: Olga Kornievskaia 
>>>
>>> Add an option of --copy-offload that instead of doing a traditional
>>> copy will utilize a copy_file_range() system call. For local file
>>> system this system call adds the benefit that no copy from
>>> kernel space into user space buffers provided by the copy will be
>>> done. Instead, all the copying happens internally in the kernel.
>>>
>>> For some network file system (eg., NFS, CIFS) that have copy offload
>>> functionality, it allows utilization of that feature from the copy
>>> command.
>>> ---
>>>  src/copy.c| 32 
>>>  src/copy.h|  5 +
>>>  src/cp.c  |  9 -
>>>  src/install.c |  1 +
>>>  src/mv.c  |  1 +
>>>  5 files changed, 47 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/copy.c b/src/copy.c
>>> index 65cf65895..7d518a528 100644
>>> --- a/src/copy.c
>>> +++ b/src/copy.c
>>> @@ -245,6 +245,26 @@ create_hole (int fd, char const *name, bool 
>>> punch_holes, off_t size)
>>>  }
>>>
>>>
>>> +static bool
>>> +copy_offload (int src_fd, int dest_fd, uintmax_t max_n_read)
>>> +{
>>> +  loff_t in_offset = 0, out_offset = 0, ret;
>>> +  size_t nbytes = max_n_read;
>>> +
>>> +  while (nbytes)
>>> +{
>>> +  ret = copy_file_range (src_fd, _offset, dest_fd, _offset,
>>> + nbytes, 0);
>>> +  if (ret < 0)
>>> +return false;
>>> +  else if (ret == 0)
>>> +break;
>>> +  nbytes -= ret;
>>> +}
>>> +
>>> +  return true;
>>> +}
>>> +
>>>  /* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME,
>>> honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer
>>> BUF for temporary storage.  Copy no more than MAX_N_READ bytes.
>>> @@ -1252,6 +1272,18 @@ copy_reg (char const *src_name, char const *dst_name,
>>>  }
>>>  }
>>>
>>> +  if (data_copy_required && x->copy_offload)
>>> +{
>>> +  if (! copy_offload (source_desc, dest_desc, UINTMAX_MAX))
>>> +{
>>> +  error (0, errno, _("failed to copy_file_range %s from %s"),
>>> +  quoteaf_n (0, dst_name), quoteaf_n (1, src_name));
>>> +  return_val = false;
>>> +  goto close_src_and_dst_desc;
>>> +}
>>> +  data_copy_required = false;
>>> +}
>>> +
>>>if (data_copy_required)
>>>  {
>>>/* Choose a suitable buffer size; it may be adjusted later.  */
>>> diff --git a/src/copy.h b/src/copy.h
>>> index 102516c68..1503aba3b 100644
>>> --- a/src/copy.h
>>> +++ b/src/copy.h
>>> @@ -261,6 +261,11 @@ struct cp_options
>>>/* Control creation of COW files.  */
>>>enum Reflink_type reflink_mode;
>>>
>>> +  /* If copy specified with --offload, then use copy_file_range system
>>> +   * call to do the copy
>>> +   */
>>> +  bool copy_offload;
>>> +
>>>/* This is a set of destination name/inode/dev triples.  Each such triple
>>>   represents a file we have created corresponding to a source file name
>>>   that was specified on the command line.  Use it to avoid clobbering
>>> diff --git a/src/cp.c b/src/cp.c
>>> index 707f3984a..8eb9a4d9d 100644
>>> --- a/src/cp.c
>>> +++ b/src/cp.c
>>> @@ -71,7 +71,8 @@ enum
>>>REFLINK_OPTION,
>>>SPARSE_OPTION,
>>>STRIP_TRAILING_SLASHES_OPTION,
>>> -  UNLINK_DEST_BEFORE_OPENING
>>> +  UNLINK_DEST_BEFORE_OPENING,
>>> +  COPY_OFFLOAD_OPTION,
>>>  };
>>>
>>>  /* True if the kernel is SELinux enabled.  */
>>> @@ -132,6 +133,7 @@ static struct option const long_opts[] =
>>>{"target-directory", required_argument, NULL, 't'},
>>>{"update", no_argument, NULL, 'u'},
>>>{"verbose", no_argument, NULL, 'v'},
>>> +  {"copy-offload", optional_argument, NULL, COPY_OFFLOAD_OPTION},
>>>{GETOPT_SELINUX_CONTEXT_OPTION_DECL},
>>>{GETOPT_HELP_OPTION_DECL},
>>>{GETOPT_VERSION_OPTION_DECL},
>>> @@ -794,6 +796,7 @@ cp_option_init (struct cp_options *x)
>>>x->install_mode = false;
>>>x->one_file_system = false;
>>>x->reflink_mode = REFLINK_NEVER;
>>> +  x->copy_offload = false;
>>>
>>>x->preserve_ownership = false;
>>>x->preserve_links = false;
>>> @@ -969,6 +972,10 @@ main (int argc, char **argv)
>>> reflink_type_string, reflink_type);
>>>break;
>>>
>>> + case COPY_OFFLOAD_OPTION:
>>> +   x.copy_offload = true;
>>> +  break;
>>> +
>>>  case 'a':
>>>/* Like -dR --preserve=all with reduced failure diagnostics.  */
>>>x.dereference = DEREF_NEVER;
>>> diff --git a/src/install.c b/src/install.c
>>> index bde69c994..9870a321f 100644
>>> --- a/src/install.c
>>> +++ b/src/install.c
>>> @@ -303,6 +303,7 @@ cp_option_init (struct cp_options *x)
>>>x->verbose = false;
>>>x->dest_info = NULL;
>>>x->src_info = NULL;
>>> +  x->copy_offload = false;
>>>  }
>>>
>>>  #ifdef 

Re: [PATCH 1/1] cp: leverage copy_file_range syscall for copy

2019-07-18 Thread Olga Kornievskaia
On Thu, Jul 18, 2019 at 7:54 AM Pádraig Brady  wrote:
>
> On 15/07/19 17:37, Olga Kornievskaia wrote:
> > From: Olga Kornievskaia 
> >
> > Add an option of --copy-offload that instead of doing a traditional
> > copy will utilize a copy_file_range() system call. For local file
> > system this system call adds the benefit that no copy from
> > kernel space into user space buffers provided by the copy will be
> > done. Instead, all the copying happens internally in the kernel.
> >
> > For some network file system (eg., NFS, CIFS) that have copy offload
> > functionality, it allows utilization of that feature from the copy
> > command.
> > ---
> >  src/copy.c| 32 
> >  src/copy.h|  5 +
> >  src/cp.c  |  9 -
> >  src/install.c |  1 +
> >  src/mv.c  |  1 +
> >  5 files changed, 47 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/copy.c b/src/copy.c
> > index 65cf65895..7d518a528 100644
> > --- a/src/copy.c
> > +++ b/src/copy.c
> > @@ -245,6 +245,26 @@ create_hole (int fd, char const *name, bool 
> > punch_holes, off_t size)
> >  }
> >
> >
> > +static bool
> > +copy_offload (int src_fd, int dest_fd, uintmax_t max_n_read)
> > +{
> > +  loff_t in_offset = 0, out_offset = 0, ret;
> > +  size_t nbytes = max_n_read;
> > +
> > +  while (nbytes)
> > +{
> > +  ret = copy_file_range (src_fd, _offset, dest_fd, _offset,
> > + nbytes, 0);
> > +  if (ret < 0)
> > +return false;
> > +  else if (ret == 0)
> > +break;
> > +  nbytes -= ret;
> > +}
> > +
> > +  return true;
> > +}
> > +
> >  /* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME,
> > honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer
> > BUF for temporary storage.  Copy no more than MAX_N_READ bytes.
> > @@ -1252,6 +1272,18 @@ copy_reg (char const *src_name, char const *dst_name,
> >  }
> >  }
> >
> > +  if (data_copy_required && x->copy_offload)
> > +{
> > +  if (! copy_offload (source_desc, dest_desc, UINTMAX_MAX))
> > +{
> > +  error (0, errno, _("failed to copy_file_range %s from %s"),
> > +  quoteaf_n (0, dst_name), quoteaf_n (1, src_name));
> > +  return_val = false;
> > +  goto close_src_and_dst_desc;
> > +}
> > +  data_copy_required = false;
> > +}
> > +
> >if (data_copy_required)
> >  {
> >/* Choose a suitable buffer size; it may be adjusted later.  */
> > diff --git a/src/copy.h b/src/copy.h
> > index 102516c68..1503aba3b 100644
> > --- a/src/copy.h
> > +++ b/src/copy.h
> > @@ -261,6 +261,11 @@ struct cp_options
> >/* Control creation of COW files.  */
> >enum Reflink_type reflink_mode;
> >
> > +  /* If copy specified with --offload, then use copy_file_range system
> > +   * call to do the copy
> > +   */
> > +  bool copy_offload;
> > +
> >/* This is a set of destination name/inode/dev triples.  Each such triple
> >   represents a file we have created corresponding to a source file name
> >   that was specified on the command line.  Use it to avoid clobbering
> > diff --git a/src/cp.c b/src/cp.c
> > index 707f3984a..8eb9a4d9d 100644
> > --- a/src/cp.c
> > +++ b/src/cp.c
> > @@ -71,7 +71,8 @@ enum
> >REFLINK_OPTION,
> >SPARSE_OPTION,
> >STRIP_TRAILING_SLASHES_OPTION,
> > -  UNLINK_DEST_BEFORE_OPENING
> > +  UNLINK_DEST_BEFORE_OPENING,
> > +  COPY_OFFLOAD_OPTION,
> >  };
> >
> >  /* True if the kernel is SELinux enabled.  */
> > @@ -132,6 +133,7 @@ static struct option const long_opts[] =
> >{"target-directory", required_argument, NULL, 't'},
> >{"update", no_argument, NULL, 'u'},
> >{"verbose", no_argument, NULL, 'v'},
> > +  {"copy-offload", optional_argument, NULL, COPY_OFFLOAD_OPTION},
> >{GETOPT_SELINUX_CONTEXT_OPTION_DECL},
> >{GETOPT_HELP_OPTION_DECL},
> >{GETOPT_VERSION_OPTION_DECL},
> > @@ -794,6 +796,7 @@ cp_option_init (struct cp_options *x)
> >x->install_mode = false;
> >x->one_file_system = false;
> >x->reflink_mode = REFLINK_NEVER;
> > +  x->copy_offload = false;
> >
> >x->preserve_ownership = false;
> >x->preserve_links = false;
> > @@ -969,6 +972,10 @@ main (int argc, char **argv)
> > reflink_type_string, reflink_type);
> >break;
> >
> > + case COPY_OFFLOAD_OPTION:
> > +   x.copy_offload = true;
> > +  break;
> > +
> >  case 'a':
> >/* Like -dR --preserve=all with reduced failure diagnostics.  */
> >x.dereference = DEREF_NEVER;
> > diff --git a/src/install.c b/src/install.c
> > index bde69c994..9870a321f 100644
> > --- a/src/install.c
> > +++ b/src/install.c
> > @@ -303,6 +303,7 @@ cp_option_init (struct cp_options *x)
> >x->verbose = false;
> >x->dest_info = NULL;
> >x->src_info = NULL;
> > +  x->copy_offload = false;
> >  }
> >
> >  #ifdef ENABLE_MATCHPATHCON
> > diff --git a/src/mv.c 

bug#36718: uniq treats distinct Korean characters equal

2019-07-18 Thread Felix Hamme
Dear all,

I found that, when performing uniq on some Korean characters, it treats
them as equal (counts as duplicate) although the characters aren't
equal. To be precise, it happened to me on the Characters 프 (U+D504) and
틀 (U+D2C0).

An example (input, expected output, actual output) can be found in the
attachment.
I've tried that using uniq (GNU coreutils) 8.30.

Greetings
Felix Hamme


uniq-korean-characters-bug.tar.gz
Description: application/gzip


Re: [PATCH 1/1] cp: leverage copy_file_range syscall for copy

2019-07-18 Thread Pádraig Brady
On 15/07/19 17:37, Olga Kornievskaia wrote:
> From: Olga Kornievskaia 
> 
> Add an option of --copy-offload that instead of doing a traditional
> copy will utilize a copy_file_range() system call. For local file
> system this system call adds the benefit that no copy from
> kernel space into user space buffers provided by the copy will be
> done. Instead, all the copying happens internally in the kernel.
> 
> For some network file system (eg., NFS, CIFS) that have copy offload
> functionality, it allows utilization of that feature from the copy
> command.
> ---
>  src/copy.c| 32 
>  src/copy.h|  5 +
>  src/cp.c  |  9 -
>  src/install.c |  1 +
>  src/mv.c  |  1 +
>  5 files changed, 47 insertions(+), 1 deletion(-)
> 
> diff --git a/src/copy.c b/src/copy.c
> index 65cf65895..7d518a528 100644
> --- a/src/copy.c
> +++ b/src/copy.c
> @@ -245,6 +245,26 @@ create_hole (int fd, char const *name, bool punch_holes, 
> off_t size)
>  }
>  
>  
> +static bool
> +copy_offload (int src_fd, int dest_fd, uintmax_t max_n_read)
> +{
> +  loff_t in_offset = 0, out_offset = 0, ret;
> +  size_t nbytes = max_n_read;
> +
> +  while (nbytes)
> +{
> +  ret = copy_file_range (src_fd, _offset, dest_fd, _offset,
> + nbytes, 0);
> +  if (ret < 0)
> +return false;
> +  else if (ret == 0)
> +break;
> +  nbytes -= ret;
> +}
> +
> +  return true;
> +}
> +
>  /* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME,
> honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer
> BUF for temporary storage.  Copy no more than MAX_N_READ bytes.
> @@ -1252,6 +1272,18 @@ copy_reg (char const *src_name, char const *dst_name,
>  }
>  }
>  
> +  if (data_copy_required && x->copy_offload)
> +{
> +  if (! copy_offload (source_desc, dest_desc, UINTMAX_MAX))
> +{
> +  error (0, errno, _("failed to copy_file_range %s from %s"),
> +  quoteaf_n (0, dst_name), quoteaf_n (1, src_name));
> +  return_val = false;
> +  goto close_src_and_dst_desc;
> +}
> +  data_copy_required = false;
> +}
> +
>if (data_copy_required)
>  {
>/* Choose a suitable buffer size; it may be adjusted later.  */
> diff --git a/src/copy.h b/src/copy.h
> index 102516c68..1503aba3b 100644
> --- a/src/copy.h
> +++ b/src/copy.h
> @@ -261,6 +261,11 @@ struct cp_options
>/* Control creation of COW files.  */
>enum Reflink_type reflink_mode;
>  
> +  /* If copy specified with --offload, then use copy_file_range system
> +   * call to do the copy
> +   */
> +  bool copy_offload;
> +
>/* This is a set of destination name/inode/dev triples.  Each such triple
>   represents a file we have created corresponding to a source file name
>   that was specified on the command line.  Use it to avoid clobbering
> diff --git a/src/cp.c b/src/cp.c
> index 707f3984a..8eb9a4d9d 100644
> --- a/src/cp.c
> +++ b/src/cp.c
> @@ -71,7 +71,8 @@ enum
>REFLINK_OPTION,
>SPARSE_OPTION,
>STRIP_TRAILING_SLASHES_OPTION,
> -  UNLINK_DEST_BEFORE_OPENING
> +  UNLINK_DEST_BEFORE_OPENING,
> +  COPY_OFFLOAD_OPTION,
>  };
>  
>  /* True if the kernel is SELinux enabled.  */
> @@ -132,6 +133,7 @@ static struct option const long_opts[] =
>{"target-directory", required_argument, NULL, 't'},
>{"update", no_argument, NULL, 'u'},
>{"verbose", no_argument, NULL, 'v'},
> +  {"copy-offload", optional_argument, NULL, COPY_OFFLOAD_OPTION},
>{GETOPT_SELINUX_CONTEXT_OPTION_DECL},
>{GETOPT_HELP_OPTION_DECL},
>{GETOPT_VERSION_OPTION_DECL},
> @@ -794,6 +796,7 @@ cp_option_init (struct cp_options *x)
>x->install_mode = false;
>x->one_file_system = false;
>x->reflink_mode = REFLINK_NEVER;
> +  x->copy_offload = false;
>  
>x->preserve_ownership = false;
>x->preserve_links = false;
> @@ -969,6 +972,10 @@ main (int argc, char **argv)
> reflink_type_string, reflink_type);
>break;
>  
> + case COPY_OFFLOAD_OPTION:
> +   x.copy_offload = true;
> +  break;
> +
>  case 'a':
>/* Like -dR --preserve=all with reduced failure diagnostics.  */
>x.dereference = DEREF_NEVER;
> diff --git a/src/install.c b/src/install.c
> index bde69c994..9870a321f 100644
> --- a/src/install.c
> +++ b/src/install.c
> @@ -303,6 +303,7 @@ cp_option_init (struct cp_options *x)
>x->verbose = false;
>x->dest_info = NULL;
>x->src_info = NULL;
> +  x->copy_offload = false;
>  }
>  
>  #ifdef ENABLE_MATCHPATHCON
> diff --git a/src/mv.c b/src/mv.c
> index d1dd1d224..997087187 100644
> --- a/src/mv.c
> +++ b/src/mv.c
> @@ -144,6 +144,7 @@ cp_option_init (struct cp_options *x)
>x->verbose = false;
>x->dest_info = NULL;
>x->src_info = NULL;
> +  x->copy_offload = false;
>  }
>  
>  /* FILE is the last operand of this command.  Return true if FILE