On 25 March 2014, Bram Moolenaar <[email protected]> wrote:
>
> lcd wrote:
>
> > On 21 March 2014, Cade Forester <[email protected]> wrote:
> > > > How about separating it from sort()? That is, an uniq()
> > > > function that would remove duplicates from an already sorted
> > > > list (results would be undefined if the input list is not
> > > > sorted). That would be consistent with how uniq(1) works on
> > > > UNIX.
> > >
> > > This patch add uniq() function. uniq(list) will remove copies of
> > > repeated adjacent items
> >
> > There is a problem with this patch: it removes the duplicates
> > on the fly, so if the comparison function fails on the second or
> > subsequent call the list is still modified. In contrast, sort()
> > restores the list to the initial state if the comparison fails at
> > some point.
> >
> > I'm attaching bellow my attempt to a fix. I also merged
> > f_sort() and f_uniq() in a single function, and made some minor
> > optimisations.
> >
> > On a related topic: adding an efficient "stable unique" would be
> > relatively straightforward too, using plain red-black trees. Now,
> > red-black trees are implemented as a header (namely sys/tree.h) on
> > *BSD, but not on other systems. Any comment on the preferred way to
> > handle this? I'd suggest testing for the existence of sys/tree.h in
> > autoconf, and also including a stripped down copy of it from, say,
> > OpenBSD, for the systems that don't have it.
>
> There are a few problems with this implementation.
>
> The li_prev pointer is not updated.
Right: this comes from the initial patch. I should have checked
what is actually going on.
> When the first item is not a list the error is for sort(), even when
> uniq() is used.
Updated patch below.
/lcd
--
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
---
You received this message because you are subscribed to the Google Groups
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.
diff -r d2ef98a43b5d runtime/doc/change.txt
--- a/runtime/doc/change.txt Mon Mar 24 19:44:09 2014 +0100
+++ b/runtime/doc/change.txt Tue Mar 25 16:49:48 2014 +0200
@@ -1650,7 +1650,7 @@
7. Sorting text *sorting*
Vim has a sorting function and a sorting command. The sorting function can be
-found here: |sort()|.
+found here: |sort()|, |uniq()|.
*:sor* *:sort*
:[range]sor[t][!] [i][u][r][n][x][o] [/{pattern}/]
diff -r d2ef98a43b5d runtime/doc/eval.txt
--- a/runtime/doc/eval.txt Mon Mar 24 19:44:09 2014 +0100
+++ b/runtime/doc/eval.txt Tue Mar 25 16:49:48 2014 +0200
@@ -327,6 +327,7 @@
Changing the order of items in a list: >
:call sort(list) " sort a list alphabetically
:call reverse(list) " reverse the order of items
+ :call uniq(sort(list)) " sort and remove duplicates
For loop ~
@@ -2005,6 +2006,8 @@
type( {name}) Number type of variable {name}
undofile( {name}) String undo file name for {name}
undotree() List undo file tree
+uniq( {list} [, {func} [, {dict}]])
+ List remove adjacent duplicates
values( {dict}) List values in {dict}
virtcol( {expr}) Number screen column of cursor or mark
visualmode( [expr]) String last visual mode used
@@ -6169,6 +6172,14 @@
blocks. Each item may again have an "alt"
item.
+uniq({list} [, {func} [, {dict}]]) *uniq()* *E702*
+ Remove second and succeeding copies of repeated adjacent
+ {list} items in-place. Returns {list}. If you want a list
+ to remain unmodified make a copy first: >
+ :let newlist = uniq(copy(mylist))
+< Compare function uses the string representation of each item.
+ For the use of {func} and {dict} see |sort()|.
+
values({dict}) *values()*
Return a |List| with all the values of {dict}. The |List| is
in arbitrary order.
diff -r d2ef98a43b5d runtime/doc/usr_41.txt
--- a/runtime/doc/usr_41.txt Mon Mar 24 19:44:09 2014 +0100
+++ b/runtime/doc/usr_41.txt Tue Mar 25 16:49:48 2014 +0200
@@ -623,6 +623,7 @@
map() change each List item
sort() sort a List
reverse() reverse the order of a List
+ uniq() remove copies of repeated adjacent items
split() split a String into a List
join() join List items into a String
range() return a List with a sequence of numbers
diff -r d2ef98a43b5d runtime/doc/version7.txt
--- a/runtime/doc/version7.txt Mon Mar 24 19:44:09 2014 +0100
+++ b/runtime/doc/version7.txt Tue Mar 25 16:49:48 2014 +0200
@@ -942,6 +942,7 @@
|tagfiles()| List with tags file names
|taglist()| get list of matching tags (Yegappan Lakshmanan)
|tr()| translate characters (Ron Aaron)
+|uniq()| remove copies of repeated adjacent list items
|values()| get List of Dictionary values
|winnr()| takes an argument: what window to use
|winrestview()| restore the view of the current window
diff -r d2ef98a43b5d src/eval.c
--- a/src/eval.c Mon Mar 24 19:44:09 2014 +0100
+++ b/src/eval.c Tue Mar 25 16:49:48 2014 +0200
@@ -744,6 +744,7 @@
static void f_type __ARGS((typval_T *argvars, typval_T *rettv));
static void f_undofile __ARGS((typval_T *argvars, typval_T *rettv));
static void f_undotree __ARGS((typval_T *argvars, typval_T *rettv));
+static void f_uniq __ARGS((typval_T *argvars, typval_T *rettv));
static void f_values __ARGS((typval_T *argvars, typval_T *rettv));
static void f_virtcol __ARGS((typval_T *argvars, typval_T *rettv));
static void f_visualmode __ARGS((typval_T *argvars, typval_T *rettv));
@@ -8150,6 +8151,7 @@
{"type", 1, 1, f_type},
{"undofile", 1, 1, f_undofile},
{"undotree", 0, 0, f_undotree},
+ {"uniq", 1, 3, f_uniq},
{"values", 1, 1, f_values},
{"virtcol", 1, 1, f_virtcol},
{"visualmode", 0, 1, f_visualmode},
@@ -17023,10 +17025,11 @@
static char_u *item_compare_func;
static dict_T *item_compare_selfdict;
static int item_compare_func_err;
+static int (*item_compare_func_ptr)__ARGS((const void *, const void *));
#define ITEM_COMPARE_FAIL 999
/*
- * Compare functions for f_sort() below.
+ * Compare functions for f_sort() and f_uniq() below.
*/
static int
#ifdef __BORLANDC__
@@ -17100,9 +17103,10 @@
* "sort({list})" function
*/
static void
-f_sort(argvars, rettv)
- typval_T *argvars;
- typval_T *rettv;
+do_sort_uniq(argvars, rettv, sort)
+ typval_T *argvars;
+ typval_T *rettv;
+ int sort;
{
list_T *l;
listitem_T *li;
@@ -17116,7 +17120,7 @@
{
l = argvars[0].vval.v_list;
if (l == NULL || tv_check_lock(l->lv_lock,
- (char_u *)_("sort() argument")))
+ (char_u *)_(sort ? "sort() argument" : "uniq() argument")))
return;
rettv->vval.v_list = l;
rettv->v_type = VAR_LIST;
@@ -17163,29 +17167,67 @@
ptrs = (listitem_T **)alloc((int)(len * sizeof(listitem_T *)));
if (ptrs == NULL)
return;
+
i = 0;
- for (li = l->lv_first; li != NULL; li = li->li_next)
- ptrs[i++] = li;
-
- item_compare_func_err = FALSE;
- /* test the compare function */
- if (item_compare_func != NULL
- && item_compare2((void *)&ptrs[0], (void *)&ptrs[1])
- == ITEM_COMPARE_FAIL)
- EMSG(_("E702: Sort compare function failed"));
- else
- {
- /* Sort the array with item pointers. */
- qsort((void *)ptrs, (size_t)len, sizeof(listitem_T *),
- item_compare_func == NULL ? item_compare : item_compare2);
+ if (sort)
+ {
+ /* f_sort(): ptr will be the sorted list */
+ for (li = l->lv_first; li != NULL; li = li->li_next)
+ ptrs[i++] = li;
+
+ item_compare_func_err = FALSE;
+ /* test the compare function */
+ if (item_compare_func != NULL
+ && item_compare2((void *)&ptrs[0], (void *)&ptrs[1])
+ ==
ITEM_COMPARE_FAIL)
+ EMSG(_("E702: Sort compare function failed"));
+ else
+ {
+ /* Sort the array with item pointers. */
+ qsort((void *)ptrs, (size_t)len, sizeof(listitem_T *),
+ item_compare_func == NULL ? item_compare :
item_compare2);
+
+ if (!item_compare_func_err)
+ {
+ /* Clear the List and append the items in the sorted order.
*/
+ l->lv_first = l->lv_last = l->lv_idx_item = NULL;
+ l->lv_len = 0;
+ for (i = 0; i < len; ++i)
+ list_append(l, ptrs[i]);
+ }
+ }
+ }
+ else
+ {
+ /* f_uniq(): ptr will be a stack of items to remove */
+ item_compare_func_err = FALSE;
+ item_compare_func_ptr = item_compare_func ? item_compare2 :
item_compare;
+
+ for(li = l->lv_first; li != NULL && li->li_next != NULL; li =
li->li_next)
+ {
+ if (item_compare_func_ptr((void *)&li, (void *)&li->li_next) ==
0)
+ ptrs[i++] = li;
+ if (item_compare_func_err)
+ {
+ EMSG(_("E702: Uniq compare function failed"));
+ break;
+ }
+ }
if (!item_compare_func_err)
{
- /* Clear the List and append the items in the sorted order. */
- l->lv_first = l->lv_last = l->lv_idx_item = NULL;
- l->lv_len = 0;
- for (i = 0; i < len; ++i)
- list_append(l, ptrs[i]);
+ while (i-- > 0)
+ {
+ li = ptrs[i]->li_next;
+ ptrs[i]->li_next = li->li_next;
+ if (li->li_next != NULL)
+ li->li_next->li_prev = ptrs[i];
+ else
+ l->lv_last = ptrs[i];
+ list_fix_watch(l, li);
+ listitem_free(li);
+ l->lv_len--;
+ }
}
}
@@ -17194,6 +17236,28 @@
}
/*
+ * "sort({list})" function
+ */
+ static void
+f_sort(argvars, rettv)
+ typval_T *argvars;
+ typval_T *rettv;
+{
+ do_sort_uniq(argvars, rettv, 1);
+}
+
+/*
+ * "uniq({list})" function
+ */
+ static void
+f_uniq(argvars, rettv)
+ typval_T *argvars;
+ typval_T *rettv;
+{
+ do_sort_uniq(argvars, rettv, 0);
+}
+
+/*
* "soundfold({word})" function
*/
static void
diff -r d2ef98a43b5d src/testdir/test55.in
--- a/src/testdir/test55.in Mon Mar 24 19:44:09 2014 +0100
+++ b/src/testdir/test55.in Tue Mar 25 16:49:48 2014 +0200
@@ -323,13 +323,15 @@
: $put ='caught ' . v:exception
:endtry
:"
-:" reverse() and sort()
-:let l = ['-0', 'A11', 2, 'xaaa', 4, 'foo', 'foo6', [0, 1, 2], 'x8']
+:" reverse(), sort(), uniq()
+:let l = ['-0', 'A11', 2, 2, 'xaaa', 4, 'foo', 'foo6', 'foo', [0, 1, 2], 'x8',
[0, 1, 2], 1.5]
+:$put =string(uniq(copy(l)))
:$put =string(reverse(l))
:$put =string(reverse(reverse(l)))
:$put =string(sort(l))
:$put =string(reverse(sort(l)))
:$put =string(sort(reverse(sort(l))))
+:$put =string(uniq(sort(l)))
:"
:" splitting a string to a List
:$put =string(split(' aa bb '))
diff -r d2ef98a43b5d src/testdir/test55.ok
--- a/src/testdir/test55.ok Mon Mar 24 19:44:09 2014 +0100
+++ b/src/testdir/test55.ok Tue Mar 25 16:49:48 2014 +0200
@@ -94,11 +94,13 @@
caught a:000[2]
caught a:000[3]
[1, 2, [3, 9, 5, 6], {'a': 12, '5': 8}]
-['x8', [0, 1, 2], 'foo6', 'foo', 4, 'xaaa', 2, 'A11', '-0']
-['x8', [0, 1, 2], 'foo6', 'foo', 4, 'xaaa', 2, 'A11', '-0']
-['-0', 'A11', 'foo', 'foo6', 'x8', 'xaaa', 2, 4, [0, 1, 2]]
-[[0, 1, 2], 4, 2, 'xaaa', 'x8', 'foo6', 'foo', 'A11', '-0']
-['-0', 'A11', 'foo', 'foo6', 'x8', 'xaaa', 2, 4, [0, 1, 2]]
+['-0', 'A11', 2, 'xaaa', 4, 'foo', 'foo6', 'foo', [0, 1, 2], 'x8', [0, 1, 2],
1.5]
+[1.5, [0, 1, 2], 'x8', [0, 1, 2], 'foo', 'foo6', 'foo', 4, 'xaaa', 2, 2,
'A11', '-0']
+[1.5, [0, 1, 2], 'x8', [0, 1, 2], 'foo', 'foo6', 'foo', 4, 'xaaa', 2, 2,
'A11', '-0']
+['-0', 'A11', 'foo', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 2, 4, [0, 1, 2], [0,
1, 2]]
+[[0, 1, 2], [0, 1, 2], 4, 2, 2, 1.5, 'xaaa', 'x8', 'foo6', 'foo', 'foo',
'A11', '-0']
+['-0', 'A11', 'foo', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 2, 4, [0, 1, 2], [0,
1, 2]]
+['-0', 'A11', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 4, [0, 1, 2]]
['aa', 'bb']
['aa', 'bb']
['', 'aa', 'bb', '']