On 21 March 2014, Cade Forester <[email protected]> wrote:
> > How about separating it from sort()? That is, an uniq()
> > function that would remove duplicates from an already sorted list
> > (results would be undefined if the input list is not sorted). That
> > would be consistent with how uniq(1) works on UNIX.
>
> This patch add uniq() function. uniq(list) will remove copies of
> repeated adjacent items
There is a problem with this patch: it removes the duplicates on the
fly, so if the comparison function fails on the second or subsequent
call the list is still modified. In contrast, sort() restores the list
to the initial state if the comparison fails at some point.
I'm attaching bellow my attempt to a fix. I also merged f_sort()
and f_uniq() in a single function, and made some minor optimisations.
On a related topic: adding an efficient "stable unique" would be
relatively straightforward too, using plain red-black trees. Now,
red-black trees are implemented as a header (namely sys/tree.h) on *BSD,
but not on other systems. Any comment on the preferred way to handle
this? I'd suggest testing for the existence of sys/tree.h in autoconf,
and also including a stripped down copy of it from, say, OpenBSD, for
the systems that don't have it.
/lcd
--
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
---
You received this message because you are subscribed to the Google Groups
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.
diff -r d2ef98a43b5d runtime/doc/change.txt
--- a/runtime/doc/change.txt Mon Mar 24 19:44:09 2014 +0100
+++ b/runtime/doc/change.txt Tue Mar 25 01:28:04 2014 +0200
@@ -1650,7 +1650,7 @@
7. Sorting text *sorting*
Vim has a sorting function and a sorting command. The sorting function can be
-found here: |sort()|.
+found here: |sort()|, |uniq()|.
*:sor* *:sort*
:[range]sor[t][!] [i][u][r][n][x][o] [/{pattern}/]
diff -r d2ef98a43b5d runtime/doc/eval.txt
--- a/runtime/doc/eval.txt Mon Mar 24 19:44:09 2014 +0100
+++ b/runtime/doc/eval.txt Tue Mar 25 01:28:04 2014 +0200
@@ -327,6 +327,7 @@
Changing the order of items in a list: >
:call sort(list) " sort a list alphabetically
:call reverse(list) " reverse the order of items
+ :call uniq(sort(list)) " sort and remove duplicates
For loop ~
@@ -2005,6 +2006,8 @@
type( {name}) Number type of variable {name}
undofile( {name}) String undo file name for {name}
undotree() List undo file tree
+uniq( {list} [, {func} [, {dict}]])
+ List remove adjacent duplicates
values( {dict}) List values in {dict}
virtcol( {expr}) Number screen column of cursor or mark
visualmode( [expr]) String last visual mode used
@@ -6169,6 +6172,14 @@
blocks. Each item may again have an "alt"
item.
+uniq({list} [, {func} [, {dict}]]) *uniq()* *E702*
+ Remove second and succeeding copies of repeated adjacent
+ {list} items in-place. Returns {list}. If you want a list
+ to remain unmodified make a copy first: >
+ :let newlist = uniq(copy(mylist))
+< Compare function uses the string representation of each item.
+ For the use of {func} and {dict} see |sort()|.
+
values({dict}) *values()*
Return a |List| with all the values of {dict}. The |List| is
in arbitrary order.
diff -r d2ef98a43b5d runtime/doc/usr_41.txt
--- a/runtime/doc/usr_41.txt Mon Mar 24 19:44:09 2014 +0100
+++ b/runtime/doc/usr_41.txt Tue Mar 25 01:28:04 2014 +0200
@@ -623,6 +623,7 @@
map() change each List item
sort() sort a List
reverse() reverse the order of a List
+ uniq() remove copies of repeated adjacent items
split() split a String into a List
join() join List items into a String
range() return a List with a sequence of numbers
diff -r d2ef98a43b5d runtime/doc/version7.txt
--- a/runtime/doc/version7.txt Mon Mar 24 19:44:09 2014 +0100
+++ b/runtime/doc/version7.txt Tue Mar 25 01:28:04 2014 +0200
@@ -942,6 +942,7 @@
|tagfiles()| List with tags file names
|taglist()| get list of matching tags (Yegappan Lakshmanan)
|tr()| translate characters (Ron Aaron)
+|uniq()| remove copies of repeated adjacent list items
|values()| get List of Dictionary values
|winnr()| takes an argument: what window to use
|winrestview()| restore the view of the current window
diff -r d2ef98a43b5d src/eval.c
--- a/src/eval.c Mon Mar 24 19:44:09 2014 +0100
+++ b/src/eval.c Tue Mar 25 01:28:04 2014 +0200
@@ -744,6 +744,7 @@
static void f_type __ARGS((typval_T *argvars, typval_T *rettv));
static void f_undofile __ARGS((typval_T *argvars, typval_T *rettv));
static void f_undotree __ARGS((typval_T *argvars, typval_T *rettv));
+static void f_uniq __ARGS((typval_T *argvars, typval_T *rettv));
static void f_values __ARGS((typval_T *argvars, typval_T *rettv));
static void f_virtcol __ARGS((typval_T *argvars, typval_T *rettv));
static void f_visualmode __ARGS((typval_T *argvars, typval_T *rettv));
@@ -8150,6 +8151,7 @@
{"type", 1, 1, f_type},
{"undofile", 1, 1, f_undofile},
{"undotree", 0, 0, f_undotree},
+ {"uniq", 1, 3, f_uniq},
{"values", 1, 1, f_values},
{"virtcol", 1, 1, f_virtcol},
{"visualmode", 0, 1, f_visualmode},
@@ -17023,10 +17025,11 @@
static char_u *item_compare_func;
static dict_T *item_compare_selfdict;
static int item_compare_func_err;
+static int (*item_compare_func_ptr)__ARGS((const void *, const void *s2));
#define ITEM_COMPARE_FAIL 999
/*
- * Compare functions for f_sort() below.
+ * Compare functions for f_sort() and f_uniq() below.
*/
static int
#ifdef __BORLANDC__
@@ -17100,9 +17103,10 @@
* "sort({list})" function
*/
static void
-f_sort(argvars, rettv)
- typval_T *argvars;
- typval_T *rettv;
+do_sort_uniq(argvars, rettv, sort)
+ typval_T *argvars;
+ typval_T *rettv;
+ int sort;
{
list_T *l;
listitem_T *li;
@@ -17163,29 +17167,63 @@
ptrs = (listitem_T **)alloc((int)(len * sizeof(listitem_T *)));
if (ptrs == NULL)
return;
+
i = 0;
- for (li = l->lv_first; li != NULL; li = li->li_next)
- ptrs[i++] = li;
-
- item_compare_func_err = FALSE;
- /* test the compare function */
- if (item_compare_func != NULL
- && item_compare2((void *)&ptrs[0], (void *)&ptrs[1])
- == ITEM_COMPARE_FAIL)
- EMSG(_("E702: Sort compare function failed"));
- else
- {
- /* Sort the array with item pointers. */
- qsort((void *)ptrs, (size_t)len, sizeof(listitem_T *),
- item_compare_func == NULL ? item_compare : item_compare2);
+ if (sort)
+ {
+ /* f_sort(): ptr will be the sorted list */
+ for (li = l->lv_first; li != NULL; li = li->li_next)
+ ptrs[i++] = li;
+
+ item_compare_func_err = FALSE;
+ /* test the compare function */
+ if (item_compare_func != NULL
+ && item_compare2((void *)&ptrs[0], (void *)&ptrs[1])
+ ==
ITEM_COMPARE_FAIL)
+ EMSG(_("E702: Sort compare function failed"));
+ else
+ {
+ /* Sort the array with item pointers. */
+ qsort((void *)ptrs, (size_t)len, sizeof(listitem_T *),
+ item_compare_func == NULL ? item_compare :
item_compare2);
+
+ if (!item_compare_func_err)
+ {
+ /* Clear the List and append the items in the sorted order.
*/
+ l->lv_first = l->lv_last = l->lv_idx_item = NULL;
+ l->lv_len = 0;
+ for (i = 0; i < len; ++i)
+ list_append(l, ptrs[i]);
+ }
+ }
+ }
+ else
+ {
+ /* f_uniq(): ptr will be a stack of items to remove */
+ item_compare_func_err = FALSE;
+ item_compare_func_ptr = item_compare_func ? item_compare2 :
item_compare;
+
+ for(li = l->lv_first; li != NULL && li->li_next != NULL; li =
li->li_next)
+ {
+ if (item_compare_func_ptr((void *)&li, (void *)&li->li_next) ==
0)
+ ptrs[i++] = li;
+ if (item_compare_func_err)
+ {
+ EMSG(_("E702: Uniq compare function failed"));
+ break;
+ }
+ }
if (!item_compare_func_err)
{
- /* Clear the List and append the items in the sorted order. */
- l->lv_first = l->lv_last = l->lv_idx_item = NULL;
- l->lv_len = 0;
- for (i = 0; i < len; ++i)
- list_append(l, ptrs[i]);
+ while (i-- > 0)
+ {
+ li = ptrs[i]->li_next;
+ ptrs[i]->li_next = li->li_next;
+ list_fix_watch(l, li);
+ listitem_free(li);
+ l->lv_len--;
+ }
}
}
@@ -17194,6 +17232,28 @@
}
/*
+ * "sort({list})" function
+ */
+ static void
+f_sort(argvars, rettv)
+ typval_T *argvars;
+ typval_T *rettv;
+{
+ do_sort_uniq(argvars, rettv, 1);
+}
+
+/*
+ * "uniq({list})" function
+ */
+ static void
+f_uniq(argvars, rettv)
+ typval_T *argvars;
+ typval_T *rettv;
+{
+ do_sort_uniq(argvars, rettv, 0);
+}
+
+/*
* "soundfold({word})" function
*/
static void
diff -r d2ef98a43b5d src/testdir/test55.in
--- a/src/testdir/test55.in Mon Mar 24 19:44:09 2014 +0100
+++ b/src/testdir/test55.in Tue Mar 25 01:28:04 2014 +0200
@@ -323,13 +323,15 @@
: $put ='caught ' . v:exception
:endtry
:"
-:" reverse() and sort()
-:let l = ['-0', 'A11', 2, 'xaaa', 4, 'foo', 'foo6', [0, 1, 2], 'x8']
+:" reverse(), sort(), uniq()
+:let l = ['-0', 'A11', 2, 2, 'xaaa', 4, 'foo', 'foo6', 'foo', [0, 1, 2], 'x8',
[0, 1, 2], 1.5]
+:$put =string(uniq(copy(l)))
:$put =string(reverse(l))
:$put =string(reverse(reverse(l)))
:$put =string(sort(l))
:$put =string(reverse(sort(l)))
:$put =string(sort(reverse(sort(l))))
+:$put =string(uniq(sort(l)))
:"
:" splitting a string to a List
:$put =string(split(' aa bb '))
diff -r d2ef98a43b5d src/testdir/test55.ok
--- a/src/testdir/test55.ok Mon Mar 24 19:44:09 2014 +0100
+++ b/src/testdir/test55.ok Tue Mar 25 01:28:04 2014 +0200
@@ -94,11 +94,13 @@
caught a:000[2]
caught a:000[3]
[1, 2, [3, 9, 5, 6], {'a': 12, '5': 8}]
-['x8', [0, 1, 2], 'foo6', 'foo', 4, 'xaaa', 2, 'A11', '-0']
-['x8', [0, 1, 2], 'foo6', 'foo', 4, 'xaaa', 2, 'A11', '-0']
-['-0', 'A11', 'foo', 'foo6', 'x8', 'xaaa', 2, 4, [0, 1, 2]]
-[[0, 1, 2], 4, 2, 'xaaa', 'x8', 'foo6', 'foo', 'A11', '-0']
-['-0', 'A11', 'foo', 'foo6', 'x8', 'xaaa', 2, 4, [0, 1, 2]]
+['-0', 'A11', 2, 'xaaa', 4, 'foo', 'foo6', 'foo', [0, 1, 2], 'x8', [0, 1, 2],
1.5]
+[1.5, [0, 1, 2], 'x8', [0, 1, 2], 'foo', 'foo6', 'foo', 4, 'xaaa', 2, 2,
'A11', '-0']
+[1.5, [0, 1, 2], 'x8', [0, 1, 2], 'foo', 'foo6', 'foo', 4, 'xaaa', 2, 2,
'A11', '-0']
+['-0', 'A11', 'foo', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 2, 4, [0, 1, 2], [0,
1, 2]]
+[[0, 1, 2], [0, 1, 2], 4, 2, 2, 1.5, 'xaaa', 'x8', 'foo6', 'foo', 'foo',
'A11', '-0']
+['-0', 'A11', 'foo', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 2, 4, [0, 1, 2], [0,
1, 2]]
+['-0', 'A11', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 4, [0, 1, 2]]
['aa', 'bb']
['aa', 'bb']
['', 'aa', 'bb', '']