Re: fulltext sorting in bfe_fulltext

Samuele Kaplun Thu, 28 Jan 2010 10:04:22 +0100

Hi Theodoros,

In data martedì 26 gennaio 2010 13:34:18, Theodoros Theodoropoulos ha scritto:
> Hi to all,
> 
> I have an issue where for all records with several fulltexts, in
> detailed view, the links are displayed in non-alphabetic order.
> 
> For example, a record with the following MARC fields:
> 8564_ $$uhttp://invenio.lib.auth.gr/record/103094/files/arc-2008-40800.pdf
> 8564_ $$uhttp://invenio.lib.auth.gr/record/103094/files/arc-2008-40800_001.pdf$$y arc-2008-40800_001.pdf
> 8564_ $$uhttp://invenio.lib.auth.gr/record/103094/files/arc-2008-40800_002.pdf$$y arc-2008-40800_002.pdf
> 8564_ $$uhttp://invenio.lib.auth.gr/record/103094/files/arc-2008-40800_003.pdf$$y arc-2008-40800_003.pdf
> 8564_ $$uhttp://invenio.lib.auth.gr/record/103094/files/arc-2008-40800_004.pdf$$y arc-2008-40800_004.pdf
> 
> Is displayed (both in detailed HTML and brief HTML) in this order:
> 
> arc-2008-40800_002.pdf: arc-2008-40800_002 - PDF
> Fulltext: arc-2008-40800 - PDF
> arc-2008-40800_003.pdf: arc-2008-40800_003 - PDF
> arc-2008-40800_001.pdf: arc-2008-40800_001 - PDF
> arc-2008-40800_004.pdf: arc-2008-40800_004 - PDF
> 
> I've checked bfe_fulltext.py and it seems that the only sorting function
> is the following: urls.sort(lambda (url1, name1, format1), (url2, name2,
> format2): url1 < url2 and -1 or url1 > url2 and 1 or 0)
> The above algorithm seems to work for dummy strings, but for some
> reason, it doesn't work for my records...
> 
> Any ideas?
> 
> Best regards,
> Theodoropoulos Theodoros


You are right: the sorting of fulltext files is currently not well 
implemented. Here is a proposed patch that you can apply to 
bfe_fulltext.py and bfe_fulltext_mini.py, so that URLs are sorted first 
with respect to the description and then with respect to the actual name, 
using alphanumeric sorting (e.g. "foo1" < "foo2" < "foo10").

Let us know if this solves your issue.

Best regards,
        Samuele

-- 
Samuele Kaplun ** CERN Document Server ** <http://cds.cern.ch/>

From 7bda5cdfaf2cf57bdd2fa729282c27156a62d8f8 Mon Sep 17 00:00:00 2001
From: Samuele Kaplun <[email protected]>
Date: Thu, 28 Jan 2010 10:01:15 +0100
Subject: [PATCH] BibFormat: improved sorting of fulltextes in bfe_fulltex

---
 modules/bibformat/lib/elements/bfe_fulltext.py     |   19 ++++++++++++++++---
 .../bibformat/lib/elements/bfe_fulltext_mini.py    |   14 ++++++++++----
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/modules/bibformat/lib/elements/bfe_fulltext.py b/modules/bibformat/lib/elements/bfe_fulltext.py
index 277e9f7..a249343 100644
--- a/modules/bibformat/lib/elements/bfe_fulltext.py
+++ b/modules/bibformat/lib/elements/bfe_fulltext.py
@@ -22,6 +22,7 @@
 """
 __revision__ = "$Id$"
 
+import re
 from invenio.bibdocfile import BibRecDocs, file_strip_ext
 from invenio.messages import gettext_set_language
 from invenio.config import CFG_SITE_URL, CFG_CERN_SITE
@@ -77,12 +78,18 @@ def format(bfo, style, separator='; ', show_icons='no'):
 
     if main_urls:
         last_name = ""
-        for descr, urls in main_urls.items():
+        main_urls_keys = sort_alphanumerically(main_urls.keys())
+        for descr in main_urls_keys:
+            urls = main_urls[descr]
             out += "<strong>%s:</strong> " % descr
             url_list = []
-            urls.sort(lambda (url1, name1, format1), (url2, name2, format2): url1 < url2 and -1 or url1 > url2 and 1 or 0)
-
+            ## FIXME: This is so ugly!
+            urls_dict = {}
             for url, name, format in urls:
+                urls_dict[url] = (name, format)
+            urls_dict_keys = sort_alphanumerically(urls_dict.keys())
+            for url in urls_dict_keys:
+                name, format = urls_dict[url]
                 if not name == last_name and len(main_urls) > 1:
                     print_name = "<em>%s</em> - " % name
                 else:
@@ -229,3 +236,9 @@ def get_files(bfo):
                     parsed_urls['others_urls'].append((url, descr)) # Let's put it in a general other url
 
     return (parsed_urls, old_versions, additionals)
+
+_RE_SPLIT = re.compile(r"\d+|\D+")
+def sort_alphanumerically(elements):
+    elements = [([not token.isdigit() and token or int(token) for token in _RE_SPLIT.findall(element)], element) for element in elements]
+    elements.sort()
+    return [element[1] for element in elements]
diff --git a/modules/bibformat/lib/elements/bfe_fulltext_mini.py b/modules/bibformat/lib/elements/bfe_fulltext_mini.py
index de6a3d3..8610216 100644
--- a/modules/bibformat/lib/elements/bfe_fulltext_mini.py
+++ b/modules/bibformat/lib/elements/bfe_fulltext_mini.py
@@ -22,7 +22,7 @@
 """
 __revision__ = "$Id$"
 
-from invenio.bibformat_elements.bfe_fulltext import get_files
+from invenio.bibformat_elements.bfe_fulltext import get_files, sort_alphanumerically
 from invenio.messages import gettext_set_language
 from invenio.config import CFG_SITE_URL, CFG_CERN_SITE
 from cgi import escape
@@ -73,12 +73,18 @@ def format(bfo, style, separator='; ', show_icons='no'):
             file_icon = ''
 
         last_name = ""
-        for descr, urls in main_urls.items():
+        main_urls_keys = sort_alphanumerically(main_urls.keys())
+        for descr in main_urls_keys:
+            urls = main_urls[descr]
             out += '<div><small class="detailedRecordActions">%s:</small> ' % descr
             url_list = []
-            urls.sort(lambda (url1, name1, format1), (url2, name2, format2): url1 < url2 and -1 or url1 > url2 and 1 or 0)
-
+            ## FIXME: This is so ugly!
+            urls_dict = {}
             for url, name, format in urls:
+                urls_dict[url] = (name, format)
+            urls_dict_keys = sort_alphanumerically(urls_dict.keys())
+            for url in urls_dict_keys:
+                name, format = urls_dict[url]
                 if not name == last_name and len(main_urls) > 1:
                     print_name = "<em>%s</em> - " % name
                 else:
-- 
1.6.3.3

Re: fulltext sorting in bfe_fulltext

Reply via email to