jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1092273?usp=email )

Change subject: Enhance searching the item number from the ISBN number
......................................................................

Enhance searching the item number from the ISBN number

Get list of Wikidata items (more efficient; more details available).
Enhanced error messages.
Show the required isbnlib libraries.

Bug: T314942
Change-Id: I6f34e5757fa6e2dd06adb31d9d6136a3e7846c6e
Signed-off-by: xqt <i...@gno.de>
---
M scripts/create_isbn_edition.py
1 file changed, 112 insertions(+), 80 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified




diff --git a/scripts/create_isbn_edition.py b/scripts/create_isbn_edition.py
index fca0e41..1a33d05 100755
--- a/scripts/create_isbn_edition.py
+++ b/scripts/create_isbn_edition.py
@@ -171,8 +171,8 @@

     .. code:: shell

-        pip install isbnlib-bol
         pip install isbnlib-bnf
+        pip install isbnlib-bol
         pip install isbnlib-dnb
         pip install isbnlib-kb
         pip install isbnlib-loc
@@ -245,7 +245,25 @@

       .. seealso:: See :pylib:`venv` for more information about virtual
          environments.
+      .. note:: If you believe this is a mistake, please contact your
+         Python installation or OS distribution provider. You can
+         override this, at the risk of breaking your Python installation
+         or OS, by passing ``--break-system-packages`` to ``pip``.
+      .. hint:: See :pep:`668` for the detailed specification.

+      You need to install a local python environment:
+
+      - https://pip.pypa.io/warnings/venv
+      - :python:`tutorial/venv`
+
+      .. code-block:: bash
+
+         sudo -s
+         apt install python3-full
+         python3 -m venv /opt/python
+         /opt/python/bin/pip install pywikibot
+         /opt/python/bin/pip install isbnlib-kb
+         /opt/python/bin/python ../userscripts/create_isbn_edition.py kb

 **Environment:**
     The python script can run on the following platforms:
@@ -471,7 +489,7 @@
     AFTERWORDBYPROP,
 }

-# Profession author required
+# Profession author instances
 author_profession = {
     AUTHORINSTANCE,
     ILLUSTRATORINSTANCE,
@@ -496,23 +514,31 @@
 # You can better run the script repeatedly with difference library sources.
 # Content and completeness differs amongst libraryies.
 bib_source = {
-    # database ID - item number - label - default language
-    'bnf': ('Q193563', 'Catalogue General (France)', 'fr'),
-    'bol': ('Q609913', 'Bol.Com', 'en'),
-    'dnb': ('Q27302', 'Deutsche National Library', 'de'),
-    'goob': ('Q206033', 'Google Books', 'en'),
+    # database ID: item number, label, default language, package
+    'bnf': ('Q193563', 'Catalogue General (France)', 'fr', 'isbnlib-bnf'),
+    'bol': ('Q609913', 'Bol.Com', 'en', 'isbnlib-bol'),
+    'dnb': ('Q27302', 'Deutsche National Library', 'de', 'isbnlib-dnb'),
+    'goob': ('Q206033', 'Google Books', 'en', 'isbnlib'),  # lib
     # A (paying) api key is needed
-    'isbndb': ('Q117793433', 'isbndb.com', 'en'),
-    'kb': ('Q1526131', 'Koninklijke Bibliotheek (Nederland)', 'nl'),
+    'isbndb': ('Q117793433', 'isbndb.com', 'en', 'isbnlib'),
+    'kb': ('Q1526131', 'Koninklijke Bibliotheek (Nederland)', 'nl',
+           'isbnlib-kb'),
     # Not implemented in Belgium
-    # 'kbr': ('Q383931', 'Koninklijke Bibliotheek (België)', 'nl'),
-    'loc': ('Q131454', 'Library of Congress (US)', 'en'),
-    'mcues': ('Q750403', 'Ministerio de Cultura (Spain)', 'es'),
-    'openl': ('Q1201876', 'OpenLibrary.org', 'en'),
-    'porbase': ('Q51882885', 'Portugal (urn.porbase.org)', 'pt'),
-    'sbn': ('Q576951', 'Servizio Bibliotecario Nazionale (Italië)', 'it'),
-    'wiki': ('Q121093616', 'Wikipedia.org', 'en'),
-    'worldcat': ('Q76630151', 'WorldCat (worldcat2)', 'en'),
+    # 'kbr': ('Q383931', 'Koninklijke Bibliotheek (België)', 'nl', 'isbnlib'),
+    'loc': ('Q131454', 'Library of Congress (US)', 'en', 'isbnlib-loc'),
+    'mcues': ('Q750403', 'Ministerio de Cultura (Spain)', 'es',
+              'isbnlib-mcues'),
+    'openl': ('Q1201876', 'OpenLibrary.org', 'en', 'isbnlib'),  # lib
+    'porbase': ('Q51882885', 'Portugal (urn.porbase.org)', 'pt',
+                'isbnlib-porbase'),
+    'sbn': ('Q576951', 'Servizio Bibliotecario Nazionale (Italië)', 'it',
+            'isbnlib-sbn'),
+    'wiki': ('Q121093616', 'Wikipedia.org', 'en', 'isbnlib'),   # lib
+    'worldcat': ('Q76630151', 'WorldCat (worldcat2)', 'en',
+                 'isbnlib-worldcat2'),
+    # isbnlib-oclc
+    # https://github.com/swissbib
+    # others to be added
 }

 # Remap obsolete or non-standard language codes
@@ -742,7 +768,7 @@


 def get_item_list(item_name: str,
-                  instance_id: str | set[str] | list[str]) -> list[str]:
+                  instance_id: str | set[str] | list[str]) -> set[str]:
     """Get list of items by name, belonging to an instance (list).

     Normally there should have one single best match. The caller should
@@ -753,7 +779,7 @@

     :param item_name: Item name (case sensitive)
     :param instance_id: Instance ID
-    :return: Set of items (Q-numbers)
+    :return: Set of items
     """
     pywikibot.debug(f'Search label: {item_name.encode("utf-8")}')
     item_list = set()  # Empty set
@@ -796,15 +822,14 @@
             for lang in item.aliases:
                 for seq in item.aliases[lang]:
                     if item_name_canon == unidecode(seq).casefold():
-                        item_list.add(item.getID())  # Alias match
+                        item_list.add(item)  # Alias match
                         break

     pywikibot.log(item_list)
-    # Convert set to list
-    return list(item_list)
+    return item_list


-def get_item_with_prop_value(prop: str, propval: str) -> list[str]:
+def get_item_with_prop_value(prop: str, propval: str) -> set[str]:
     """Get list of items that have a property/value statement.

     .. seealso:: :api:`Search`
@@ -842,12 +867,11 @@

             for seq in item.claims[prop]:
                 if unidecode(seq.getTarget()).casefold() == item_name_canon:
-                    item_list.add(item.getID())  # Found match
+                    item_list.add(item)  # Found match
                     break

-    # Convert set to list
     pywikibot.log(item_list)
-    return sorted(item_list)
+    return item_list


 def amend_isbn_edition(isbn_number: str) -> int:
@@ -869,8 +893,11 @@
     if not isbn_number:
         return 3  # Do nothing when the ISBN number is missing

+    pywikibot.info()
+
     # Some digital library services raise failure
     try:
+        # Get ISBN basic data
         isbn_data = isbnlib.meta(isbn_number, service=booklib)
         # {
         #     'ISBN-13': '9789042925564',
@@ -910,8 +937,9 @@
     global proptyx
     # targetx is not global (to allow for language specific editions)

-    # Get the book language from the ISBN book reference
-    booklang = mainlang  # Default language
+    # Set default language from book library
+    # Mainlang was set to default digital library language code
+    booklang = mainlang
     if isbn_data['Language']:
         # Get the book language from the ISBN book number
         # Can overwrite the default language
@@ -925,8 +953,7 @@
     lang_list = get_item_list(booklang, propreqinst[EDITIONLANGPROP])

     # Hardcoded parameter
-    if 'Q3504110' in lang_list:  # Somebody insisted on this disturbing value
-        lang_list.remove('Q3504110')  # Remove duplicate "En" language
+    lang_list -= {'Q3504110'}  # Remove duplicate "En" language

     if not lang_list:
         # Can' t store unknown language (need to update mapping table...)
@@ -935,20 +962,19 @@

     if len(lang_list) != 1:
         # Ambiguous language
-        pywikibot.warning(f'Ambiguous language {booklang}')
+        pywikibot.warning(f'Ambiguous language {booklang}\n'
+                          f'[lang_item.getID() for lang_item in lang_list]')
         return 3

     # Set edition language item number
-    target[EDITIONLANGPROP] = lang_list[0]
+    lang_item = lang_list.pop()
+    target[EDITIONLANGPROP] = lang_item.getID()

     # Require short Wikipedia language code
     if len(booklang) > 3:
-        # Get best ranked language item
-        lang = get_item_page(lang_list[0])
-
         # Get official language code
-        if WIKILANGPROP in lang.claims:
-            booklang = lang.claims[WIKILANGPROP][0].getTarget()
+        if WIKILANGPROP in lang_item.claims:
+            booklang = lang_item.claims[WIKILANGPROP][0].getTarget()

     # Get edition title
     edition_title = isbn_data['Title'].strip()
@@ -982,7 +1008,7 @@

     # Search the ISBN number both in canonical and numeric format
     qnumber_list = get_item_with_prop_value(ISBNPROP, isbn_fmtd)
-    qnumber_list += get_item_with_prop_value(ISBNPROP, isbn_number)
+    qnumber_list.update(get_item_with_prop_value(ISBNPROP, isbn_number))

     # Get addional data from the digital library
     # This could fail with
@@ -1023,27 +1049,24 @@
             if isbn10_number:
                 isbn10_fmtd = isbnlib.mask(isbn10_number)
                 pywikibot.info(f'ISBN 10: {isbn10_fmtd}')
-                qnumber_list += get_item_with_prop_value(ISBN10PROP,
-                                                         isbn10_fmtd)
-                qnumber_list += get_item_with_prop_value(ISBN10PROP,
-                                                         isbn10_number)
+                qnumber_list.update(
+                    get_item_with_prop_value(ISBN10PROP, isbn10_fmtd))
+                qnumber_list.update(
+                    get_item_with_prop_value(ISBN10PROP, isbn10_number))
         except Exception as error:
             pywikibot.error(f'ISBN 10 error, {error}')

-    qnumber_list = sorted(set(qnumber_list))  # Get unique values
-
     # Create or amend the item

     if not qnumber_list:
         # Create the edition
         label = {MULANG: objectname}
         item = pywikibot.ItemPage(repo)  # Create item
-        item.editEntity({'labels': label}, summary=transcmt)
+        item.editLabels(label, summary=transcmt, bot=wdbotflag)
         qnumber = item.getID()  # Get new item number
         status = 'Created'
     elif len(qnumber_list) == 1:
-        qnumber = qnumber_list[0]
-        item = get_item_page(qnumber)
+        item = qnumber_list.pop()
         qnumber = item.getID()

         # Update item only if edition, or instance is missing
@@ -1057,16 +1080,18 @@
         # Add missing book label for book language
         if MULANG not in item.labels:
             item.labels[MULANG] = objectname
-            item.editEntity({'labels': item.labels}, summary=transcmt)
+            item.editLabels(item.labels, summary=transcmt, bot=wdbotflag)
         status = 'Found'
     else:
-        pywikibot.error(f'Ambiguous ISBN number {isbn_fmtd}, '
-                        f'{qnumber_list} not updated')
+        pywikibot.error(
+            f'Ambiguous ISBN number {isbn_fmtd}, '
+            f'{[item.getID() for item in qnumber_list]} not updated'
+        )
         return 2

-    pywikibot.warning(f'{status} item: P212:{isbn_fmtd} ({qnumber}) '
+    pywikibot.warning(f'{status} item {qnumber}: P212: {isbn_fmtd} '
                       f'language {booklang} ({target[EDITIONLANGPROP]}) '
-                      f'{get_item_header_lang(item.labels, booklang)}')
+                      f'{objectname}')

     # Register missing statements
     pywikibot.debug(target)
@@ -1178,7 +1203,7 @@

         if len(author_list) == 1:
             add_author = True
-            author_item = get_item_page(author_list[0])
+            author_item = author_list.pop()

             if (PROFESSIONPROP not in author_item.claims
                     or not item_is_in_list(author_item.claims[PROFESSIONPROP],
@@ -1186,14 +1211,10 @@
                 # Add profession:author statement
                 claim = pywikibot.Claim(repo, PROFESSIONPROP)
                 claim.setTarget(target_author)
-                author_item.addClaim(
-                    claim, bot=wdbotflag,
-                    summary=f'{transcmt} {PROFESSIONPROP}:{AUTHORINSTANCE}')
-                pywikibot.warning(
-                    'Add profession:author '
-                    f'({PROFESSIONPROP}:{AUTHORINSTANCE}) to '
-                    f'{author_name} ({author_list[0]})'
-                )
+                author_item.addClaim(claim, bot=wdbotflag, summary=transcmt)
+                pywikibot.warning('Add profession: author '
+                                  f'({PROFESSIONPROP}:{AUTHORINSTANCE}) to '
+                                  f'{author_name} ({author_item.getID()})')

             # Possibly found as author?
             # Possibly found as editor?
@@ -1204,7 +1225,7 @@

                 for claim in item.claims[prop]:
                     book_author = claim.getTarget()
-                    if book_author.getID() == author_list[0]:
+                    if book_author == author_item:
                         # Add missing sequence number
                         if SEQNRPROP not in claim.qualifiers:
                             qualifier = pywikibot.Claim(repo, SEQNRPROP)
@@ -1227,35 +1248,43 @@
                 claim.setTarget(author_item)
                 item.addClaim(claim, bot=wdbotflag, summary=transcmt)
                 pywikibot.warning(f'Add author {author_cnt}:{author_name} '
-                                  f'({AUTHORPROP}:{author_list[0]})')
+                                  f'({AUTHORPROP}:{author_item.getID()})')

                 # Add sequence number
                 qualifier = pywikibot.Claim(repo, SEQNRPROP)
                 qualifier.setTarget(str(author_cnt))
                 claim.addQualifier(qualifier, bot=wdbotflag, summary=transcmt)
         elif author_list:
-            pywikibot.error(f'Ambiguous author: {author_name}')
+            pywikibot.error(
+                f'Ambiguous author: {author_name}'
+                f'({[author_item.getID() for author_item in author_list]})'
+            )
         else:
             pywikibot.error(f'Unknown author: {author_name}')

     # Set the publisher
     publisher_name = isbn_data['Publisher'].strip()
     if publisher_name:
-        publisher_list = get_item_list(get_canon_name(publisher_name),
+        publisher_list = get_item_list(publisher_name,
                                        propreqinst[PUBLISHERPROP])

         if len(publisher_list) == 1:
+            publisher_item = publisher_list.pop()
             if (PUBLISHERPROP not in item.claims
                     or not item_is_in_list(item.claims[PUBLISHERPROP],
-                                           publisher_list)):
+                                           [publisher_item.getID()])):
                 claim = pywikibot.Claim(repo, PUBLISHERPROP)
-                claim.setTarget(get_item_page(publisher_list[0]))
+                claim.setTarget(publisher_item)
                 item.addClaim(claim, bot=wdbotflag, summary=transcmt)
-                pywikibot.warning(f'Add publisher: {publisher_name} '
-                                  f'({PUBLISHERPROP}:{publisher_list[0]})')
+                pywikibot.warning(
+                    f'Add publisher: {publisher_name} '
+                    f'({PUBLISHERPROP}:{publisher_item.getID()})'
+                )
         elif publisher_list:
-            pywikibot.error(f'Ambiguous publisher: {publisher_name} '
-                            f'({publisher_list})')
+            pywikibot.error(
+                f'Ambiguous publisher: {publisher_name} '
+                f'({[p_item.getID() for p_item in publisher_list]})'
+            )
         else:
             pywikibot.error(f'Unknown publisher: {publisher_name}')

@@ -1432,19 +1461,18 @@

             if len(qmain_subject) == 1:
                 # Get main subject and label
-                main_subject = get_item_page(qmain_subject[0])
-                main_subject_label = get_item_header(main_subject.labels)
+                main_subject_label = get_item_header(qmain_subject[0].labels)

                 if (MAINSUBPROP in item.claims
-                        and item_is_in_list(item.claims[MAINSUBPROP],
-                                            qmain_subject)):
+                    and item_is_in_list(item.claims[MAINSUBPROP],
+                                        [qmain_subject[0].getID()])):
                     pywikibot.log(
                         f'Skipping main subject ({MAINSUBPROP}): '
                         f'{main_subject_label} ({qmain_subject[0]})'
                     )
                 else:
                     claim = pywikibot.Claim(repo, MAINSUBPROP)
-                    claim.setTarget(main_subject)
+                    claim.setTarget(qmain_subject[0])
                     # Add main subject
                     item.addClaim(claim, bot=wdbotflag, summary=transcmt)
                     pywikibot.warning(
@@ -1479,7 +1507,8 @@
     if isbn_info:
         pywikibot.info(isbn_info)

-    # DOI number
+    # DOI number -- No warranty that the document number really exists on
+    # https:/doi.org
     isbn_doi = isbnlib.doi(isbn_number)
     if isbn_doi:
         pywikibot.info(isbn_doi)
@@ -1588,6 +1617,7 @@
                    for seq in bib_source}

     if booklib in bib_sourcex:
+        # Register source
         references = pywikibot.Claim(repo, REFPROP)
         references.setTarget(bib_sourcex[booklib])

@@ -1596,13 +1626,15 @@
         retrieved.setTarget(date_ref)
         booklib_ref = [references, retrieved]

-        # Register source and retrieval date
+        # Get default language from book library
         mainlang = bib_source[booklib][2]
     else:
         # Unknown bib reference - show implemented codes
         for seq in bib_source:
-            pywikibot.info(f'{seq.ljust(10)}{bib_source[seq][2].ljust(4)}'
-                           f'{bib_source[seq][1]}')
+            pywikibot.info(
+                f'{seq.ljust(10)}{bib_source[seq][2].ljust(4)}'
+                f'{bib_source[seq][3].ljust(20)}{bib_source[seq][1]}'
+            )
         fatal_error(3, f'Unknown Digital library ({REFPROP}) {booklib}')

     # Get optional parameters (all are optional)

--
To view, visit 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1092273?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I6f34e5757fa6e2dd06adb31d9d6136a3e7846c6e
Gerrit-Change-Number: 1092273
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <i...@gno.de>
Gerrit-Reviewer: Xqt <i...@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Geertivp <geert...@gmail.com>
_______________________________________________
Pywikibot-commits mailing list -- pywikibot-commits@lists.wikimedia.org
To unsubscribe send an email to pywikibot-commits-le...@lists.wikimedia.org

Reply via email to