Phase 2 category dpt/Vocabulaire/Index - change (labs...vocabulary-index)

Youni Verciti (Code Review) Sat, 12 Mar 2016 07:56:37 -0800

Youni Verciti has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/276974


Change subject: Phase 2 category dpt/Vocabulaire/Index
......................................................................

Phase 2 category dpt/Vocabulaire/Index

Change-Id: I9e89a8b8dcf6f12f960601f9ad587dda9b65e72e
---
M vcb/vcb.py
M vcb/vcbformat.py
2 files changed, 66 insertions(+), 27 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/vocabulary-index refs/changes/74/276974/1

diff --git a/vcb/vcb.py b/vcb/vcb.py
index 95cbc0f..fe2dd6e 100755
--- a/vcb/vcb.py
+++ b/vcb/vcb.py
@@ -22,12 +22,15 @@
                     action="store_true")
 args = parser.parse_args()
 cible_unicode = unicode(args.cible, 'utf-8') # Encodage UNICODE pour PWB 
+
 ### EXEC PATHNAME
 lPath = pathname(args.cible, srv)       # pathname avec l'argument et le 
serveur forme la variable lPath
 [path, list_path_elemt, root_name, last_name, nb_path_elemt, list_sections, 
linker] = lPath # dont ceci est la composition
 [class_doc, new_page, sommaire] = linker # avec la composition de linker
 
 ###PYWIKIBOT
+root_name_uni = unicode(root_name, 'utf-8')
+last_name_uni = unicode(last_name, 'utf-8') # UNICODE!
 title = cible_unicode   # Titre reçoit l'argument au format UNICODE
 page = pywikibot.Page(site, title) # PWB variable
 
@@ -101,14 +104,28 @@
   if gen:                            # si l'objet generator existe
     for template in gen:             # pour chaque item du generator
       template_name = template[0]    # Le nom de la pge du modele
-      template_params = template[1] # liste des parametres
+      template_params = template[1]  # liste des parametres
       template_name = str(template_name)
       moTrad = reTrad.search(template_name) # cherche trad dans liste des 
modeles
       if moTrad:
-       lTrad.append(template)      # si trad enregistre dans LISTE TRAD
+       if u'indexation = non' in template_params: # strictement |indexation = 
non|
+         print 'Le modèle suivant est marqué pour ne pas être indexé 
(indexation = non)\n'
+         print title           # implémentation rapide du parametre
+         print template_name   # indexation = non
+         print template_params # utiliser regex pour améliorer
+         pass
+       else:
+         lTrad.append(template)      # si trad enregistre dans LISTE TRAD
       moPron = rePron.search(template_name) # cherche prononciation dans liste 
des modèles
       if moPron:
-       lPron.append(template)      # si pron enregistre dans LISTE PRON
+       if u'indexation = non' in template_params: # strictement |indexation = 
non|
+         print 'Le modèle suivant est marqué pour ne pas être indexé 
(indexation = non)\n'
+         print title           # implémentation rapide du parametre
+         print template_name   # indexation = non
+         print template_params # utiliser regex pour améliorer
+         pass 
+       else:
+         lPron.append(template)      # si pron enregistre dans LISTE PRON
 nbPages = len(list_page)   # Nombre de pages à comparer avec all_pages
 nbPron = len(lPron)        # Nombre de modèles prononciation
 nbTrad = len(lTrad)        # Nombre de modèles traduction
@@ -240,22 +257,22 @@
     print rmk
     # rmv = removeDict[rmk] #?¿
     del finalDict[rmk]    # supprime la clé de finalDict
-  #tplInsideLog = tplInsideLog  + str(rmv)#unicode(rmk, 'utf-8')# +  str(rmv) 
+ '\n'
+  # tplInsideLog = tplInsideLog  + str(rmv)#unicode(rmk, 'utf-8')# +  str(rmv) 
+ '\n'
   # Japonais/Vocabulaire ne reussit pas à convertir rmk et rmv en string ni en 
unicode
   # Portugais indexGlobal plante aussi sur cette ligne
   log = log + tplInsideLog
 
 nb_lines = len(finalDict)   # Le nombre de ligne dans le dictionnaire apres 
nettoyage
-wlp = divdict(finalDict)  # Division en 3 listes Word, Locution, Phrase
+wlp = divdict(finalDict)    # Division en 3 listes Word, Locution, Phrase
 [tupWord, tupLocution, tupPhrase] = wlp # Le tuple contient les 3 listes
 chkword(tupLocution, tupWord)   # Sépare les locutions dont le formatage 
permet le deplacement dans la liste des mots simples
 ### TRAITEMENT DES ARTICLES RECONNUS SELON LANGPACK
-for lang in langPack:  
-  pack = langPack[lang]
+for lang in lang_pack:  
+  pack = lang_pack[lang]
   if lang == rootLang:
     chkarticle(tupLocution, tupWord, pack)
   else:
-    print ' Pas de langPack'
+    print ' Pas de lang_pack'
 ### JOURNALISE TAILLE DES LISTES
 nb_words = len(tupWord)
 nb_locutions = len(tupLocution)
@@ -268,14 +285,14 @@
 phrases_formated = linesans(tupPhrase)
 
 script_name = sys.argv[0]
-writePack = [script_name, all_pages, nb_templates, nb_lines, cible_unicode, 
words_formated, locutions_formated, phrases_formated]
+write_pack = [script_name, all_pages, nb_templates, nb_lines, cible_unicode, 
words_formated, locutions_formated, phrases_formated, root_name_uni, 
last_name_uni]
 print '### Log: ###'
 print log
 if nb_lines < 5:
   print 'Pas suffisament de données pour créer une page. Minimum 5 lignes.'
   print nb_lines
 else:
-  txtin = writelist(writePack)
+  txtin = writelist(write_pack)
   comment = 'Indexation automatique du vocabulaire pour les langues 
étrangères. Youni Verciti Bot'
   if args.test: # MODE TEST SAVE IN LABORATOIRE
     new_page = 'Projet:Laboratoire/Propositions/Index_vocabulaire/vcb '+ 
last_name
@@ -291,17 +308,21 @@
     print 'Pas sauvegardé, exception'
   else:
     print 'Feĺicitation vous avez enregistré une nouvelle page de vocabulaire'
-    title = sommaire     # vérifier existance de dpt/Index vocabulaire
+    #last_name_uni = unicode(last_name, 'utf-8') # UNICODE!
+    title = sommaire                            
     page = pywikibot.Page(site, title)
-    exist = page.exists()
-    if exist:   # Test exist page du sommaire
+    exist = page.exists()                       # vérifie existance du 
sommaire dpt/Index vocabulaire
+    if exist:                                   # La page du sommaire
       print 'Le sommaire existe:' + sommaire    # Hote du lien à créer: 
sommaire
-      last_name_uni = unicode(last_name, 'utf-8') # UNICODE!
+      #last_name_uni = unicode(last_name, 'utf-8') # UNICODE!
       link_generator = page.linkedPages(namespaces=0)   # L'objet PWB
       if link_generator:  # Si le sommaire contient des liens dans l'espace 
principal
        for linked in link_generator:    ### l'objet pagegenerator PWB contient 
des objets page.Page
-         #print linked.title()          ### la syntaxe PWB pour extaire le 
titre UNICODE
+         print linked.title()          ### la syntaxe PWB pour extaire le 
titre UNICODE
          if linked.title() == new_page: # Le lien pour notre nouvelle page 
existe
+           # BUG: Le programme ne voit pas que le lien du chapitre est deja en 
place dans l'exemple suivant:!
+            # [[Portugais/Index vocabulaire/vcb Articles_définis | Vocabulaire 
Articles_définis]]
+            # Le dernier espace est interpré comme une option l'ajout de _ 
provoque une différence...
            print 'Le lien est déja en place dans le sommaire.\nLe programme se 
termine avec succès, actualiser la page <vcb>.'
            exit()
       ## Sortie de boucle le lien n'y est pas le prog se POURSUIT
@@ -317,11 +338,12 @@
       print link_write
     else:   # PAS DE SOMMAIRE Creation du sommaire des sections et du lien
       print 'Création du sommaire des fiches vocabulaire!'
-      link_write =  '\n[[' + new_page + ' | Vocabulaire ' + last_name_uni + 
']]\n'
-      title = sommaire
+      head = u'{{Titre | Index vocabulaire du département ' + root_name_uni 
+'}}'
+      link_write =  u'\n[[' + new_page + ' | Vocabulaire ' + last_name_uni + 
']]\n'
+      category = u'\n[[Catégorie:' + root_name_uni + '/Vocabulaire/Index]]'  #
       page = pywikibot.Page(site, title)
-      comment = u'Création du sommaire, avec le lien vers la nouvelle page de 
vocabulaire.'
-      page.text = insert # EDITE LE TEXTE DE LA PAGE
+      comment = u'Création du sommaire des fiches vocabulaire, avec un premier 
lien.'
+      page.text = head + link_write + category   # EDITE LE TEXTE DE LA PAGE
       page.save(comment)
       
 #time.sleep(15)
diff --git a/vcb/vcbformat.py b/vcb/vcbformat.py
index 3e237fd..ecafac4 100755
--- a/vcb/vcbformat.py
+++ b/vcb/vcbformat.py
@@ -127,14 +127,15 @@
 
 ### ECRITURE DE LA LISTE A PUBLIER
 def writelist(dataPack):
-  [script_name, all_pages, nb_templates, nb_lines, cible_unicode, 
words_formated, locutions_formated, phrases_formated] = dataPack
+  [script_name, all_pages, nb_templates, nb_lines, cible_unicode, 
words_formated, locutions_formated, phrases_formated, root_name_uni, 
last_name_uni] = dataPack
   now = datetime.date.today()   # PASSER AU FORMAT FRANÇAIS
   date = str(now)               # pour écrire la date 
-  head1 = u'{{Entête de fiche}}<small> Liste auto. script: ' + script_name + ' 
- Date: ' + date + ' - ' + ustr(all_pages) + ' pages - '
-  head2 = str(nb_templates) + ' modèles - ' + str(nb_lines) + ' 
lignes.</small><br>'
-  head2 = unicode(head2, 'utf-8')
+  head = u'{{Titre | Index vocabulaire ' + last_name_uni +'}}'
+  source = u'<small> Liste auto. script: ' + script_name + ' - Date: ' + date 
+ ' - ' + ustr(all_pages) + ' pages - '
+  resume = str(nb_templates) + ' modèles - ' + str(nb_lines) + ' 
lignes.</small><br>'
+  resume = unicode(resume, 'utf-8')
   back_link = 'Retour: [[' + cible_unicode + ']]\n' # Lien pour retourner à la 
leçon
-  txtin = head1 + head2 + back_link
+  txtin = head + source + resume + back_link
   if words_formated <> '':
     section1 = '== Mots ==\n<div style="-moz-column-count:2; 
column-count:2;">\n'
     txtin = txtin + section1 + words_formated
@@ -143,7 +144,23 @@
     txtin = txtin + section2 + locutions_formated
   if phrases_formated <> '':
     section3 = '== Phrases ==\n'
-    txtin = txtin + section3 + phrases_formated
-  txtin = txtin + unicode('[[Catégorie:Page auto]]', 'utf-8')
+    txtin = txtin + section3 + phrases_formated  
+  category = u'\n[[Catégorie:' + root_name_uni + u'/Vocabulaire/Index]] 
[[Catégorie:Page auto]]'  ### ATTENTION
+  # {{Autocat}} travail dans dpt/Index vocabulaire - gestion des categories 
super-complexe
+  # Phase 2: on s'en tient à catégoriser toutes les pages dans 
dpt/Vocabulaire/Index
+  # On conserve "Page auto" qui permet de retrouver toutes les fiches 
(catégorie:Fiches vocabulaire interwiki)
+  # Sans verifier ni créer les catégories - phase 3: utiliser back_link pour 
categoriser d'avantage
+  txtin = txtin + category
+  #unicode('[[Catégorie:Page auto]]', 'utf-8') + category
   #txtin = headDraft+suite+backLnk + section1 + words_formated + section2 + 
locutions_formated + section3 + phrases_formated
-  return txtin 
\ No newline at end of file
+  return txtin 
+
+#### ECRITURE INDEX GLOBAUX ###
+#def writedpt(datapack):
+  #[script_name, all_pages, nb_templates, nb_lines, cible_unicode, 
words_formated, locutions_formated, phrases_formated, root_name_uni, 
last_name_uni, class_doc] = dataPack
+  #now = datetime.date.today()   # PASSER AU FORMAT FRANÇAIS
+  #date = str(now)               # pour écrire la date 
+  #if words_formated <> '':
+    #head = u'{{Titre | Index global des mots du département ' + root_name_uni 
+'}}'
+    #txtin = head + words_formated + category
+  

-- 
To view, visit https://gerrit.wikimedia.org/r/276974
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9e89a8b8dcf6f12f960601f9ad587dda9b65e72e
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/vocabulary-index
Gerrit-Branch: master
Gerrit-Owner: Youni Verciti <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] Phase 2 category dpt/Vocabulaire/Index - change (labs...vocabulary-index)

Reply via email to