From: Kevin <[email protected]>

This introduces a new remote.origin.namespaces configuration variable
that holds a space-separated list of namespaces. The list of pages to
extract is then taken from all the specified namespaces.

Reviewed-by: Antoine Beaupré <[email protected]>
Signed-off-by: Antoine Beaupré <[email protected]>
---
 contrib/mw-to-git/git-remote-mediawiki.perl | 34 +++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/contrib/mw-to-git/git-remote-mediawiki.perl b/contrib/mw-to-git/git-remote-mediawiki.perl
index e7f857c1a..1c5e39831 100755
--- a/contrib/mw-to-git/git-remote-mediawiki.perl
+++ b/contrib/mw-to-git/git-remote-mediawiki.perl
@@ -17,6 +17,7 @@ use Git;
 use Git::Mediawiki qw(clean_filename smudge_filename connect_maybe
                                        EMPTY HTTP_CODE_OK);
 use DateTime::Format::ISO8601;
+use Scalar::Util;
 use warnings;
 
 # By default, use UTF-8 to communicate with Git and the user
@@ -63,6 +64,10 @@ chomp(@tracked_pages);
 my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote.${remotename}.categories"));
 chomp(@tracked_categories);
 
+# Just like @tracked_categories, but for MediaWiki namespaces.
+my @tracked_namespaces = split(/[ \n]/, run_git("config --get-all remote.${remotename}.namespaces"));
+chomp(@tracked_namespaces);
+
 # Import media files on pull
 my $import_media = run_git("config --get --bool remote.${remotename}.mediaimport");
 chomp($import_media);
@@ -256,6 +261,23 @@ sub get_mw_tracked_categories {
        return;
 }
 
+sub get_mw_tracked_namespaces {
+    my $pages = shift;
+    foreach my $local_namespace (@tracked_namespaces) {
+        my $mw_pages = $mediawiki->list( {
+            action => 'query',
+            list => 'allpages',
+            apnamespace => get_mw_namespace_id($local_namespace),
+            aplimit => 'max' } )
+            || die $mediawiki->{error}->{code} . ': '
+                . $mediawiki->{error}->{details} . "\n";
+        foreach my $page (@{$mw_pages}) {
+            $pages->{$page->{title}} = $page;
+        }
+    }
+    return;
+}
+
 sub get_mw_all_pages {
        my $pages = shift;
        # No user-provided list, get the list of pages from the API.
@@ -319,6 +341,10 @@ sub get_mw_pages {
                $user_defined = 1;
                get_mw_tracked_categories(\%pages);
        }
+    if (@tracked_namespaces) {
+        $user_defined = 1;
+        get_mw_tracked_namespaces(\%pages);
+    }
        if (!$user_defined) {
                get_mw_all_pages(\%pages);
        }
@@ -1263,7 +1289,6 @@ my %cached_mw_namespace_id;
 sub get_mw_namespace_id {
        $mediawiki = connect_maybe($mediawiki, $remotename, $url);
        my $name = shift;
-
        if (!exists $namespace_id{$name}) {
                # Look at configuration file, if the record for that namespace is
                # already cached. Namespaces are stored in form:
@@ -1331,7 +1356,12 @@ sub get_mw_namespace_id {
 sub get_mw_namespace_id_for_page {
        my $namespace = shift;
        if ($namespace =~ /^([^:]*):/) {
-               return get_mw_namespace_id($namespace);
+               my ($ns, $id) = split(/:/, $namespace);
+               if (Scalar::Util::looks_like_number($id)) {
+                       return get_mw_namespace_id($ns);
+               } else{
+                       return
+               }
        } else {
                return;
        }
-- 
2.11.0

Reply via email to