Searching for pages using the MediaWiki API returns at most 500 results (hi Patrick). To get a list of all pages in a larger wiki, we need to run repeated searches, resuming each query at the last title returned (via apfrom) and stopping once that title no longer changes. Source: https://github.com/moy/Git-Mediawiki/issues/32 Author: anarcat https://github.com/anarcat diff --git a/contrib/mw-to-git/git-remote-mediawiki.perl b/contrib/mw-to-git/git-remote-mediawiki.perl index 8dd74a9..f2ce311 100755 --- a/contrib/mw-to-git/git-remote-mediawiki.perl +++ b/contrib/mw-to-git/git-remote-mediawiki.perl @@ -259,16 +259,29 @@ sub get_mw_tracked_categories { sub get_mw_all_pages { my $pages = shift; # No user-provided list, get the list of pages from the API. - my $mw_pages = $mediawiki->list({ + my $query = { action => 'query', list => 'allpages', aplimit => 'max' - }); - if (!defined($mw_pages)) { + }; + my $curpage; + my $oldpage = ''; + while (1) { + if (defined($curpage)) { + if ($oldpage eq $curpage) { + last; + } + $query->{apfrom} = $curpage; + $oldpage = $curpage; + } + my $mw_pages = $mediawiki->list($query); + if (!defined($mw_pages)) { fatal_mw_error("get the list of wiki pages"); - } - foreach my $page (@{$mw_pages}) { + } + foreach my $page (@{$mw_pages}) { $pages->{$page->{title}} = $page; + $curpage = $page->{title}; + } } return; }