Add sort option to get_tags; add tests for paginated wiki

kmaehashi · kmaehashi · commit 2d4f3c405bf1 · 2022-04-23T18:54:18.000+09:00
diff --git a/atwiki/core.py b/atwiki/core.py
@@ -27,8 +27,8 @@ def __init__(self, uri, **kwargs):
     self._user_agent = kwargs.get('user_agent', 'Mozilla/5.0 (AtWikiPython)')
     self._sleep = kwargs.get('sleep', 10)
 
-  def get_list(self, tag=None):
-    index = 1
+  def get_list(self, tag=None, _start=1):
+    index = _start
     while True:
       count = 0
       if tag:
@@ -52,11 +52,11 @@ def get_list(self, tag=None):
       index += 1
       time.sleep(self._sleep)
 
-  def get_tags(self):
-    index = 1
+  def get_tags(self, sort='', _start=1):
+    index = _start
     while True:
       count = 0
-      soup = self._request(self._uri.tags('', index))
+      soup = self._request(self._uri.tags(sort, index))
       links = soup.find('div', attrs={'class': 'cmd_tag'}).findAll('a', attrs={'class': 'tag'})
       for link in links:
         tag_name = link.text
diff --git a/atwiki/test/test_core.py b/atwiki/test/test_core.py
@@ -2,6 +2,8 @@
 
 from __future__ import absolute_import, division, print_function, unicode_literals
 
+import math
+import re
 from unittest import TestCase
 
 from atwiki.core import AtWikiAPI
@@ -59,3 +61,70 @@ def test_search_or(self):
   def test_search_none(self):
     results = list(self._api.search('no_result_expected_for_this'))
     self.assertEqual(len(results), 0)
+
+
+class PagerizeTest(TestCase):
+  """Exercise the `_start` paging argument against a large, multi-page wiki.
+
+  NOTE(review): these tests appear to query the live site configured in
+  setUp, so the numeric bounds are loose sanity ranges, not exact values.
+  """
+
+  def setUp(self):
+    self._uri = AtWikiURI('https://w.atwiki.jp/hmiku')
+    self._api = AtWikiAPI(self._uri)
+
+  def test_get_list(self):
+    """Check get_list() from the first index, the last index and one past it."""
+    top_page = next(self._api.get_list())
+    self.assertEqual(top_page, {'id': 1, 'name': 'トップページ'})
+
+    # Read the total page count from the summary text of the first listing.
+    soup = self._api._request(self._uri.list(sort='create', index=1))
+    text = soup.find('div', class_='pagelist').text
+    m = re.search(r'計 (\d+) ページ / 1 から 100 を表示', text)
+    self.assertIsNotNone(m)
+    count = int(m.group(1))
+    self.assertGreater(count, 45000)
+    self.assertLess(count, 90000)
+    # int(): math.ceil() returns a float on Python 2, and this is used as an index.
+    last_index = int(math.ceil(count / 100))
+
+    # Get list from the last page.
+    # N.B. The page counter is not updated immediately.
+    pages = list(self._api.get_list(_start=last_index))
+    # A total that is an exact multiple of 100 means a full last listing, not an empty one.
+    expected = (count % 100) or 100
+    self.assertGreater(len(pages), expected - 5)
+    self.assertLess(len(pages), expected + 5)
+
+    # Starting one past the last index, the first item is expected to be the top page.
+    top_page = next(self._api.get_list(_start=last_index + 1))
+    self.assertEqual(top_page, {'id': 1, 'name': 'トップページ'})
+
+  def test_get_list_tag(self):
+    """Check tag-filtered get_list() from the last index and one past it."""
+    soup = self._api._request(self._uri.tag('曲', index=1))
+    # Walk the links in the tag block for as long as each one points at the
+    # next page number; the last number reached is used as the final index.
+    last_index = 1
+    for link in soup.find('div', class_='cmd_tag').find_all('a'):
+      if not link.attrs['href'].endswith('&p={}'.format(last_index + 1)):
+        break
+      last_index += 1
+    pages = list(self._api.get_list('曲', _start=last_index))
+    self.assertGreaterEqual(len(pages), 1)
+    self.assertLessEqual(len(pages), 50)
+
+    pages = list(self._api.get_list('曲', _start=last_index + 1))
+    self.assertEqual(len(pages), 0)
+
+  def test_get_tags(self):
+    """Check get_tags() with the 'num' sort, starting at index 1 and at index 2."""
+    song = next(self._api.get_tags('num'))
+    self.assertEqual(song['name'], '曲')
+    self.assertGreater(song['weight'], 35000)
+    self.assertLess(song['weight'], 70000)
+
+    not_song = next(self._api.get_tags('num', _start=2))
+    self.assertNotEqual(not_song['name'], '曲')