Skip to content

Commit f9538d6

Browse files
authored
Merge pull request #14 from kmaehashi/fix-enumeration-apis
Fix page and tag enumeration APIs
2 parents 1959501 + 21ff6fb commit f9538d6

2 files changed

Lines changed: 17 additions & 28 deletions

File tree

atwiki/core.py

Lines changed: 15 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -20,32 +20,28 @@
2020
from .uri import AtWikiURI
2121

2222
class AtWikiAPI(object):
23-
_PAGER_PATTERN = re.compile(r'.+?(\d+).+?(\d+).+?(\d+).+?') # "計 110 ページ / 1 から 100 を表示"
23+
_TAG_WEIGHT_PATTERN = re.compile(r'\((\d+)\)$') # "タグ名(1)"
2424

2525
def __init__(self, uri, **kwargs):
2626
self._uri = uri
2727
self._user_agent = kwargs.get('user_agent', 'Mozilla/5.0 (AtWikiPython)')
2828
self._sleep = kwargs.get('sleep', 10)
2929

3030
def get_list(self, tag=None):
31-
index = 0
31+
index = 1
3232
while True:
3333
count = 0
34-
is_end = True
3534
if tag:
3635
soup = self._request(self._uri.tag(tag, index))
37-
links = soup.find('div', attrs={'class': 'cmd_tag'}).findAll('a', href=True)
38-
is_end = False
36+
links = soup.find('div', attrs={'class': 'cmd_tag'}).find('ul').select('a')
37+
pager = soup.find('div', attrs={'class': 'cmd_tag'}).select_one('a[href$="?&p={}"]'.format(index + 1))
3938
else:
4039
soup = self._request(self._uri.list('create', index))
4140
links = soup.find('table', attrs={'class': 'pagelist'}).findAll('a', href=True, title=True)
42-
pager = soup.find('div', attrs={'class': 'pagelist'}).findAll('p')[2].text
43-
m = self._PAGER_PATTERN.search(pager)
44-
if m:
45-
(total, cursor_begin, cursor_end) = (int(m.group(1)), int(m.group(2)), int(m.group(3)))
46-
is_end = (total == cursor_end)
47-
else:
48-
is_end = True
41+
pager = soup.find('ul', attrs={'class': 'atwiki_pagination'})
42+
if pager is not None:
43+
pager = pager.select_one('a[href$="&pp={}"]'.format(index + 1))
44+
is_end = (pager is None or len(links) == 0)
4945
for link in links:
5046
page_id = self._uri.get_page_id_from_uri(link.attrs['href'])
5147
page_name = link.text.strip()
@@ -57,32 +53,25 @@ def get_list(self, tag=None):
5753
time.sleep(self._sleep)
5854

5955
def get_tags(self):
60-
index = 0
56+
index = 1
6157
while True:
6258
count = 0
6359
soup = self._request(self._uri.tag('', index))
6460
links = soup.find('div', attrs={'class': 'cmd_tag'}).findAll('a', attrs={'class': 'tag'})
6561
for link in links:
6662
tag_name = link.text
6763
tag_weight = 0
68-
for clazz in link.attrs['class']:
69-
if clazz.startswith('weight'):
70-
tag_weight = int(clazz[6:])
71-
break
64+
m = self._TAG_WEIGHT_PATTERN.search(link.attrs['title'])
65+
if m:
66+
tag_weight = int(m.group(1))
7267
count += 1
7368
yield {'name': tag_name, 'weight': tag_weight}
7469
if count == 0: break
7570

76-
pagerArea = soup.find('div', attrs={'class': 'cmd_tag'}).find('div')
77-
if pagerArea is None:
78-
# Pager area will not be shown when tag list fits in one page.
79-
assert index == 0
71+
# Find "次の500件" link.
72+
pager = soup.find('div', attrs={'class': 'cmd_tag'}).select_one('a[href$="/tag/?p={}"]'.format(index + 1))
73+
if not pager:
8074
break
81-
pagers = pagerArea.findAll('a')
82-
if len(pagers) == 1:
83-
if pagers[0].attrs['href'].endswith('/?p={}'.format(index - 1)):
84-
# Valid pager found, and no more tags.
85-
break
8675
index += 1
8776
time.sleep(self._sleep)
8877

atwiki/test/test_core.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ def setUp(self):
1515

1616
def test_get_list(self):
1717
results = list(self._api.get_list())
18-
self.assertTrue(1 < len(results))
18+
self.assertEqual(len(results), 19)
1919

2020
def test_get_list_tag(self):
2121
results = list(self._api.get_list('tag01'))
@@ -27,7 +27,7 @@ def test_get_tags(self):
2727
results = list(self._api.get_tags())
2828
self.assertEqual(len(results), 1)
2929
self.assertEqual(results[0]['name'], 'tag01')
30-
self.assertEqual(results[0]['weight'], 3)
30+
self.assertEqual(results[0]['weight'], 1)
3131

3232
def test_get_source(self):
3333
self.assertEqual(self._api.get_source(14, 0),

0 commit comments

Comments
 (0)