2020from .uri import AtWikiURI
2121
2222class AtWikiAPI (object ):
23- _PAGER_PATTERN = re .compile (r'.+?(\d+).+?(\d+).+?(\d+).+?' ) # "計 110 ページ / 1 から 100 を表示"
2423 _TAG_WEIGHT_PATTERN = re .compile (r'\((\d+)\)$' ) # "タグ名(1)"
2524
2625 def __init__ (self , uri , ** kwargs ):
@@ -29,24 +28,20 @@ def __init__(self, uri, **kwargs):
2928 self ._sleep = kwargs .get ('sleep' , 10 )
3029
3130 def get_list (self , tag = None ):
32- index = 0
31+ index = 1
3332 while True :
3433 count = 0
35- is_end = True
3634 if tag :
3735 soup = self ._request (self ._uri .tag (tag , index ))
38- links = soup .find ('div' , attrs = {'class' : 'cmd_tag' }).findAll ( 'a' , href = True )
39- is_end = False
36+ links = soup .find ('div' , attrs = {'class' : 'cmd_tag' }).find ( 'ul' ). select ( 'a' )
37+ pager = soup . find ( 'div' , attrs = { 'class' : 'cmd_tag' }). select_one ( 'a[href$="?&p={}"]' . format ( index + 1 ))
4038 else :
4139 soup = self ._request (self ._uri .list ('create' , index ))
4240 links = soup .find ('table' , attrs = {'class' : 'pagelist' }).findAll ('a' , href = True , title = True )
43- pager = soup .find ('div' , attrs = {'class' : 'pagelist' }).findAll ('p' )[2 ].text
44- m = self ._PAGER_PATTERN .search (pager )
45- if m :
46- (total , cursor_begin , cursor_end ) = (int (m .group (1 )), int (m .group (2 )), int (m .group (3 )))
47- is_end = (total == cursor_end )
48- else :
49- is_end = True
41+ pager = soup .find ('ul' , attrs = {'class' : 'atwiki_pagination' })
42+ if pager is not None :
43+ pager = pager .select_one ('a[href$="&pp={}"]' .format (index + 1 ))
44+ is_end = (pager is None or len (links ) == 0 )
5045 for link in links :
5146 page_id = self ._uri .get_page_id_from_uri (link .attrs ['href' ])
5247 page_name = link .text .strip ()
0 commit comments