Skip to content

Commit 0d77d91

Browse files
committed
add wiki parser
1 parent 0cbc81d commit 0d77d91

2 files changed

Lines changed: 159 additions & 0 deletions

File tree

atwiki/parser.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# -*- coding: utf-8 -*-
2+
3+
from __future__ import absolute_import, division, print_function, unicode_literals
4+
5+
import re
6+
7+
class AtWikiStripper(object):
    """Reduce @wiki markup to plain text, working one source line at a time.

    Every markup rule is a class attribute. ``COMMENT``, ``BLOCK_BEGIN_ANN``
    and ``BLOCK_END_ANN`` are compiled patterns. Each remaining rule is a
    ``(compiled pattern, replacement template)`` pair that is unpacked
    directly into the helper methods below.
    """

    # NOTE: INLINE_ANN and LINE_ANN substitute an *optional* group. Python 3.5+
    # expands a group that did not take part in the match to an empty string;
    # earlier versions raise re.error ("unmatched group") for inputs such as
    # `#comment()` or `&nicovideo(url)`.

    # Comment: `// comment`
    COMMENT = re.compile(r'^//')

    # Inline annotation: `&color(#999999){text}`, `&nicovideo(url)`
    INLINE_ANN = re.compile(r'&[a-z_]+\(([^()]*?)\)({([^{}]+?)})?'), r'\3'

    # Inline links: `[[page]]`, `[[alias>URL]]`
    INLINE_LINK = re.compile(r'\[\[(.+?)((>|>>)(.+?))?\]\]'), r'\1'

    # Inline italic: `'''text'''`
    INLINE_ITALIC = re.compile(r'\'\'\'(.+?)\'\'\''), r'\1'

    # Inline bold: `''text''`
    INLINE_BOLD = re.compile(r'\'\'(.+?)\'\''), r'\1'

    # Inline del: `%%text%%`
    INLINE_DEL = re.compile(r'%%(.+?)%%'), r'\1'

    # Line annotation: `#right(){text}`, `#comment()`, `#region`
    LINE_ANN = re.compile(r'^#[a-z_]+(\(([^()]*?)\)({([^{}]+?)})?)?\s*$'), r'\4'

    # Line horizontal line: `----`
    LINE_HR = re.compile(r'^----\s*()$'), r'\1'

    # Line item list and heading: `+foo`, `-foo`, `*foo`
    LINE_ITEMLIST = re.compile(r'^(\*+|\++|-+)(.+)$'), r'\2'

    # Line quote: `>text`
    LINE_QUOTE = re.compile(r'^>+(.+)$'), r'\1'

    # Line formatted: ` text`
    LINE_PRE = re.compile(r'^ (.+)$'), r'\1'

    # Block annotation: `#exk(){{{` ... `}}}`
    BLOCK_BEGIN_ANN = re.compile(r'^#[a-z_]+\(([^{}()]*?)\)({+)\s*$')
    BLOCK_END_ANN = re.compile(r'^(}+)\s*$')

    def __init__(self, source):
        """Remember the page source.

        :param source: page text; ``text()`` calls ``splitlines()`` on it.
        """
        self._source = source

    def _inline_strip(self, line, pattern, repl):
        """Apply one inline rule repeatedly until the line stops changing.

        Repeating the substitution lets markup that only becomes matchable
        after an earlier pass (for example nested annotations) be removed too.

        :param line: the line to clean.
        :param pattern: compiled pattern of the rule.
        :param repl: replacement template of the rule.
        :returns: the line once a further pass leaves it unchanged.
        """
        while True:
            stripped = pattern.sub(repl, line)
            if stripped == line:
                return line
            line = stripped

    def _line_process(self, buf, line, pattern, repl):
        """Try one whole-line rule and, if it matches, emit the rewritten line.

        :param buf: output list; the rewritten line is appended on a match.
        :param line: the line to test.
        :param pattern: compiled pattern of the rule.
        :param repl: replacement template of the rule.
        :returns: ``True`` when the pattern matched (the line is consumed),
            ``False`` otherwise (``buf`` is left untouched).
        """
        # subn() reports the number of matches, so "did the rule apply" no
        # longer has to be guessed from whether the text happened to change.
        rewritten, hits = pattern.subn(repl, line)
        if not hits:
            return False
        buf.append(rewritten)
        return True

    def text(self):
        """Return the page source with markup removed.

        Per line: ``//`` comment lines are dropped, the inline rules are
        applied, then the first matching whole-line rule (if any) emits the
        line. Lines that open or close a block annotation are dropped while
        the lines between them are kept. Blocks may nest: each closing line
        is matched against the brace count of the innermost open block.

        :returns: the remaining lines joined with ``'\\n'`` (no trailing
            newline is added).
        """
        # Built from ``self`` on every call so that a subclass overriding a
        # rule attribute is honoured. Order matters: italic (three quotes)
        # must run before bold (two quotes), and LINE_HR before LINE_ITEMLIST
        # (both start with dashes).
        inline_rules = (
            self.INLINE_ANN,
            self.INLINE_LINK,
            self.INLINE_ITALIC,
            self.INLINE_BOLD,
            self.INLINE_DEL,
        )
        line_rules = (
            self.LINE_ANN,
            self.LINE_HR,
            self.LINE_ITEMLIST,
            self.LINE_QUOTE,
            self.LINE_PRE,
        )

        ret = []
        # Brace counts of the blocks that are currently open, innermost last.
        open_blocks = []
        for line in self._source.splitlines():
            if self.COMMENT.match(line):
                continue

            for pattern, repl in inline_rules:
                line = self._inline_strip(line, pattern, repl)

            # any() stops at the first rule that consumes the line.
            if any(self._line_process(ret, line, pattern, repl)
                   for pattern, repl in line_rules):
                continue

            opener = self.BLOCK_BEGIN_ANN.match(line)
            if opener:
                open_blocks.append(len(opener.group(2)))
                continue

            closer = self.BLOCK_END_ANN.match(line)
            if (closer and open_blocks
                    and len(closer.group(1)) == open_blocks[-1]):
                open_blocks.pop()
                continue

            # Plain text, or a brace run that closes nothing: keep as is.
            ret.append(line)
        return '\n'.join(ret)

atwiki/test/test_parser.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# -*- coding: utf-8 -*-
2+
3+
from __future__ import absolute_import, division, print_function, unicode_literals
4+
5+
from unittest import TestCase
6+
7+
from atwiki.parser import AtWikiStripper
8+
9+
# Sample @wiki page source that test_doc passes to AtWikiStripper. It holds
# one or more examples of each construct the parser has a pattern for: a `//`
# comment, `[[...]]` links, bold / italic / del styles, `#name(...)` line
# annotations, `+` / `-` / `*` list and heading prefixes, `>` quotes, a
# preformatted line, a `----` rule, and two `#exk(...)` blocks that use
# different brace counts.
# NOTE(review): the `pre` line presumably begins with a space (the form the
# parser's "Line formatted" rule strips); leading whitespace is not visible in
# this view -- confirm against the real file.
INPUT = '''
10+
// This is a comment.
11+
12+
[[Link1]] / [[Link2>URL]]
13+
Styles: ''bold'' \'\'\'italic\'\'\' %%del%% ''bold''
14+
Special: '' A ✔︎ ( ) { } \ / ! ''
15+
16+
17+
#right(aaa){inline}
18+
#comment()
19+
#region
20+
+item 1
21+
++item 2
22+
-item 1
23+
--item 2
24+
-+item 3
25+
*head 1
26+
**head 2
27+
>quote 1
28+
>>quote 2
29+
pre
30+
----
31+
#exk(xxx){{{
32+
block 1
33+
}}}
34+
#exk(){{{{{
35+
block 2
36+
}}}}}
37+
'''
38+
39+
# Plain text that test_doc expects AtWikiStripper(INPUT).text() to return.
# The backslash after `block 2` is a line continuation inside the literal, so
# the expected text is meant to end at `block 2` with no final newline
# (text() joins its lines with '\n' and adds none at the end).
OUTPUT = '''
40+
41+
Link1 / Link2
42+
Styles: bold italic del bold
43+
Special: A ✔︎ ( ) { } \ / !
44+
45+
46+
inline
47+
48+
49+
item 1
50+
item 2
51+
item 1
52+
item 2
53+
+item 3
54+
head 1
55+
head 2
56+
quote 1
57+
quote 2
58+
pre
59+
60+
block 1
61+
block 2\
62+
'''
63+
64+
class AtWikiStripperTest(TestCase):
    """End-to-end checks of ``AtWikiStripper.text()``."""

    def test_single(self):
        # A page that is just one bare link reduces to the link's page name.
        self.assertEqual('Test', AtWikiStripper('[[Test]]').text())

    def test_doc(self):
        # The whole fixture must come out exactly as the expected plain text.
        actual = AtWikiStripper(INPUT).text()
        self.assertEqual(OUTPUT, actual)

0 commit comments

Comments
 (0)