Home

Tags

Разметка вики-синтаксиса на регулярных выражениях

2010-01-28 wiki regexp python

Начальная версия pymark:

# coding: utf-8

import re

# HTML templates filled in with the ``%`` operator by the markup renderer.

# Code box: one slot for the (already escaped) code text.
r_codebox = (
    '<div class="codebox">'
    '<pre><code>%s</code></pre>'
    '</div>'
)
# Header: one slot for the header text.
r_header = '<h3>%s</h3>'
# Link: slots are (href, label).
r_url = '<a href="%s">%s</a>'
# Link with target=_blank: slots are (href, label).
r_urle = (
    '<a target=_blank href="%s">'
    '%s</a>'
)
# Uploaded file link: slots are (path under /uploads/, label).
r_file = (
    '<a href="/uploads/%s">'
    '%s</a>'
)
# Uploaded image: one slot for the path under /uploads/.
r_image = '<img src="/uploads/%s" />'

def mark1(nText):
    """Render wiki-style markup in ``nText`` as an HTML fragment.

    Rules are applied in the order listed; text captured by a rule is
    moved out of the working string, so later rules never see it:

    * ``[{html}]``            raw HTML, inserted verbatim
    * ``[[code]]``            rendered with ``r_codebox``, HTML-escaped
    * ``[file:ID label]``     rendered with ``r_file``
    * ``[image:ID]``          rendered with ``r_image``
    * ``[http://url label]``  rendered with ``r_urle`` (http/https only)
    * ``[path label]``        rendered with ``r_url``
    * ``!text`` at the start of a line, rendered with ``r_header``
    * ``**b**``, ``//i//``, ``--s--``, ``__u__``, ``##tt##``, ``++small++``
    * a line starting with `` * `` or ``  * `` becomes a bullet item

    Remaining ``&``, ``<`` and ``>`` are escaped and newlines become
    ``<br />``.

    Implementation: every rendered piece is stored in ``blocks`` and
    replaced in the working string by a ``~~0~~<index>~~0~~`` placeholder.
    At the end the placeholders are expanded, including the ones nested
    inside other blocks (e.g. bold inside italic).

    :param nText: source text in wiki markup.
    :returns: the HTML string.
    """
    # Escape literal occurrences of the marker so user text can never be
    # mistaken for a placeholder; index 0 maps back to the marker itself.
    ret = nText.replace('~~0~~', '~~0~~0~~0~~')

    # Indices 0-6 are fixed replacements; blocks produced while parsing
    # are appended after them.
    blocks = [
        '~~0~~',
        '&amp;',
        '&lt;',
        '&gt;',
        '\n<br />&nbsp;&#8226;&nbsp;',
        '\n<br />&nbsp;&nbsp;&#9643;&nbsp;',
        '<br />\n',
    ]

    def to_block(html):
        """Store ``html`` and return the placeholder that refers to it."""
        blocks.append(html)
        return '~~0~~%d~~0~~' % (len(blocks) - 1)

    def escape(text, protect):
        """Escape ``text``: 0 = as is, 1 = HTML-escape, 2 = also <br />."""
        if protect:
            text = text.replace('&', '&amp;')
            text = text.replace('<', '&lt;')
            text = text.replace('>', '&gt;')
            if protect > 1:
                text = text.replace('\n', '<br />\n')
        return text

    def rid(m, reptext='%s', protect=0):
        """Turn match ``m`` into a stored block and return its placeholder.

        ``reptext`` is a ``%``-template with one slot per regex group;
        ``protect`` is the escaping level applied to every group.
        """
        groups = m.groups()
        if len(groups) > 1:
            return to_block(
                reptext % tuple(escape(g, protect) for g in groups))
        return to_block(reptext % escape(groups[0], protect))

    # html block [{ }] -- inserted without any escaping
    ret = re.compile(r'\[\{(.+?)\}\]', re.DOTALL).sub(rid, ret)

    # code block [[ ]]
    ret = re.compile(r'\[\[(.+?)\]\]', re.DOTALL).sub(
        lambda m: rid(m, r_codebox, 1), ret)

    # attachments; the label is escaped like any other plain text
    ret = re.compile(r'\[file:(\d+)\s(.+?)\]', re.DOTALL).sub(
        lambda m: rid(m, r_file, 1), ret)
    ret = re.compile(r'\[image:(\d+)\]', re.DOTALL).sub(
        lambda m: rid(m, r_image), ret)

    # urls: the http(s) form first, then the generic [target label] form
    ret = re.compile(
        r'\[(https?://[\w\d/:\.\-]+?)\s(.+?)\]', re.DOTALL).sub(
        lambda m: rid(m, r_urle, 1), ret)
    ret = re.compile(r'\[([\w\d/:\.\-]+?)\s(.+?)\]', re.DOTALL).sub(
        lambda m: rid(m, r_url, 1), ret)

    # !header
    ret = re.compile(r'^!(.+?)$', re.DOTALL | re.MULTILINE).sub(
        lambda m: rid(m, r_header, 2), ret)

    # Inline styles, applied in this order.
    inline_rules = (
        (r'\*\*(.+?)\*\*', '<b>%s</b>'),            # **bold**
        (r'//(.+?)//', '<i>%s</i>'),                # //italic//
        (r'--(.+?)--', '<s>%s</s>'),                # --strike--
        (r'__(.+?)__', '<u>%s</u>'),                # __underline__
        (r'##(.+?)##', '<tt>%s</tt>'),              # ##monospace##
        (r'\+\+(.+?)\+\+', '<small>%s</small>'),    # ++small++
    )
    for pattern, template in inline_rules:
        # Bind the template as a default to avoid late-binding surprises.
        ret = re.compile(pattern, re.DOTALL).sub(
            lambda m, template=template: rid(m, template, 2), ret)

    # Escape what is left of the plain text via the fixed blocks 1-3.
    ret = ret.replace('&', '~~0~~1~~0~~')
    ret = ret.replace('<', '~~0~~2~~0~~')
    ret = ret.replace('>', '~~0~~3~~0~~')

    # Bullets (fixed blocks 4 and 5), then the remaining newlines (block 6).
    ret = ret.replace('\n * ', '~~0~~4~~0~~')
    ret = ret.replace('\n  * ', '~~0~~5~~0~~')
    ret = ret.replace('\n', '~~0~~6~~0~~')

    placeholder = re.compile(r'~~0~~(\d+)~~0~~')

    def expand(text, limit):
        """Expand placeholders in ``text`` that refer to blocks below ``limit``.

        A block can only contain placeholders of blocks created before it,
        so recursing with the block's own index as the new limit always
        terminates. Anything at or above the limit is left untouched.
        """
        def insert(m):
            index = int(m.group(1))
            if index >= limit:
                return m.group(0)
            return expand(blocks[index], index)
        return placeholder.sub(insert, text)

    return expand(ret, len(blocks))


исходник: http://bitbucket.org/lega911/pymark
пример использования