BB-Code-Parser

Test-Cases

ConvertsSimpleTokens

[b]fett[/b]
<Strong>fett</Strong>
<Strong>fett</Strong>
[i]kursiv[/i]
<Em>kursiv</Em>
<Em>kursiv</Em>
[s]durchgestrichen[/s]
<Strike>durchgestrichen</Strike>
<Strike>durchgestrichen</Strike>
[m]inline code[/m]
<InlineCode>inline code</InlineCode>
<InlineCode>inline code</InlineCode>
[code]code block[/code]
<Code>code block</Code>
<Code>code block</Code>
[php]php code block[/php]
<Code Language="PHP">php code block</Code>
<Code Language="PHP">php code block</Code>
[spoiler]spoiler[/spoiler]
<Spoiler>spoiler</Spoiler>
<Spoiler>spoiler</Spoiler>
[mod]moderator text[/mod]
<Highlight>moderator text</Highlight>
<Highlight>moderator text</Highlight>
[url]http://tempuri.org[/url]
<Link Target="http://tempuri.org" />
<Link Target="http://tempuri.org" />
[url=http://tempuri.org]Link name[/url]
<Link Target="http://tempuri.org">Link name</Link>
<Link Target="http://tempuri.org">Link name</Link>
[img]http://tempuri.org/Image.png[/img]
<Image Source="http://tempuri.org/Image.png" />
<Image Source="http://tempuri.org/Image.png" />
[list][*]erstens[*]zweitens[*]drittens[/list]
<List><Item>erstens</Item><Item>zweitens</Item><Item>drittens</Item></List>
<List><Item>erstens</Item><Item>zweitens</Item><Item>drittens</Item></List>
[quote]zitat ohne autor[/quote]
<Quote>zitat ohne autor</Quote>
<Quote>zitat ohne autor</Quote>
[quote=123,456,"[DK]Peacemaker"]zitat von autor mit sonderzeichen[/quote]
<Quote ThreadId="123" PostId="456" Author="[DK]Peacemaker">zitat von autor mit sonderzeichen</Quote>
<Quote ThreadId="123" PostId="456" Author="[DK]Peacemaker">zitat von autor mit sonderzeichen</Quote>
[table]1[||]2[--]3[||]4[/table]
<Table><Row><Cell>1</Cell><Cell>2</Cell></Row><Row><Cell>3</Cell><Cell>4</Cell></Row></Table>
<Table><Row><Cell>1</Cell><Cell>2</Cell></Row><Row><Cell>3</Cell><Cell>4</Cell></Row></Table>

AllowsNestedTokens

[b][i]fett und kursiv[/i][/b]
<Strong><Em>fett und kursiv</Em></Strong>
<Strong><Em>fett und kursiv</Em></Strong>
[b][i][s][u][m]verschachtelt[/m][/u][/s][/i][/b]
<Strong><Em><Strike><Underline><InlineCode>verschachtelt</InlineCode></Underline></Strike></Em></Strong>
<Strong><Em><Strike><Underline><InlineCode>verschachtelt</InlineCode></Underline></Strike></Em></Strong>
[quote]kein autor[quote=123,456,"Author"]mit autor[/quote]ok?[/quote]
<Quote>kein autor<Quote ThreadId="123" PostId="456" Author="Author">mit autor</Quote>ok?</Quote>
<Quote>kein autor<Quote ThreadId="123" PostId="456" Author="Author">mit autor</Quote>ok?</Quote>
[list][*][b]fett[/b][*][i]kursiv[/i][/list]
<List><Item><Strong>fett</Strong></Item><Item><Em>kursiv</Em></Item></List>
<List><Item><Strong>fett</Strong></Item><Item><Em>kursiv</Em></Item></List>

CorrectsMissingEndTokens

[b][i]fett und kursiv[/b]
<Strong><Em>fett und kursiv</Em></Strong>
<Strong><Em>fett und kursiv</Em></Strong>
[b]fett
<Strong>fett</Strong>
<Strong>fett</Strong>
[quote]zitat von [b]autor[quote]weiteres zitat[/quote]
<Quote>zitat von <Strong>autor<Quote>weiteres zitat</Quote></Strong></Quote>
<Quote>zitat von <Strong>autor<Quote>weiteres zitat</Quote></Strong></Quote>
[b][i]fett und kursiv, aber vertauscht[/b][/i]
<Strong><Em>fett und kursiv, aber vertauscht</Em></Strong>
<Strong><Em>fett und kursiv, aber vertauscht</Em></Strong>
[b]Hallo[i]Welt[/b]![/i]
<Strong>Hallo<Em>Welt</Em></Strong><Em>!</Em>
<Strong>Hallo<Em>Welt</Em></Strong><Em>!</Em>
[u][b]Hallo[i][s]Welt[/b][/u]![/s][/i]
<Underline><Strong>Hallo<Em><Strike>Welt</Strike></Em></Strong></Underline><Em><Strike>!</Strike></Em>
<Underline><Strong>Hallo<Em><Strike>Welt</Strike></Em></Strong></Underline><Em><Strike>!</Strike></Em>

AllowsParameterLiterals

[quote="123", "456", "Author"]Zitat[/quote]
<Quote ThreadId="123" PostId="456" Author="Author">Zitat</Quote>
<Quote ThreadId="123" PostId="456" Author="Author">Zitat</Quote>
[url="http://tempuri.org"]Name[/url]
<Link Target="http://tempuri.org">Name</Link>
<Link Target="http://tempuri.org">Name</Link>

IgnoresCase

[B]fett[/b]
<Strong>fett</Strong>
<Strong>fett</Strong>
[I]kursiv[/i]
<Em>kursiv</Em>
<Em>kursiv</Em>
[S]durchgestrichen[/s]
<Strike>durchgestrichen</Strike>
<Strike>durchgestrichen</Strike>
[M]inline code[/m]
<InlineCode>inline code</InlineCode>
<InlineCode>inline code</InlineCode>
[CODE]code block[/code]
<Code>code block</Code>
<Code>code block</Code>
[PHP]php code block[/php]
<Code Language="PHP">php code block</Code>
<Code Language="PHP">php code block</Code>
[SPOILER]spoiler[/spoiler]
<Spoiler>spoiler</Spoiler>
<Spoiler>spoiler</Spoiler>
[MOD]moderator text[/mod]
<Highlight>moderator text</Highlight>
<Highlight>moderator text</Highlight>
[URL]http://tempuri.org[/url]
<Link Target="http://tempuri.org" />
<Link Target="http://tempuri.org" />
[URL=http://tempuri.org]Link name[/url]
<Link Target="http://tempuri.org">Link name</Link>
<Link Target="http://tempuri.org">Link name</Link>
[IMG]http://tempuri.org/Image.png[/img]
<Image Source="http://tempuri.org/Image.png" />
<Image Source="http://tempuri.org/Image.png" />
[LIST][*]erstens[*]zweitens[*]drittens[/list]
<List><Item>erstens</Item><Item>zweitens</Item><Item>drittens</Item></List>
<List><Item>erstens</Item><Item>zweitens</Item><Item>drittens</Item></List>
[QUOTE]zitat ohne autor[/quote]
<Quote>zitat ohne autor</Quote>
<Quote>zitat ohne autor</Quote>
[QUOTE=123,456,"[DK]Peacemaker"]zitat von autor mit sonderzeichen[/quote]
<Quote ThreadId="123" PostId="456" Author="[DK]Peacemaker">zitat von autor mit sonderzeichen</Quote>
<Quote ThreadId="123" PostId="456" Author="[DK]Peacemaker">zitat von autor mit sonderzeichen</Quote>

Klappfallscheibe

[b] Fett </Strong> [/b]
<Strong> Fett &lt;/Strong&gt; </Strong>
<Strong> Fett &lt;/Strong&gt; </Strong>
[url="123]url[/url]
[url="123]url[/url]
[url="123]url[/url]
[url="123]"]url[/url]
<Link Target="123]">url</Link>
<Link Target="123]">url</Link>
[quote=123,456,"ABC
[quote=123,456,"ABC
[quote=123,456,"ABC
[quote=123,456,"ABC" ]Test[/quote]
<Quote ThreadId="123" PostId="456" Author="ABC">Test</Quote>
<Quote ThreadId="123" PostId="456" Author="ABC">Test</Quote>
[quote="Welt"][quote=123,456,"Ernie"]Hallo Welt![/quote]Hallo Ernie![/quote]
[quote="Welt"]<Quote ThreadId="123" PostId="456" Author="Ernie">Hallo Welt!</Quote>Hallo Ernie![/quote]
[quote="Welt"]<Quote ThreadId="123" PostId="456" Author="Ernie">Hallo Welt!</Quote>Hallo Ernie![/quote]
[list]Hallo [*]Welt [*]! [/list]
<List><Item>Hallo </Item><Item>Welt </Item><Item>! </Item></List>
<List><Item>Hallo </Item><Item>Welt </Item><Item>! </Item></List>
[list][*]Dies[*]ist[list][*]eine[*]verschachtelte[/list][*]Liste![/list]
<List><Item>Dies</Item><Item>ist<List><Item>eine</Item><Item>verschachtelte</Item></List></Item><Item>Liste!</Item></List>
<List><Item>Dies</Item><Item>ist<List><Item>eine</Item><Item>verschachtelte</Item></List></Item><Item>Liste!</Item></List>
Hallo Welt! [quote = 123 , 456 , "Ernie" ]Das ist doch nicht [b]dein[/b] Ernst?![/quote]
Hallo Welt! <Quote ThreadId="123" PostId="456" Author="Ernie">Das ist doch nicht <Strong>dein</Strong> Ernst?!</Quote>
Hallo Welt! <Quote ThreadId="123" PostId="456" Author="Ernie">Das ist doch nicht <Strong>dein</Strong> Ernst?!</Quote>
[code]Other [b]Tags[/b] inside Code-Tags[/code]
<Code>Other [b]Tags[/b] inside Code-Tags</Code>
<Code>Other [b]Tags[/b] inside Code-Tags</Code>

Code

bbcode.py

#!/bin/env python
# -*- coding: utf-8 -*-

import re

class InvalidTokenError(Exception):
    """Signals that a token cannot be attached at the current tree position."""

class WrongParameterCountError(Exception):
    """Signals that a start tag carries an unsupported number of parameters."""

class Token(object):
    """A single lexical unit of the BB-code input.

    Attributes:
        type: token kind; the parser uses 'string', 'start' and 'end'.
        name: BB-code tag name for start/end tokens ('' by default).
        parameters: list of tag parameters, or None when not given.
        string: the literal source text the token was built from.
    """

    def __init__(self, type, name='', parameters=None, string=''):
        self.type, self.name = type, name
        self.parameters, self.string = parameters, string

class Node(object):
    """Base element of the parse tree; also used directly as the tree root.

    Class attributes steer the parser:
        allowed_nodes: names of the children this node accepts.
        invalid_string_recovery / invalid_start_recovery /
        invalid_end_recovery: policy names the parser consults when a
            string, start tag or end tag is not acceptable here.
        bbname: BB-code name of the node ('' for the plain base node).
    """

    allowed_nodes = ['string', 'b', 'u', 's', 'i', 'mod', 'spoiler', 'm',
                     'code', 'php', 'img', 'quote', 'url', 'list', 'table']
    invalid_string_recovery = 'none'
    invalid_start_recovery = 'string'
    invalid_end_recovery = 'string'

    bbname = ''

    def __init__(self, parameters=None):
        # `parameters` is accepted so every node class can be constructed
        # the same way; the base node does not store it.
        self.parent = None
        self.children = []

    def __str__(self):
        """Render the node as the concatenation of its rendered children."""
        return ''.join(str(child) for child in self.children)

    def append(self, child):
        """Attach `child` as the last child and make this node its parent."""
        child.parent = self
        self.children.append(child)

    def get_invalid_start_recovery(self, name):
        """Return the recovery policy for a rejected start tag `name`."""
        return self.invalid_start_recovery

class String(Node):
    """Leaf node that carries literal text."""

    def __init__(self, string):
        # Node.__init__ is intentionally not invoked here (as in the
        # original design): a String only stores its text.
        self.string = string

    def append(self, string):
        """Extend the stored text with `string`."""
        self.string = self.string + string

    def __str__(self):
        """Return the stored text unchanged."""
        return self.string

class Tag(Node):
    """Base class of all BB-code tags.

    `parameter_count` lists the numbers of parameters a start tag may
    carry; the parser rejects any other count.
    """

    parameter_count = [0]
    bbname = ''

class SimpleTag(Tag):
    """Tag rendered as `<tagname>children</tagname>`.

    `pre_format` and `post_format` are %-templates that each receive the
    tag name. A tag whose children render to nothing is omitted entirely.
    """

    invalid_start_recovery = 'close'
    invalid_end_recovery = 'reopen'

    tagname = ''
    pre_format = '<%s>'
    post_format = '</%s>'

    def __str__(self):
        inner = ''.join(str(child) for child in self.children)
        if inner == '':
            # Empty elements are dropped from the output.
            return ''
        template = self.pre_format + '%s' + self.post_format
        return template % (self.tagname, inner, self.tagname)

class StrongTag(SimpleTag):
    """BB-code `[b]`, rendered as a `Strong` element."""

    tagname = 'Strong'
    bbname = 'b'

class EmTag(SimpleTag):
    """BB-code `[i]`, rendered as an `Em` element."""

    tagname = 'Em'
    bbname = 'i'

class StrikeTag(SimpleTag):
    """BB-code `[s]`, rendered as a `Strike` element."""

    tagname = 'Strike'
    bbname = 's'

class UnderlineTag(SimpleTag):
    """BB-code `[u]`, rendered as an `Underline` element."""

    tagname = 'Underline'
    bbname = 'u'

class InlineCodeTag(SimpleTag):
    """BB-code `[m]`, rendered as `InlineCode`; accepts only literal text."""

    tagname = 'InlineCode'
    bbname = 'm'
    allowed_nodes = ['string']

class CodeTag(SimpleTag):
    """BB-code `[code]`, rendered as `Code`; accepts only literal text."""

    tagname = 'Code'
    bbname = 'code'
    allowed_nodes = ['string']

class PhpCodeTag(SimpleTag):
    """BB-code `[php]`, rendered as `Code` with `Language="PHP"`.

    Accepts only literal text as content.
    """

    tagname = 'Code'
    bbname = 'php'
    allowed_nodes = ['string']
    pre_format = '<%s Language="PHP">'

class SpoilerTag(SimpleTag):
    """BB-code `[spoiler]`, rendered as a `Spoiler` element.

    Uses the 'close' policy for a mismatched end tag instead of the
    'reopen' policy inherited from SimpleTag.
    """

    tagname = 'Spoiler'
    bbname = 'spoiler'
    invalid_end_recovery = 'close'

class HighlightTag(SimpleTag):
    """BB-code `[mod]`, rendered as a `Highlight` element."""

    tagname = 'Highlight'
    bbname = 'mod'

class ParameterTag(Tag):
    """Tag whose opening element depends on the parameters it was given.

    `pre_format` maps a parameter count to the %-template of the opening
    element; the template receives the tag name followed by the
    parameters. `post_format` receives the tag name only.
    """

    tagname = ''
    pre_format = {0: '<%s>'}
    post_format = '</%s>'

    def __init__(self, parameters):
        Tag.__init__(self)
        self.parameters = parameters

    def __str__(self):
        # One lookup covers the zero-parameter case too: the value tuple
        # then consists of the tag name alone.
        values = (self.tagname,) + tuple(self.parameters)
        opening = self.pre_format[len(self.parameters)] % values
        body = ''.join(str(child) for child in self.children)
        closing = self.post_format % self.tagname
        return opening + body + closing

class QuoteTag(ParameterTag):
    """BB-code `[quote]`, rendered as a `Quote` element.

    Takes either no parameters or exactly three, which are emitted as the
    ThreadId, PostId and Author attributes (in that order).
    """

    tagname = 'Quote'
    bbname = 'quote'
    invalid_end_recovery = 'close'
    parameter_count = [0, 3]
    pre_format = {0: '<%s>',  3:'<%s ThreadId="%s" PostId="%s" Author="%s">'}

class ListTag(SimpleTag):
    """BB-code `[list]`, rendered as a `List` element.

    Only `*` (item) children are accepted; the 'add *' policies ask the
    parser to open an item first when other content shows up.
    """

    tagname = 'List'
    bbname = 'list'
    allowed_nodes = ['*']
    invalid_end_recovery = 'close'
    invalid_start_recovery = 'add *'
    invalid_string_recovery = 'add *'

class ItemTag(SimpleTag):
    """BB-code `[*]` list entry, rendered as an `Item` element."""

    tagname = 'Item'
    bbname = '*'
    invalid_end_recovery = 'close'

class TableTag(SimpleTag):
    """BB-code `[table]`, rendered as a `Table` element.

    Only `--` (row) children are accepted; the 'add --' policies ask the
    parser to open a row first when other content shows up.
    """

    tagname = 'Table'
    bbname = 'table'
    allowed_nodes = ['--']
    invalid_end_recovery = 'close'
    invalid_start_recovery = 'add --'
    invalid_string_recovery = 'add --'

class RowTag(SimpleTag):
    """BB-code `[--]` table row, rendered as a `Row` element.

    Only `||` (cell) children are accepted; other content makes the
    parser open a cell first.
    """

    tagname = 'Row'
    bbname = '--'
    allowed_nodes = ['||']
    invalid_end_recovery = 'close'
    invalid_start_recovery = 'add ||'
    invalid_string_recovery = 'add ||'

    def get_invalid_start_recovery(self, name):
        """Return 'close' for a new row marker, else the default policy."""
        if name != '--':
            return self.invalid_start_recovery
        # A row marker inside a row ends the current row.
        return 'close'

class CellTag(SimpleTag):
    """BB-code `[||]` table cell, rendered as a `Cell` element."""

    tagname = 'Cell'
    bbname = '||'
    invalid_end_recovery = 'close'

class ShortTag(ParameterTag):
    """Parameter tag that can take its first parameter from its content.

    When constructed without parameters, `need_parameter` is set and the
    text of appended String children is collected into `parameters[0]`
    instead of being stored as child nodes.
    """

    def __init__(self, parameters):
        self.need_parameter = not parameters
        if self.need_parameter:
            # Start with an empty slot that append() fills from the content.
            ParameterTag.__init__(self, [''])
        else:
            ParameterTag.__init__(self, parameters)

    def append(self, child):
        if self.need_parameter and isinstance(child, String):
            self.parameters[0] += child.string
            return
        ParameterTag.append(self, child)

class ImageTag(ShortTag):
    """BB-code `[img]url[/img]`, rendered as an empty `Image` element.

    The tag takes no explicit parameters; its text content is collected by
    ShortTag into `parameters[0]` and emitted as the `Source` attribute.
    """

    bbname = 'img'
    allowed_nodes = ['string']
    invalid_start_recovery = 'string'
    invalid_end_recovery = 'string'

    def __str__(self):
        # The source comes from free text between the tags, which may
        # contain double quotes. Escape them so the text cannot terminate
        # the attribute value and inject further attributes.
        source = self.parameters[0].replace('"', '&quot;')
        return '<Image Source="%s" />' % source

class LinkTag(ShortTag):
    """BB-code `[url]`, rendered as a `Link` element.

    Two forms are supported:
        `[url]target[/url]`       -> `<Link Target="target" />`
        `[url=target]name[/url]`  -> `<Link Target="target">name</Link>`

    In both forms the text content is collected into `parameters[0]`; when
    an explicit target was given it is kept in `parameters[1]`.
    """

    bbname = 'url'
    allowed_nodes = ['string']
    invalid_start_recovery = 'string'
    invalid_end_recovery = 'string'
    parameter_count = [0, 1]

    def __init__(self, parameters):
        ShortTag.__init__(self, parameters)
        # Content text is always collected, even with an explicit target.
        self.need_parameter = True
        if parameters:
            # Reserve slot 0 for the link text; the target moves to slot 1.
            self.parameters = [''] + self.parameters

    @staticmethod
    def _attribute(value):
        """Escape double quotes so `value` is safe inside a "..." attribute."""
        return value.replace('"', '&quot;')

    def __str__(self):
        if len(self.parameters) == 1:
            # The target is free text from between the tags and may contain
            # double quotes; escape them to keep the attribute intact.
            return '<Link Target="%s" />' % self._attribute(self.parameters[0])
        return '<Link Target="%s">%s</Link>' % (
            self._attribute(self.parameters[1]), self.parameters[0])

class BbCodeParser(object):
    """Convert BB-code markup into XML using the tag classes in `_TAGS`.

    Parsing runs in two phases: `_lexical_analysis` splits the input into
    'string', 'start' and 'end' tokens, then `parse` consumes the tokens
    and builds a node tree, repairing invalid nesting according to the
    recovery policies declared on the node classes.

    Parser state (`string`, `tokens`, `root`, `idx`) is stored on the
    instance and overwritten by every `parse` call.
    """

    # Lexer template; both %s slots receive the alternation of tag names.
    # Groups: `string` (text before a token), `starttoken`/`sname`/
    # `parameters` for "[name=params]" and `endtoken`/`ename` for "[/name]".
    _REGEX_STR = r'(?P<string>.*?)((?P<starttoken>\[\s*(?P<sname>%s)\s*(=(?P<parameters>(\s*(("[^"]+")|([^,"]+?))\s*,)*(\s*(("[^"]+")|([^,"]+?))\s*)))?\])|(?P<endtoken>\[/(?P<ename>%s)\]))'
    # Extracts one parameter at a time, either "quoted" or bare.
    _SPLIT_RGX = re.compile(r'\s*(("(?P<quoted>[^"]+)")|(?P<normal>[^,"]*[^,\s"]))\s*')

    # BB-code tag name -> node class.
    _TAGS = {'b': StrongTag,
             'i': EmTag,
             's': StrikeTag,
             'u': UnderlineTag,
             'm': InlineCodeTag,
             'code': CodeTag,
             'php': PhpCodeTag,
             'spoiler': SpoilerTag,
             'mod': HighlightTag,
             'quote': QuoteTag,
             'list': ListTag,
             '*': ItemTag,
             'img': ImageTag,
             'url': LinkTag,
             'table': TableTag,
             '--': RowTag,
             '||': CellTag}

    @classmethod
    def _split_parameters(cls, string):
        """Split the raw parameter text of a start tag into a list.

        Returns an empty list for an empty or missing parameter text.
        Quoted parameters are returned without their quotes.
        """
        if not string:
            return []

        parameters = []
        for match in cls._SPLIT_RGX.finditer(string):
            value = match.group('quoted') or match.group('normal')
            if not value:
                raise Exception('split_parameters failed')
            parameters.append(value)
        return parameters

    @classmethod
    def _escape_regex(cls, string):
        """Escape `string` for literal use inside a regular expression."""
        # re.escape covers every metacharacter, not only '*' and '|'.
        return re.escape(string)

    def __init__(self):
        self.tags = {}

    def _lexical_analysis(self):
        """Tokenize `self.string` into `self.tokens`."""
        self.tokens = []

        bbtags = '|'.join(self._escape_regex(tag) for tag in self._TAGS)
        bbtags = '(' + bbtags + ')'

        regex = re.compile(self._REGEX_STR % (bbtags, bbtags), re.I | re.S)

        # End offset of the last match. Matches are contiguous because the
        # pattern starts with a lazy `.*?` that absorbs any leading text.
        pos = 0

        for match in regex.finditer(self.string):
            if match.group('string'):
                self.tokens.append(Token('string',
                                         string=match.group('string')))
            if match.group('starttoken'):
                parameters = self._split_parameters(match.group('parameters'))
                self.tokens.append(Token('start',
                                         match.group('sname').lower(),
                                         parameters,
                                         match.group('starttoken')))
            elif match.group('endtoken'):
                self.tokens.append(Token('end',
                                         match.group('ename').lower(),
                                         [],
                                         match.group('endtoken')))
            pos = match.end()

        # Whatever follows the last tag is plain text.
        if pos < len(self.string):
            self.tokens.append(Token('string', string=self.string[pos:]))

    def parse(self, string):
        """Parse BB-code `string` and return the generated XML text.

        Markup characters in the input are escaped first. '&' is handled
        before '<' and '>' so the entities produced for the latter are not
        escaped a second time; without it any '&' in the input would make
        the output ill-formed XML.

        Raises:
            Exception: if a node class declares an unknown recovery policy
                or a token of unknown type is encountered.
        """
        self.string = (string.replace('&', '&amp;')
                             .replace('<', '&lt;')
                             .replace('>', '&gt;'))

        self.root = Node()
        self.idx = self.root

        self._lexical_analysis()

        # The head token is retried after each recovery step (`continue`)
        # and removed only once it was consumed successfully.
        while len(self.tokens) > 0:
            token = self.tokens[0]

            if token.type == 'string':
                try:
                    self._add_string(token.string)
                except InvalidTokenError:
                    policy = self.idx.invalid_string_recovery
                    if policy[0:3] == 'add':
                        # 'add X': open tag X first, then retry the text.
                        self.tokens.insert(0, Token('start', policy[4:]))
                    else:
                        raise Exception('unknown invalid_string_recovery')
                    continue
            elif token.type == 'start':
                try:
                    self._add_start(token.name, token.parameters)
                except InvalidTokenError:
                    policy = self.idx.get_invalid_start_recovery(token.name)

                    # No open tag would accept it: keep it as literal text.
                    # NOTE(review): the root is not consulted by
                    # _is_tag_open2, so this also overrides an 'add ...'
                    # policy when only the rejecting tag itself is open --
                    # confirm that this is intended.
                    if not self._is_tag_open2(token.name):
                        policy = 'string'

                    if policy == 'close':
                        self.tokens.insert(0, Token('end', self.idx.bbname))
                    elif policy == 'string':
                        token.type = 'string'
                    elif policy[0:3] == 'add':
                        self.tokens.insert(0, Token('start', policy[4:]))
                    else:
                        raise Exception('unknown invalid_start_recovery')
                    continue
                except WrongParameterCountError:
                    token.type = 'string'
                    continue
            elif token.type == 'end':
                try:
                    self._add_end(token.name)
                except InvalidTokenError:
                    policy = self.idx.invalid_end_recovery

                    # An end tag without an open counterpart is plain text.
                    if not self._is_tag_open(token.name):
                        policy = 'string'

                    if policy == 'reopen':
                        # Close the current tag, handle the pending end
                        # token, then open the current tag again.
                        self.tokens.insert(0, Token('end', self.idx.bbname))
                        self.tokens.insert(2, Token('start', self.idx.bbname))
                    elif policy == 'close':
                        self.tokens.insert(0, Token('end', self.idx.bbname))
                    elif policy == 'string':
                        token.type = 'string'
                    else:
                        raise Exception('unknown invalid_end_recovery')
                    continue
            else:
                raise Exception('Unknown Token-Type: %s' % token.type)
            del self.tokens[0]

        return str(self.root)

    def _add_string(self, string):
        """Append literal text below the current node.

        Raises:
            InvalidTokenError: if the current node does not accept text.
        """
        if 'string' not in self.idx.allowed_nodes:
            raise InvalidTokenError()
        self.idx.append(String(string))

    def _add_start(self, name, parameters):
        """Open tag `name` below the current node and descend into it.

        Raises:
            InvalidTokenError: if the current node does not accept `name`.
            WrongParameterCountError: if the tag class does not support
                the given number of parameters.
        """
        if name not in self.idx.allowed_nodes:
            raise InvalidTokenError()

        tag_class = self._TAGS[name]

        # Tokens synthesized during recovery carry no parameter list.
        if parameters is None:
            parameters = []
        if len(parameters) not in tag_class.parameter_count:
            raise WrongParameterCountError()

        tag = tag_class(parameters)
        self.idx.append(tag)
        self.idx = tag

    def _add_end(self, name):
        """Close the current tag and move back up to its parent.

        Raises:
            InvalidTokenError: if the current node is not a `name` tag.
        """
        if self.idx.bbname != name:
            raise InvalidTokenError()

        self.idx = self.idx.parent

    def _is_tag_open(self, name):
        """Return True if a `name` tag is open at or above the current node.

        The root node itself is not considered.
        """
        node = self.idx
        while node != self.root:
            if node.bbname == name:
                return True
            node = node.parent
        return False

    def _is_tag_open2(self, name):
        """Return True if an open node (root excluded) accepts child `name`."""
        node = self.idx
        while node != self.root:
            if name in node.allowed_nodes:
                return True
            node = node.parent
        return False

test.py

#!/bin/env python
# -*- coding: utf-8 -*-

import bbcode
import cgi
import time

from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import HtmlFormatter

# Emit the opening part of the HTML report: document head and the
# stylesheet for the three-column result boxes (green border = passed,
# red border = failed).
print """<html>
<head>
    <title>Simple bb to xml test</title>
    <style type="text/css">
        .box {
            float: left;
            width: 30%;
            border: 1px solid;
            margin-bottom: 10px;
            margin-right: 1%;
        }
        .success {
            border-color: green;
        }
        .fail {
            border-color: red;
        }
        hr, h1, h2, h3 {
            clear: both;
        }"""

# Append the pygments CSS rules used by the source listings at the end
# of the report.
print HtmlFormatter().get_style_defs('.highlight')

# Close the stylesheet and head, then start the body with the headings.
print """
    </style>
</head>
<body>
<h1>BB-Code-Parser</h1>
<h2>Test-Cases</h2>"""

# Single parser instance shared by all Compare() calls below.
parser = bbcode.BbCodeParser()

def str_to_html(string):
    """Return `string` with '&', '<' and '>' replaced by HTML entities.

    Quotes are left untouched, matching the default of `cgi.escape`.
    """
    try:
        # Preferred where available; cgi.escape was removed from newer
        # Python versions.
        from html import escape
    except ImportError:
        from cgi import escape
    return escape(string, quote=False)

def Compare(bbcode, xml):
    result = parser.parse(bbcode)

    barclass = 'fail'
    if result == xml:
        barclass = 'success'

    print '<div style="clear: both;">'
    print '    <div class="box">%s</div>' % str_to_html(bbcode)
    print '    <div class="box">%s</div>' % str_to_html(xml)
    print '    <div class="box %s">%s</div>' % (barclass, str_to_html(result))
    print '</div>'

# Each Compare() call renders one test case: BB-code input, expected XML.

# One case per supported tag in its plain form.
print '<h3>ConvertsSimpleTokens</h3>'
Compare("[b]fett[/b]", "<Strong>fett</Strong>")
Compare("[i]kursiv[/i]", "<Em>kursiv</Em>")
Compare("[s]durchgestrichen[/s]", "<Strike>durchgestrichen</Strike>")
Compare("[m]inline code[/m]", "<InlineCode>inline code</InlineCode>")
Compare("[code]code block[/code]", "<Code>code block</Code>")
Compare("[php]php code block[/php]", "<Code Language=\"PHP\">php code block</Code>")
Compare("[spoiler]spoiler[/spoiler]", "<Spoiler>spoiler</Spoiler>")
Compare("[mod]moderator text[/mod]", "<Highlight>moderator text</Highlight>")
Compare("[url]http://tempuri.org[/url]", "<Link Target=\"http://tempuri.org\" />")
Compare("[url=http://tempuri.org]Link name[/url]", "<Link Target=\"http://tempuri.org\">Link name</Link>")
Compare("[img]http://tempuri.org/Image.png[/img]", "<Image Source=\"http://tempuri.org/Image.png\" />")
Compare("[list][*]erstens[*]zweitens[*]drittens[/list]", "<List><Item>erstens</Item><Item>zweitens</Item><Item>drittens</Item></List>")
Compare("[quote]zitat ohne autor[/quote]", "<Quote>zitat ohne autor</Quote>")
Compare("[quote=123,456,\"[DK]Peacemaker\"]zitat von autor mit sonderzeichen[/quote]", "<Quote ThreadId=\"123\" PostId=\"456\" Author=\"[DK]Peacemaker\">zitat von autor mit sonderzeichen</Quote>")
Compare("[table]1[||]2[--]3[||]4[/table]", "<Table><Row><Cell>1</Cell><Cell>2</Cell></Row><Row><Cell>3</Cell><Cell>4</Cell></Row></Table>")

# Correctly nested tags.
print '<h3>AllowsNestedTokens</h3>'
Compare("[b][i]fett und kursiv[/i][/b]", "<Strong><Em>fett und kursiv</Em></Strong>")
Compare("[b][i][s][u][m]verschachtelt[/m][/u][/s][/i][/b]",
            "<Strong><Em><Strike><Underline><InlineCode>verschachtelt</InlineCode></Underline></Strike></Em></Strong>")
Compare("[quote]kein autor[quote=123,456,\"Author\"]mit autor[/quote]ok?[/quote]",
            "<Quote>kein autor<Quote ThreadId=\"123\" PostId=\"456\" Author=\"Author\">mit autor</Quote>ok?</Quote>")
Compare("[list][*][b]fett[/b][*][i]kursiv[/i][/list]",
            "<List><Item><Strong>fett</Strong></Item><Item><Em>kursiv</Em></Item></List>")

# Missing or misordered end tags.
print '<h3>CorrectsMissingEndTokens</h3>'

Compare("[b][i]fett und kursiv[/b]", "<Strong><Em>fett und kursiv</Em></Strong>")
Compare("[b]fett", "<Strong>fett</Strong>")
Compare("[quote]zitat von [b]autor[quote]weiteres zitat[/quote]",
            "<Quote>zitat von <Strong>autor<Quote>weiteres zitat</Quote></Strong></Quote>")

Compare("[b][i]fett und kursiv, aber vertauscht[/b][/i]",
            "<Strong><Em>fett und kursiv, aber vertauscht</Em></Strong>")
Compare("[b]Hallo[i]Welt[/b]![/i]", "<Strong>Hallo<Em>Welt</Em></Strong><Em>!</Em>")
Compare("[u][b]Hallo[i][s]Welt[/b][/u]![/s][/i]", "<Underline><Strong>Hallo<Em><Strike>Welt</Strike></Em></Strong></Underline><Em><Strike>!</Strike></Em>")

# Quoted parameter values.
print '<h3>AllowsParameterLiterals</h3>'
Compare("[quote=\"123\", \"456\", \"Author\"]Zitat[/quote]",
            "<Quote ThreadId=\"123\" PostId=\"456\" Author=\"Author\">Zitat</Quote>")
Compare("[url=\"http://tempuri.org\"]Name[/url]",
            "<Link Target=\"http://tempuri.org\">Name</Link>")


# Upper-case start tags paired with lower-case end tags.
print '<h3>IgnoresCase</h3>'
Compare("[B]fett[/b]", "<Strong>fett</Strong>")
Compare("[I]kursiv[/i]", "<Em>kursiv</Em>")
Compare("[S]durchgestrichen[/s]", "<Strike>durchgestrichen</Strike>")
Compare("[M]inline code[/m]", "<InlineCode>inline code</InlineCode>")
Compare("[CODE]code block[/code]", "<Code>code block</Code>")
Compare("[PHP]php code block[/php]", "<Code Language=\"PHP\">php code block</Code>")
Compare("[SPOILER]spoiler[/spoiler]", "<Spoiler>spoiler</Spoiler>")
Compare("[MOD]moderator text[/mod]", "<Highlight>moderator text</Highlight>")
Compare("[URL]http://tempuri.org[/url]", "<Link Target=\"http://tempuri.org\" />")
Compare("[URL=http://tempuri.org]Link name[/url]", "<Link Target=\"http://tempuri.org\">Link name</Link>")
Compare("[IMG]http://tempuri.org/Image.png[/img]", "<Image Source=\"http://tempuri.org/Image.png\" />")
Compare("[LIST][*]erstens[*]zweitens[*]drittens[/list]",
            "<List><Item>erstens</Item><Item>zweitens</Item><Item>drittens</Item></List>")
Compare("[QUOTE]zitat ohne autor[/quote]", "<Quote>zitat ohne autor</Quote>")
Compare("[QUOTE=123,456,\"[DK]Peacemaker\"]zitat von autor mit sonderzeichen[/quote]",
            "<Quote ThreadId=\"123\" PostId=\"456\" Author=\"[DK]Peacemaker\">zitat von autor mit sonderzeichen</Quote>")

# Edge cases: raw markup characters, unterminated quotes and parameter
# lists, wrong parameter counts, text directly inside lists, nested lists,
# whitespace around parameters, tags inside code blocks.
print '<h3>Klappfallscheibe</h3>'
Compare("[b] Fett </Strong> [/b]", "<Strong> Fett &lt;/Strong&gt; </Strong>")
Compare("[url=\"123]url[/url]", "[url=\"123]url[/url]")
Compare("[url=\"123]\"]url[/url]", "<Link Target=\"123]\">url</Link>")
Compare("[quote=123,456,\"ABC", "[quote=123,456,\"ABC")
Compare("[quote=123,456,\"ABC\" ]Test[/quote]", "<Quote ThreadId=\"123\" PostId=\"456\" Author=\"ABC\">Test</Quote>")
Compare("[quote=\"Welt\"][quote=123,456,\"Ernie\"]Hallo Welt![/quote]Hallo Ernie![/quote]", "[quote=\"Welt\"]<Quote ThreadId=\"123\" PostId=\"456\" Author=\"Ernie\">Hallo Welt!</Quote>Hallo Ernie![/quote]")
Compare("[list]Hallo [*]Welt [*]! [/list]", "<List><Item>Hallo </Item><Item>Welt </Item><Item>! </Item></List>")
Compare("[list][*]Dies[*]ist[list][*]eine[*]verschachtelte[/list][*]Liste![/list]", "<List><Item>Dies</Item><Item>ist<List><Item>eine</Item><Item>verschachtelte</Item></List></Item><Item>Liste!</Item></List>")
Compare('Hallo Welt! [quote = 123 , 456 , "Ernie" ]Das ist doch nicht [b]dein[/b] Ernst?![/quote]', 'Hallo Welt! <Quote ThreadId="123" PostId="456" Author="Ernie">Das ist doch nicht <Strong>dein</Strong> Ernst?!</Quote>')
Compare("[code]Other [b]Tags[/b] inside Code-Tags[/code]", "<Code>Other [b]Tags[/b] inside Code-Tags</Code>")

print '<h2>Code</h2>'

for file in ['bbcode.py', 'test.py', 'bbcodeparser', 'test-cli.py']:
    print "<h3>%s</h3>" % file

    f = open(file, 'r')
    code = f.read()
    print highlight(code, PythonLexer(), HtmlFormatter())

print '<p>Last modified: %s</p>' % time.asctime()

print """</body>
</html>"""

bbcodeparser

#!/bin/env python
# -*- coding: utf-8 -*-

import sys
import bbcode

# Filter: read BB-code from standard input, write the resulting XML to
# standard output (no trailing newline is added).
parser = bbcode.BbCodeParser()
source = sys.stdin.read()
sys.stdout.write(parser.parse(source))

test-cli.py

#!/bin/env python
# -*- coding: utf-8 -*-

import subprocess

# Running totals updated by Compare(): cases run, passed and failed.
test = passed = failed = 0

def Compare(bbcode, xml):
    global parser, test, passed, failed

    proc = subprocess.Popen('./bbcodeparser',
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)

    #result = proc.communicate(bbcode)[0].strip()
    result = proc.communicate(bbcode)[0]

    test += 1

    if result == xml:
        passed += 1
        rst = '\033[32mpassed\033[0m'
    else:
        failed += 1
        rst = '\033[31mfailed\033[0m'

    print '* Test %-62d [ %s ]' % (test, rst)
    print indent('BB-Code', bbcode)
    print indent('Expected', xml)
    print indent('Output', result)
    print

def indent(s1, s2):
    """Format a "label: value" line and wrap it every 75 characters.

    The label `s1` gets a trailing colon and is padded to ten columns;
    continuation chunks are joined by the fixed wrap separator.
    """
    label = '%s:' % s1
    line = '  %-10s %s' % (label, s2)
    pieces = [line[start:start + 75] for start in range(0, len(line), 75)]
    return '     \n             '.join(pieces)

# Each Compare() call pipes the first argument through ./bbcodeparser and
# checks the output against the second argument.

# Plain form of every supported tag.
Compare("[b]fett[/b]", 
        "<Strong>fett</Strong>")
Compare("[i]kursiv[/i]", 
        "<Em>kursiv</Em>")
Compare("[s]durchgestrichen[/s]", 
        "<Strike>durchgestrichen</Strike>")
Compare("[m]inline code[/m]", 
        "<InlineCode>inline code</InlineCode>")
Compare("[code]code block[/code]", 
        "<Code>code block</Code>")
Compare("[php]php code block[/php]", 
        "<Code Language=\"PHP\">php code block</Code>")
Compare("[spoiler]spoiler[/spoiler]", 
        "<Spoiler>spoiler</Spoiler>")
Compare("[mod]moderator text[/mod]", 
        "<Highlight>moderator text</Highlight>")
Compare("[url]http://tempuri.org[/url]", 
        "<Link Target=\"http://tempuri.org\" />")
Compare("[url=http://tempuri.org]Link name[/url]", 
        "<Link Target=\"http://tempuri.org\">Link name</Link>")
Compare("[img]http://tempuri.org/Image.png[/img]", 
        "<Image Source=\"http://tempuri.org/Image.png\" />")
Compare("[list][*]erstens[*]zweitens[*]drittens[/list]", 
        "<List><Item>erstens</Item><Item>zweitens</Item><Item>drittens</Item></List>")
Compare("[quote]zitat ohne autor[/quote]", 
        "<Quote>zitat ohne autor</Quote>")
Compare("[quote=123,456,\"[DK]Peacemaker\"]zitat von autor mit sonderzeichen[/quote]", 
        "<Quote ThreadId=\"123\" PostId=\"456\" Author=\"[DK]Peacemaker\">zitat von autor mit sonderzeichen</Quote>")
Compare("[table]1[||]2[--]3[||]4[/table]", 
        "<Table><Row><Cell>1</Cell><Cell>2</Cell></Row><Row><Cell>3</Cell><Cell>4</Cell></Row></Table>")
# Correctly nested tags.
Compare("[b][i]fett und kursiv[/i][/b]", 
        "<Strong><Em>fett und kursiv</Em></Strong>")
Compare("[b][i][s][u][m]verschachtelt[/m][/u][/s][/i][/b]",
        "<Strong><Em><Strike><Underline><InlineCode>verschachtelt</InlineCode></Underline></Strike></Em></Strong>")
Compare("[quote]kein autor[quote=123,456,\"Author\"]mit autor[/quote]ok?[/quote]",
        "<Quote>kein autor<Quote ThreadId=\"123\" PostId=\"456\" Author=\"Author\">mit autor</Quote>ok?</Quote>")
Compare("[list][*][b]fett[/b][*][i]kursiv[/i][/list]",
        "<List><Item><Strong>fett</Strong></Item><Item><Em>kursiv</Em></Item></List>")
# Missing or misordered end tags.
Compare("[b][i]fett und kursiv[/b]", 
        "<Strong><Em>fett und kursiv</Em></Strong>")
Compare("[b]fett", 
        "<Strong>fett</Strong>")
Compare("[quote]zitat von [b]autor[quote]weiteres zitat[/quote]",
        "<Quote>zitat von <Strong>autor<Quote>weiteres zitat</Quote></Strong></Quote>")
Compare("[b][i]fett und kursiv, aber vertauscht[/b][/i]",
        "<Strong><Em>fett und kursiv, aber vertauscht</Em></Strong>")
Compare("[b]Hallo[i]Welt[/b]![/i]", 
        "<Strong>Hallo<Em>Welt</Em></Strong><Em>!</Em>")
Compare("[u][b]Hallo[i][s]Welt[/b][/u]![/s][/i]", 
        "<Underline><Strong>Hallo<Em><Strike>Welt</Strike></Em></Strong></Underline><Em><Strike>!</Strike></Em>")
# Quoted parameter values.
Compare("[quote=\"123\", \"456\", \"Author\"]Zitat[/quote]",
        "<Quote ThreadId=\"123\" PostId=\"456\" Author=\"Author\">Zitat</Quote>")
Compare("[url=\"http://tempuri.org\"]Name[/url]",
        "<Link Target=\"http://tempuri.org\">Name</Link>")
# Upper-case start tags paired with lower-case end tags.
Compare("[B]fett[/b]", 
        "<Strong>fett</Strong>")
Compare("[I]kursiv[/i]", 
        "<Em>kursiv</Em>")
Compare("[S]durchgestrichen[/s]", 
        "<Strike>durchgestrichen</Strike>")
Compare("[M]inline code[/m]", 
        "<InlineCode>inline code</InlineCode>")
Compare("[CODE]code block[/code]", 
        "<Code>code block</Code>")
Compare("[PHP]php code block[/php]", 
        "<Code Language=\"PHP\">php code block</Code>")
Compare("[SPOILER]spoiler[/spoiler]", 
        "<Spoiler>spoiler</Spoiler>")
Compare("[MOD]moderator text[/mod]", 
        "<Highlight>moderator text</Highlight>")
Compare("[URL]http://tempuri.org[/url]", 
        "<Link Target=\"http://tempuri.org\" />")
Compare("[URL=http://tempuri.org]Link name[/url]", 
        "<Link Target=\"http://tempuri.org\">Link name</Link>")
Compare("[IMG]http://tempuri.org/Image.png[/img]", 
        "<Image Source=\"http://tempuri.org/Image.png\" />")
Compare("[LIST][*]erstens[*]zweitens[*]drittens[/list]",
        "<List><Item>erstens</Item><Item>zweitens</Item><Item>drittens</Item></List>")
Compare("[QUOTE]zitat ohne autor[/quote]", 
        "<Quote>zitat ohne autor</Quote>")
Compare("[QUOTE=123,456,\"[DK]Peacemaker\"]zitat von autor mit sonderzeichen[/quote]",
        "<Quote ThreadId=\"123\" PostId=\"456\" Author=\"[DK]Peacemaker\">zitat von autor mit sonderzeichen</Quote>")
# Edge cases: raw markup characters, unterminated quotes and parameter
# lists, wrong parameter counts, text directly inside lists, nested lists,
# whitespace around parameters, tags inside code blocks.
Compare("[b] Fett </Strong> [/b]", 
        "<Strong> Fett &lt;/Strong&gt; </Strong>")
Compare("[url=\"123]url[/url]", 
        "[url=\"123]url[/url]")
Compare("[url=\"123]\"]url[/url]", 
        "<Link Target=\"123]\">url</Link>")
Compare("[quote=123,456,\"ABC", 
        "[quote=123,456,\"ABC")
Compare("[quote=123,456,\"ABC\" ]Test[/quote]", 
        "<Quote ThreadId=\"123\" PostId=\"456\" Author=\"ABC\">Test</Quote>")
Compare("[quote=\"Welt\"][quote=123,456,\"Ernie\"]Hallo Welt![/quote]Hallo Ernie![/quote]", 
        "[quote=\"Welt\"]<Quote ThreadId=\"123\" PostId=\"456\" Author=\"Ernie\">Hallo Welt!</Quote>Hallo Ernie![/quote]")
Compare("[list]Hallo [*]Welt [*]! [/list]", 
        "<List><Item>Hallo </Item><Item>Welt </Item><Item>! </Item></List>")
Compare("[list][*]Dies[*]ist[list][*]eine[*]verschachtelte[/list][*]Liste![/list]", 
        "<List><Item>Dies</Item><Item>ist<List><Item>eine</Item><Item>verschachtelte</Item></List></Item><Item>Liste!</Item></List>")
Compare('Hallo Welt! [quote = 123 , 456 , "Ernie" ]Das ist doch nicht [b]dein[/b] Ernst?![/quote]', 
        'Hallo Welt! <Quote ThreadId="123" PostId="456" Author="Ernie">Das ist doch nicht <Strong>dein</Strong> Ernst?!</Quote>')
Compare("[code]Other [b]Tags[/b] inside Code-Tags[/code]", 
        "<Code>Other [b]Tags[/b] inside Code-Tags</Code>")

# Summary line with the totals collected by Compare().
print "%d tests: %d passed, %d failed" % (test, passed, failed)

Last modified: Wed Feb 17 16:22:23 2010