diff --git a/scripts/generate.py b/scripts/generate.py index fc40371..f40ca64 100644 --- a/scripts/generate.py +++ b/scripts/generate.py @@ -44,7 +44,7 @@ MONITORED_EVENT_TYPES = ( 'IN_MOVE_SELF', ) -WHITESPACE_RE = re.compile(r'\s') +TEXT_OR_LINK_RE = re.compile(r'([^\s\[\]]+|.)') ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -409,22 +409,20 @@ def render_text_tokens(tokens, acc, headline, graph): contents = [] for line in lines: line_chunks = [] - for word in WHITESPACE_RE.split(line): + for word in TEXT_OR_LINK_RE.findall(line): if ':/' in word and not (word.startswith('org-protocol://')): if not (word.startswith('http://') or word.startswith('https://') or word.startswith('ftp://') or word.startswith('ftps://') ): - print("Line:", line) - print("Chunks:", WHITESPACE_RE.split(line)) - raise Exception('Is this a link? {} (on {})'.format(word, headline.doc.path)) + raise Exception('Is this a link? {} (on {})\nLine: {}\nChunks: {}'.format(word, headline.doc.path, line, chunks)) line_chunks.append('{description}' .format(url=word, description=html.escape(word))) else: line_chunks.append(html.escape(word)) - contents.append(' '.join(line_chunks)) + contents.append(''.join(line_chunks)) acc.append('{}'.format('

'.join(contents)))