rendercomment: respectful linkification of Git commits

Message ID 20200130115700.19185-1-lfleischer@archlinux.org
State New
Headers show
Series rendercomment: respectful linkification of Git commits | expand

Commit Message

Lukas Fleischer Jan. 30, 2020, 11:57 a.m. UTC
From: Frédéric Mangano-Tarumi <fmang@mg0.fr>

Turn the git-commits markdown processor into an inline processor, which
is smart enough not to convert Git hashes contained in code blocks or
links.

Signed-off-by: Lukas Fleischer <lfleischer@archlinux.org>
---
 aurweb/scripts/rendercomment.py | 36 ++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 16 deletions(-)

Comments

Lukas Fleischer Jan. 30, 2020, 12:07 p.m. UTC | #1
On Thu, 30 Jan 2020 at 12:57:00, Lukas Fleischer wrote:
> From: Frédéric Mangano-Tarumi <fmang@mg0.fr>
> 
> Turn the git-commits markdown processor into an inline processor, which
> is smart enough not to convert Git hashes contained in code blocks or
> links.
> 
> Signed-off-by: Lukas Fleischer <lfleischer@archlinux.org>
> ---
>  aurweb/scripts/rendercomment.py | 36 ++++++++++++++++++---------------
>  1 file changed, 20 insertions(+), 16 deletions(-)

This was accidentally resubmitted to the list while working on some
other patches; sorry for that. I queued the patch in pu with some minor
changes (removed some UTF-8 characters from comments).

Still looking forward to test suite additions.

Patch

diff --git a/aurweb/scripts/rendercomment.py b/aurweb/scripts/rendercomment.py
index 5e18fd5..5c59748 100755
--- a/aurweb/scripts/rendercomment.py
+++ b/aurweb/scripts/rendercomment.py
@@ -40,19 +40,26 @@  class FlysprayLinksExtension(markdown.extensions.Extension):
         md.preprocessors.add('flyspray-links', preprocessor, '_end')
 
 
-class GitCommitsPreprocessor(markdown.preprocessors.Preprocessor):
-    _oidre = re.compile(r'(\b)([0-9a-f]{7,40})(\b)')
+class GitCommitsInlineProcessor(markdown.inlinepatterns.InlineProcessor):
+    """
+    Turn Git hashes like f7f5152be5ab into links to AUR's cgit.
+
+    Only references to commits that actually exist are linkified. Hashes are
+    shortened to short, unambiguous prefixes. Only hashes with at least 7
+    digits are considered.
+    """
+
     _repo = pygit2.Repository(repo_path)
-    _head = None
 
     def __init__(self, md, head):
         self._head = head
-        super(markdown.preprocessors.Preprocessor, self).__init__(md)
+        super().__init__(r'\b([0-9a-f]{7,40})\b', md)
 
-    def handleMatch(self, m):
-        oid = m.group(2)
+    def handleMatch(self, m, data):
+        oid = m.group(1)
         if oid not in self._repo:
-            return oid
+            # Unknown OID; preserve the original text.
+            return None, None, None
 
         prefixlen = 12
         while prefixlen < 40:
@@ -60,13 +67,10 @@  class GitCommitsPreprocessor(markdown.preprocessors.Preprocessor):
                 break
             prefixlen += 1
 
-        html = '[`' + oid[:prefixlen] + '`]'
-        html += '(' + commit_uri % (self._head, oid[:prefixlen]) + ')'
-
-        return html
-
-    def run(self, lines):
-        return [self._oidre.sub(self.handleMatch, line) for line in lines]
+        el = markdown.util.etree.Element('a')
+        el.set('href', commit_uri % (self._head, oid[:prefixlen]))
+        el.text = markdown.util.AtomicString(oid[:prefixlen])
+        return el, m.start(0), m.end(0)
 
 
 class GitCommitsExtension(markdown.extensions.Extension):
@@ -77,8 +81,8 @@  class GitCommitsExtension(markdown.extensions.Extension):
         super(markdown.extensions.Extension, self).__init__()
 
     def extendMarkdown(self, md, md_globals):
-        preprocessor = GitCommitsPreprocessor(md, self._head)
-        md.preprocessors.add('git-commits', preprocessor, '_end')
+        processor = GitCommitsInlineProcessor(md, self._head)
+        md.inlinePatterns.add('git-commits', processor, '_end')
 
 
 class HeadingTreeprocessor(markdown.treeprocessors.Treeprocessor):