From 985098e09177dfc14ccdfbdd9ecdec41b8b9b84d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sun, 4 Feb 2024 00:18:31 +0100 Subject: [PATCH] Find web links not marked as such when returning `doc.get_links()`. --- org_rw/org_rw.py | 12 +++++++++++- tests/03-links.org | 7 +++++++ tests/test_org.py | 40 +++++++++++++++++++++++++++++++++++----- 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 7d01fcc..b1dff79 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -94,6 +94,8 @@ LIST_ITEM_RE = re.compile( r"(?P\s*)((?P[*\-+])|((?P\d|[a-zA-Z])(?P[.)]))) ((?P\s*)\[(?P[ Xx])\])?((?P\s*)(?P.*?)::)?(?P.*)" ) +IMPLICIT_LINK_RE = re.compile(r'(https?:[^<> ]*[a-zA-Z])') + # Org-Babel BEGIN_BLOCK_RE = re.compile(r"^\s*#\+BEGIN_(?P[^ ]+)(?P.*)$", re.I) END_BLOCK_RE = re.compile(r"^\s*#\+END_(?P[^ ]+)\s*$", re.I) @@ -206,6 +208,14 @@ def get_links_from_content(content): link_description.append(tok) else: link_value.append(tok) + elif isinstance(tok, str): + implicit_links = IMPLICIT_LINK_RE.findall(tok) + for link in implicit_links: + yield Link( + cast(str, link), + cast(str, link), + None + ) def text_to_dom(tokens, item): if tokens is None: @@ -1297,7 +1307,7 @@ class Line: class Link: - def __init__(self, value: str, description: Optional[str], origin: RangeInRaw): + def __init__(self, value: str, description: Optional[str], origin: Optional[RangeInRaw]): self._value = value self._description = description self._origin = origin diff --git a/tests/03-links.org b/tests/03-links.org index ad38d7a..7ab2d75 100644 --- a/tests/03-links.org +++ b/tests/03-links.org @@ -21,3 +21,10 @@ This is a [[https://codigoparallevar.com/4][[tricky web link]​]] followed up with some text. This is [[[https://codigoparallevar.com/5][another tricky web link]]] followed up with some text. + +* Implicit links + :PROPERTIES: + :ID: 03-markup-implicit-links + :CREATED: [2020-01-01 Wed 01:01] + :END: + This is an implicit web link: https://codigoparallevar.com/implicit. diff --git a/tests/test_org.py b/tests/test_org.py index 6981b76..8631fba 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -202,7 +202,7 @@ class TestSerde(unittest.TestCase): doc = load(f) links = list(doc.get_links()) - self.assertEqual(len(links), 7) + self.assertEqual(len(links), 8) self.assertEqual(links[0].value, "https://codigoparallevar.com/1") self.assertEqual(links[0].description, "web link") @@ -224,6 +224,9 @@ class TestSerde(unittest.TestCase): self.assertEqual(links[6].value, "https://codigoparallevar.com/5") self.assertEqual(links[6].description, "another tricky web link") + self.assertEqual(links[7].value, "https://codigoparallevar.com/implicit") + self.assertEqual(links[7].description, "https://codigoparallevar.com/implicit") + ex = Doc( props=[ ("TITLE", "03-Links"), @@ -290,8 +293,20 @@ class TestSerde(unittest.TestCase): "] followed up with some text.\n", ), ], - ) - ), + ), + HL( + "Implicit links", + props=[ + ("ID", "03-markup-implicit-links"), + ("CREATED", DT(2020, 1, 1, 1, 1)), + ], + content=[ + SPAN( + " This is an implicit web link: https://codigoparallevar.com/implicit.\n", + ), + ], + ), + ) ) ex.assert_matches(self, doc) @@ -301,7 +316,7 @@ class TestSerde(unittest.TestCase): doc = load(f) links = list(doc.get_links()) - self.assertEqual(len(links), 7) + self.assertEqual(len(links), 8) self.assertEqual(links[0].value, "https://codigoparallevar.com/1") self.assertEqual(links[0].description, "web link") links[0].value = "https://codigoparallevar.com/1-updated" @@ -337,6 +352,9 @@ class TestSerde(unittest.TestCase): links[6].value = "https://codigoparallevar.com/5-updated" links[6].description = "another tricky web link #5 with update" + self.assertEqual(links[7].value, "https://codigoparallevar.com/implicit") + self.assertEqual(links[7].description, "https://codigoparallevar.com/implicit") + ex = Doc( props=[ ("TITLE", "03-Links"), @@ -416,7 +434,19 @@ class TestSerde(unittest.TestCase): "] followed up with some text.\n", ), ], - ) + ), + HL( + "Implicit links", + props=[ + ("ID", "03-markup-implicit-links"), + ("CREATED", DT(2020, 1, 1, 1, 1)), + ], + content=[ + SPAN( + " This is an implicit web link: https://codigoparallevar.com/implicit.\n", + ), + ], + ), ), )