Find web links not marked as such when returning doc.get_links().

This commit is contained in:
Sergio Martínez Portela 2024-02-04 00:18:31 +01:00
parent feb836b2b6
commit 985098e091
3 changed files with 53 additions and 6 deletions

View File

@ -94,6 +94,8 @@ LIST_ITEM_RE = re.compile(
r"(?P<indentation>\s*)((?P<bullet>[*\-+])|((?P<counter>\d|[a-zA-Z])(?P<counter_sep>[.)]))) ((?P<checkbox_indentation>\s*)\[(?P<checkbox_value>[ Xx])\])?((?P<tag_indentation>\s*)(?P<tag>.*?)::)?(?P<content>.*)" r"(?P<indentation>\s*)((?P<bullet>[*\-+])|((?P<counter>\d|[a-zA-Z])(?P<counter_sep>[.)]))) ((?P<checkbox_indentation>\s*)\[(?P<checkbox_value>[ Xx])\])?((?P<tag_indentation>\s*)(?P<tag>.*?)::)?(?P<content>.*)"
) )
# Bare ("implicit") http/https URLs found inside plain text.
# - The URL body stops at any whitespace or at `<` / `>`, so a match can never
#   run across a line break into the following text.
# - The last character must not be sentence punctuation or a closing
#   bracket/quote, so "…/implicit." yields "…/implicit" while URLs ending in a
#   digit or a slash ("…/5", "…/path/") are kept whole.
# - The single capture group is kept so findall() returns plain strings.
IMPLICIT_LINK_RE = re.compile(r"(https?:[^<>\s]*[^<>\s.,;:!?'\")\]}])")
# Org-Babel # Org-Babel
BEGIN_BLOCK_RE = re.compile(r"^\s*#\+BEGIN_(?P<subtype>[^ ]+)(?P<arguments>.*)$", re.I) BEGIN_BLOCK_RE = re.compile(r"^\s*#\+BEGIN_(?P<subtype>[^ ]+)(?P<arguments>.*)$", re.I)
END_BLOCK_RE = re.compile(r"^\s*#\+END_(?P<subtype>[^ ]+)\s*$", re.I) END_BLOCK_RE = re.compile(r"^\s*#\+END_(?P<subtype>[^ ]+)\s*$", re.I)
@ -206,6 +208,14 @@ def get_links_from_content(content):
link_description.append(tok) link_description.append(tok)
else: else:
link_value.append(tok) link_value.append(tok)
elif isinstance(tok, str):
implicit_links = IMPLICIT_LINK_RE.findall(tok)
for link in implicit_links:
yield Link(
cast(str, link),
cast(str, link),
None
)
def text_to_dom(tokens, item): def text_to_dom(tokens, item):
if tokens is None: if tokens is None:
@ -1297,7 +1307,7 @@ class Line:
class Link: class Link:
def __init__(self, value: str, description: Optional[str], origin: RangeInRaw): def __init__(self, value: str, description: Optional[str], origin: Optional[RangeInRaw]):
self._value = value self._value = value
self._description = description self._description = description
self._origin = origin self._origin = origin

View File

@ -21,3 +21,10 @@
This is a [[https://codigoparallevar.com/4][[tricky web link]]] followed up with some text. This is a [[https://codigoparallevar.com/4][[tricky web link]]] followed up with some text.
This is [[[https://codigoparallevar.com/5][another tricky web link]]] followed up with some text. This is [[[https://codigoparallevar.com/5][another tricky web link]]] followed up with some text.
* Implicit links
:PROPERTIES:
:ID: 03-markup-implicit-links
:CREATED: [2020-01-01 Wed 01:01]
:END:
This is an implicit web link: https://codigoparallevar.com/implicit.

View File

@ -202,7 +202,7 @@ class TestSerde(unittest.TestCase):
doc = load(f) doc = load(f)
links = list(doc.get_links()) links = list(doc.get_links())
self.assertEqual(len(links), 7) self.assertEqual(len(links), 8)
self.assertEqual(links[0].value, "https://codigoparallevar.com/1") self.assertEqual(links[0].value, "https://codigoparallevar.com/1")
self.assertEqual(links[0].description, "web link") self.assertEqual(links[0].description, "web link")
@ -224,6 +224,9 @@ class TestSerde(unittest.TestCase):
self.assertEqual(links[6].value, "https://codigoparallevar.com/5") self.assertEqual(links[6].value, "https://codigoparallevar.com/5")
self.assertEqual(links[6].description, "another tricky web link") self.assertEqual(links[6].description, "another tricky web link")
self.assertEqual(links[7].value, "https://codigoparallevar.com/implicit")
self.assertEqual(links[7].description, "https://codigoparallevar.com/implicit")
ex = Doc( ex = Doc(
props=[ props=[
("TITLE", "03-Links"), ("TITLE", "03-Links"),
@ -290,8 +293,20 @@ class TestSerde(unittest.TestCase):
"] followed up with some text.\n", "] followed up with some text.\n",
), ),
], ],
) ),
), HL(
"Implicit links",
props=[
("ID", "03-markup-implicit-links"),
("CREATED", DT(2020, 1, 1, 1, 1)),
],
content=[
SPAN(
" This is an implicit web link: https://codigoparallevar.com/implicit.\n",
),
],
),
)
) )
ex.assert_matches(self, doc) ex.assert_matches(self, doc)
@ -301,7 +316,7 @@ class TestSerde(unittest.TestCase):
doc = load(f) doc = load(f)
links = list(doc.get_links()) links = list(doc.get_links())
self.assertEqual(len(links), 7) self.assertEqual(len(links), 8)
self.assertEqual(links[0].value, "https://codigoparallevar.com/1") self.assertEqual(links[0].value, "https://codigoparallevar.com/1")
self.assertEqual(links[0].description, "web link") self.assertEqual(links[0].description, "web link")
links[0].value = "https://codigoparallevar.com/1-updated" links[0].value = "https://codigoparallevar.com/1-updated"
@ -337,6 +352,9 @@ class TestSerde(unittest.TestCase):
links[6].value = "https://codigoparallevar.com/5-updated" links[6].value = "https://codigoparallevar.com/5-updated"
links[6].description = "another tricky web link #5 with update" links[6].description = "another tricky web link #5 with update"
self.assertEqual(links[7].value, "https://codigoparallevar.com/implicit")
self.assertEqual(links[7].description, "https://codigoparallevar.com/implicit")
ex = Doc( ex = Doc(
props=[ props=[
("TITLE", "03-Links"), ("TITLE", "03-Links"),
@ -416,7 +434,19 @@ class TestSerde(unittest.TestCase):
"] followed up with some text.\n", "] followed up with some text.\n",
), ),
], ],
) ),
HL(
"Implicit links",
props=[
("ID", "03-markup-implicit-links"),
("CREATED", DT(2020, 1, 1, 1, 1)),
],
content=[
SPAN(
" This is an implicit web link: https://codigoparallevar.com/implicit.\n",
),
],
),
), ),
) )