From bd4e4f8cb4810a9474a88788f65738e38725afbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Fri, 26 Aug 2022 19:17:12 +0200 Subject: [PATCH 1/2] Add (failing) tests for confusing links (close to `[]`). --- tests/03-links.org | 4 ++++ tests/test_org.py | 50 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/tests/03-links.org b/tests/03-links.org index 7683c7f..ad38d7a 100644 --- a/tests/03-links.org +++ b/tests/03-links.org @@ -17,3 +17,7 @@ This is [[id:03-markup-first-level-id][a link to a section by id]]. This is a [[https://codigoparallevar.com/3][web link]] followed up with some text. + + This is a [[https://codigoparallevar.com/4][[tricky web link]​]] followed up with some text. + + This is [[[https://codigoparallevar.com/5][another tricky web link]]] followed up with some text. diff --git a/tests/test_org.py b/tests/test_org.py index 7e16f4e..921b590 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -201,7 +201,7 @@ class TestSerde(unittest.TestCase): doc = load(f) links = list(doc.get_links()) - self.assertEqual(len(links), 5) + self.assertEqual(len(links), 7) self.assertEqual(links[0].value, "https://codigoparallevar.com/1") self.assertEqual(links[0].description, "web link") @@ -217,6 +217,12 @@ class TestSerde(unittest.TestCase): self.assertEqual(links[4].value, "https://codigoparallevar.com/3") self.assertEqual(links[4].description, "web link") + self.assertEqual(links[5].value, "https://codigoparallevar.com/4") + self.assertEqual(links[5].description, "[tricky web link]") + + self.assertEqual(links[6].value, "https://codigoparallevar.com/5") + self.assertEqual(links[6].description, "another tricky web link") + ex = Doc( props=[ ("TITLE", "03-Links"), @@ -270,6 +276,18 @@ class TestSerde(unittest.TestCase): WEB_LINK("web link", "https://codigoparallevar.com/3"), " followed up with some text.\n", ), + SPAN("\n"), + SPAN( + " This is a ", + WEB_LINK("[tricky web link]", "https://codigoparallevar.com/4"), + " followed up with some text.\n", + ), + SPAN("\n"), + SPAN( + " This is [", + WEB_LINK("another tricky web link", "https://codigoparallevar.com/5"), + "] followed up with some text.\n", + ), ], ) ), @@ -282,7 +300,7 @@ class TestSerde(unittest.TestCase): doc = load(f) links = list(doc.get_links()) - self.assertEqual(len(links), 5) + self.assertEqual(len(links), 7) self.assertEqual(links[0].value, "https://codigoparallevar.com/1") self.assertEqual(links[0].description, "web link") links[0].value = "https://codigoparallevar.com/1-updated" @@ -308,6 +326,16 @@ class TestSerde(unittest.TestCase): links[4].value = "https://codigoparallevar.com/3-updated" links[4].description = "web link #3 with update" + self.assertEqual(links[5].value, "https://codigoparallevar.com/4") + self.assertEqual(links[5].description, "[tricky web link]") + links[5].value = "https://codigoparallevar.com/4-updated" + links[5].description = "[tricky web link #4 with update]" + + self.assertEqual(links[6].value, "https://codigoparallevar.com/5") + self.assertEqual(links[6].description, "another tricky web link") + links[6].value = "https://codigoparallevar.com/5-updated" + links[6].description = "another tricky web link #5 with update" + ex = Doc( props=[ ("TITLE", "03-Links"), @@ -368,6 +396,24 @@ class TestSerde(unittest.TestCase): ), " followed up with some text.\n", ), + SPAN("\n"), + SPAN( + " This is a ", + WEB_LINK( + "[tricky web link #4 with update]", + "https://codigoparallevar.com/4-updated", + ), + " followed up with some text.\n", + ), + SPAN("\n"), + SPAN( + " This is [", + WEB_LINK( + "another tricky web link #5 with update", + "https://codigoparallevar.com/5-updated", + ), + "] followed up with some text.\n", + ), ], ) ), From efadb7814a7889132419f2254025e8496e3a5568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sat, 27 Aug 2022 13:02:10 +0200 Subject: [PATCH 2/2] Fix handling of links with `[]` characters around them. --- org_rw/org_rw.py | 16 ++++++---------- tests/test_org.py | 6 +++--- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 2b7ce1d..012eed6 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -1306,10 +1306,12 @@ def tokenize_contents(contents: str): len(contents) > i + 3 # At least 3 characters more to open and close a link and contents[i + 1] == "[" + # TODO: Generalize this to a backtracking, don't just fix the test case... + and contents[i + 2] != "[" ): - close = contents.find("]", i) + close = contents.find("]]", i) - if close != -1 and contents[close + 1] == "]": + if close != -1: # Link with no description cut_string() @@ -1333,7 +1335,7 @@ def tokenize_contents(contents: str): continue # Possible link close or open of description - if char == "]" and in_link: + if char == "]" and len(contents) > i + 1 and in_link: if contents[i + 1] == "]": cut_string() @@ -1343,19 +1345,13 @@ def tokenize_contents(contents: str): in_link_description = False continue - if contents[i + 1] == "[" and not in_link_description: + elif contents[i + 1] == "[": cut_string() tokens.append((TOKEN_TYPE_OPEN_DESCRIPTION, None)) assert "[" == (next(cursor)[1]) continue - raise Exception( - "Link cannot contain ']' not followed by '[' or ']'. Starting with {}".format( - contents[last_link_start : i + 10] - ) - ) - if in_link and not in_link_description: # Link's pointer have no formatting pass diff --git a/tests/test_org.py b/tests/test_org.py index 921b590..a3e346c 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -218,7 +218,7 @@ class TestSerde(unittest.TestCase): self.assertEqual(links[4].description, "web link") self.assertEqual(links[5].value, "https://codigoparallevar.com/4") - self.assertEqual(links[5].description, "[tricky web link]") + self.assertEqual(links[5].description, "[tricky web link]\u200b") self.assertEqual(links[6].value, "https://codigoparallevar.com/5") self.assertEqual(links[6].description, "another tricky web link") @@ -279,7 +279,7 @@ class TestSerde(unittest.TestCase): SPAN("\n"), SPAN( " This is a ", - WEB_LINK("[tricky web link]", "https://codigoparallevar.com/4"), + WEB_LINK("[tricky web link]\u200b", "https://codigoparallevar.com/4"), " followed up with some text.\n", ), SPAN("\n"), @@ -327,7 +327,7 @@ class TestSerde(unittest.TestCase): links[4].description = "web link #3 with update" self.assertEqual(links[5].value, "https://codigoparallevar.com/4") - self.assertEqual(links[5].description, "[tricky web link]") + self.assertEqual(links[5].description, "[tricky web link]\u200b") links[5].value = "https://codigoparallevar.com/4-updated" links[5].description = "[tricky web link #4 with update]"