Take links from the 'Related' headline; map document ids to their main headline ids.

This commit is contained in:
Sergio Martínez Portela 2022-05-16 23:55:58 +02:00
parent eda511171d
commit bdf397335c

View File

@ -51,6 +51,8 @@ def main(src_top, dest_top):
docs = load_all(src_top)
files_generated = 0
doc_to_headline_remapping = {}
os.makedirs(dest_top, exist_ok=True)
graph = {}
for doc in docs:
@ -87,29 +89,36 @@ def main(src_top, dest_top):
# print("Skip:", relpath)
continue
if doc.id is not None:
topHeadlines = doc.getTopHeadlines()
main_headline = None
topHeadlines = doc.getTopHeadlines()
if ((len(topHeadlines) == 1 and related is None)
or (len(topHeadlines) == 2 and related is not None)):
if ((len(topHeadlines) == 1 and related is None)
or (len(topHeadlines) == 2 and related is not None)):
main_headline = [h for h in topHeadlines if h != related][0]
if doc.id is not None:
endpath = os.path.join(dest_top, doc.id + ".node.html")
main_headline = [h for h in topHeadlines if h != related][0]
with open(endpath, "wt") as f:
doc_to_headline_remapping['id:' + doc.id] = 'id:' + main_headline.id
f.write(as_document(render(main_headline, doc, headlineLevel=0)))
files_generated += 1
else:
logging.error("Cannot render document from id: {}. {} headlines {} related".format(
relpath,
len(topHeadlines),
'with' if related is not None else 'without'
))
elif doc.id is not None:
logging.error("Cannot render document from id: {}. {} headlines {} related".format(
relpath,
len(topHeadlines),
'with' if related is not None else 'without'
))
for headline in headlines:
endpath = os.path.join(dest_top, headline.id + ".node.html")
links = []
for l in headline.get_links():
headline_links = list(headline.get_links())
if headline == main_headline and related is not None:
headline_links.extend(list(related.get_links()))
for l in headline_links:
if l.value.startswith('http://') or l.value.startswith('https://'):
pass # Ignore for now, external URL
elif l.value.startswith('id:'):
@ -147,6 +156,13 @@ def main(src_top, dest_top):
f.write(as_document(render(headline, doc, headlineLevel=0)))
files_generated += 1
# Update graph, replace document ids with headline ids
for headline_data in graph.values():
for link in headline_data['links']:
if link['target'] in doc_to_headline_remapping:
link['target'] = doc_to_headline_remapping[link['target']]
# Output graph files
graphpath = os.path.join(dest_top, "graph.json")
graph_explorer_path = os.path.join(dest_top, "graph.html")
with open(graphpath, "wt") as f: