Take links from the 'Related' headline and attach them to the main headline; remap document ids to main-headline ids in the graph.
This commit is contained in:
parent
eda511171d
commit
bdf397335c
@ -51,6 +51,8 @@ def main(src_top, dest_top):
|
|||||||
docs = load_all(src_top)
|
docs = load_all(src_top)
|
||||||
files_generated = 0
|
files_generated = 0
|
||||||
|
|
||||||
|
doc_to_headline_remapping = {}
|
||||||
|
|
||||||
os.makedirs(dest_top, exist_ok=True)
|
os.makedirs(dest_top, exist_ok=True)
|
||||||
graph = {}
|
graph = {}
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
@ -87,18 +89,21 @@ def main(src_top, dest_top):
|
|||||||
# print("Skip:", relpath)
|
# print("Skip:", relpath)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if doc.id is not None:
|
main_headline = None
|
||||||
topHeadlines = doc.getTopHeadlines()
|
topHeadlines = doc.getTopHeadlines()
|
||||||
|
|
||||||
if ((len(topHeadlines) == 1 and related is None)
|
if ((len(topHeadlines) == 1 and related is None)
|
||||||
or (len(topHeadlines) == 2 and related is not None)):
|
or (len(topHeadlines) == 2 and related is not None)):
|
||||||
endpath = os.path.join(dest_top, doc.id + ".node.html")
|
|
||||||
|
|
||||||
main_headline = [h for h in topHeadlines if h != related][0]
|
main_headline = [h for h in topHeadlines if h != related][0]
|
||||||
|
if doc.id is not None:
|
||||||
|
endpath = os.path.join(dest_top, doc.id + ".node.html")
|
||||||
with open(endpath, "wt") as f:
|
with open(endpath, "wt") as f:
|
||||||
|
doc_to_headline_remapping['id:' + doc.id] = 'id:' + main_headline.id
|
||||||
|
|
||||||
f.write(as_document(render(main_headline, doc, headlineLevel=0)))
|
f.write(as_document(render(main_headline, doc, headlineLevel=0)))
|
||||||
files_generated += 1
|
files_generated += 1
|
||||||
else:
|
elif doc.id is not None:
|
||||||
logging.error("Cannot render document from id: {}. {} headlines {} related".format(
|
logging.error("Cannot render document from id: {}. {} headlines {} related".format(
|
||||||
relpath,
|
relpath,
|
||||||
len(topHeadlines),
|
len(topHeadlines),
|
||||||
@ -109,7 +114,11 @@ def main(src_top, dest_top):
|
|||||||
endpath = os.path.join(dest_top, headline.id + ".node.html")
|
endpath = os.path.join(dest_top, headline.id + ".node.html")
|
||||||
|
|
||||||
links = []
|
links = []
|
||||||
for l in headline.get_links():
|
headline_links = list(headline.get_links())
|
||||||
|
if headline == main_headline and related is not None:
|
||||||
|
headline_links.extend(list(related.get_links()))
|
||||||
|
|
||||||
|
for l in headline_links:
|
||||||
if l.value.startswith('http://') or l.value.startswith('https://'):
|
if l.value.startswith('http://') or l.value.startswith('https://'):
|
||||||
pass # Ignore for now, external URL
|
pass # Ignore for now, external URL
|
||||||
elif l.value.startswith('id:'):
|
elif l.value.startswith('id:'):
|
||||||
@ -147,6 +156,13 @@ def main(src_top, dest_top):
|
|||||||
f.write(as_document(render(headline, doc, headlineLevel=0)))
|
f.write(as_document(render(headline, doc, headlineLevel=0)))
|
||||||
files_generated += 1
|
files_generated += 1
|
||||||
|
|
||||||
|
# Update graph, replace document ids with headline ids
|
||||||
|
for headline_data in graph.values():
|
||||||
|
for link in headline_data['links']:
|
||||||
|
if link['target'] in doc_to_headline_remapping:
|
||||||
|
link['target'] = doc_to_headline_remapping[link['target']]
|
||||||
|
|
||||||
|
# Output graph files
|
||||||
graphpath = os.path.join(dest_top, "graph.json")
|
graphpath = os.path.join(dest_top, "graph.json")
|
||||||
graph_explorer_path = os.path.join(dest_top, "graph.html")
|
graph_explorer_path = os.path.join(dest_top, "graph.html")
|
||||||
with open(graphpath, "wt") as f:
|
with open(graphpath, "wt") as f:
|
||||||
|
Loading…
Reference in New Issue
Block a user