Compare commits
1 Commits
develop
...
experiment
Author | SHA1 | Date | |
---|---|---|---|
|
16657d95ee |
40
scripts/brain-query
Executable file
40
scripts/brain-query
Executable file
@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import xapian
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
|
||||
def main(path, query, limit=10):
    """Run a free-text query against a Xapian index and print the best matches.

    Args:
        path: Directory of the Xapian database. A ``docid_map.json`` file is
            read from this same directory and used to translate Xapian
            document ids into the node descriptions printed for each match.
        query: Query string, parsed with ``xapian.QueryParser`` using the
            English stemmer.
        limit: Maximum number of matches to print. Defaults to 10.

    Raises:
        KeyError: If a matched document id has no entry in ``docid_map.json``.
    """
    db = xapian.Database(path, xapian.DB_OPEN)
    try:
        docid_map_path = os.path.join(path, "docid_map.json")
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(docid_map_path, 'rt', encoding='utf-8') as f:
            docid_to_node = json.load(f)

        qp = xapian.QueryParser()
        qp.set_stemmer(xapian.Stem("english"))
        qp.set_database(db)
        qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
        xap_query = qp.parse_query(query)
        print("Parsed query is: {}".format(xap_query))

        enquire = xapian.Enquire(db)
        enquire.set_query(xap_query)
        matches = enquire.get_mset(0, limit)
        for match in matches:
            # JSON object keys are always strings, so the numeric document id
            # has to be converted before it can be looked up in the map.
            node = docid_to_node[str(match.document.get_docid())]
            print(
                "ID {} {}% | DocId: {}".format(
                    match.rank + 1,
                    match.percent,
                    node,
                )
            )
    finally:
        # Release the database handle even if parsing or lookup fails.
        db.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: expects exactly two arguments, <path> and <query>.
    if len(sys.argv) != 3:
        # A wrong invocation is an error: report it on stderr and return a
        # non-zero status so calling scripts can detect the failure.
        # sys.exit is used because the bare `exit` helper is only injected by
        # the `site` module and is not guaranteed to exist.
        print("Brain-Query", file=sys.stderr)
        print("Usage: {} <path> <query>".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)
    main(sys.argv[1], sys.argv[2])
|
@ -6,6 +6,8 @@ import logging
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
import xapian
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
|
||||
import org_rw
|
||||
@ -173,8 +175,49 @@ def main(src_top, dest_top):
|
||||
source = template.read()
|
||||
f.write(source.replace('<!-- REPLACE_THIS_WITH_GRAPH -->',
|
||||
json.dumps(graph)))
|
||||
|
||||
logging.info("Generated {} files".format(files_generated))
|
||||
|
||||
# Generate index files
|
||||
t0 = datetime.utcnow()
|
||||
logging.info("Generating text index...")
|
||||
|
||||
xapian_db = os.path.join(dest_top, "xapian")
|
||||
if os.path.exists(xapian_db):
|
||||
shutil.rmtree(xapian_db)
|
||||
db = xapian.WritableDatabase(xapian_db, xapian.DB_CREATE)
|
||||
|
||||
indexer = xapian.TermGenerator()
|
||||
stemmer = xapian.Stem("english")
|
||||
indexer.set_stemmer(stemmer)
|
||||
|
||||
docid_to_node = {}
|
||||
|
||||
for doc in docs:
|
||||
relpath = os.path.relpath(doc.path, src_top)
|
||||
|
||||
if not relpath.startswith("public/"):
|
||||
# print("Skip:", relpath)
|
||||
continue
|
||||
|
||||
changed = False
|
||||
for hl in doc.getAllHeadlines():
|
||||
xapian_doc = xapian.Document()
|
||||
content = "\n".join(doc.dump_headline(hl))
|
||||
|
||||
xapian_doc.set_data(content)
|
||||
indexer.set_document(xapian_doc)
|
||||
indexer.index_text(content)
|
||||
|
||||
doc_id = db.add_document(xapian_doc)
|
||||
docid_to_node[doc_id] = { 'hl': hl.id, 'doc': doc.path }
|
||||
|
||||
docid_map_path = os.path.join(xapian_db, "docid_map.json")
|
||||
with open(docid_map_path, 'wt') as f:
|
||||
json.dump(docid_to_node, f)
|
||||
|
||||
logging.info("Text index generated in {}".format(datetime.utcnow() - t0))
|
||||
|
||||
|
||||
def print_tree(tree, indentation=0):
|
||||
return
|
||||
|
Loading…
Reference in New Issue
Block a user