Skip to content
Snippets Groups Projects
indexing.py 1.43 KiB
Newer Older
  • Learn to ignore specific revisions
  • import os
    
    from whoosh.fields import *
    
    srebers's avatar
    srebers committed
    from whoosh.index import create_in
    
    from whoosh import index
    
    # Here, the structure of index entries is defined. You can add more fields with metadata, computed values etc.,
    # and use them for searching and ranking.
    # We use a title, a URL (stored as a unique ID) and the text content.
    #
    # The "stored" attribute is used for all parts that we want to be able to fully retrieve from the index
    
    ix_schema = Schema(
        # Stored, so the title can be read back from search hits.
        title=TEXT(stored=True),
        # Stored and declared unique.
        url=ID(stored=True, unique=True),
        # Passed as the bare field type, i.e. with its default options.
        content=TEXT,
    )
    
    # Create an index if not created or open an existing, then return it
    def get_index():
        """Return the Whoosh index kept in ``indexdir``, creating it if needed.

        A new index using ``ix_schema`` is created when the directory is
        missing or when it exists but does not contain an index yet (for
        example an empty directory left behind by an interrupted first run).
        Otherwise the existing index is opened and returned.
        """
        index_dir = "indexdir"
        # exist_ok: do not fail if the directory is already there.
        os.makedirs(index_dir, exist_ok=True)
        # Test for an actual index, not just the directory: opening a
        # directory that holds no index raises instead of creating one.
        if index.exists_in(index_dir):
            return index.open_dir(index_dir)
        return create_in(index_dir, schema=ix_schema)
    
    
    
    # # now let's add some texts (=documents)
    # writer.add_document(title=u"First document", content=u"This is the first document we've added!")
    # writer.add_document(title=u"Second document", content=u"The second one is even more interesting!")
    # writer.add_document(title=u"Songtext", content=u"Music was my first love and it will be the last")
    #
    # # write the index to the disk
    # writer.commit()
    
    # # Retrieving data
    # from whoosh.qparser import QueryParser
    #
    # with ix.searcher() as searcher:
    #     # find entries with the words 'first' AND 'last'
    #     query = QueryParser("content", ix.schema).parse("first last")
    #     results = searcher.search(query)
    #
    #     # print all results
    #     for r in results:
    #         print(r)