Newer
Older
import os
from whoosh.fields import *
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from whoosh import index
# Here, the structure of index entries is defined. You can add more fields with metadata, computed values etc.,
# and use them for searching and ranking.
# We use a title, a unique URL, and the text content.
#
# The "stored" attribute is used for all parts that we want to be able to fully retrieve from the index
ix_schema = Schema(
    # Stored, so the title can be read back from a search hit.
    title=TEXT(stored=True),
    # Stored and declared unique.
    url=ID(stored=True, unique=True),
    # Declared without stored=True, unlike the two fields above.
    content=TEXT,
)
# Create an index if not created or open an existing, then return it
def get_index():
    """Return the Whoosh index kept in the ``indexdir`` directory.

    The directory is created when it is missing. If it already holds an
    index, that index is opened; otherwise a new, empty index is created
    there using ``ix_schema``.

    Returns:
        The index object returned by ``whoosh.index.open_dir`` or
        ``whoosh.index.create_in``.
    """
    index_dir = "indexdir"

    if not os.path.exists(index_dir):
        os.makedirs(index_dir)

    # Decide on the presence of an actual index, not just the directory:
    # an existing but empty directory would make open_dir() fail.
    if index.exists_in(index_dir):
        return index.open_dir(index_dir)

    # create_in is reached through the imported ``index`` module; the bare
    # name is not brought into scope by any import in this file.
    return index.create_in(index_dir, schema=ix_schema)
# # now let's add some texts (=documents)
# writer.add_document(title=u"First document", content=u"This is the first document we've added!")
# writer.add_document(title=u"Second document", content=u"The second one is even more interesting!")
# writer.add_document(title=u"Songtext", content=u"Music was my first love and it will be the last")
#
# # write the index to the disk
# writer.commit()
# # Retrieving data
# from whoosh.qparser import QueryParser
#
# with ix.searcher() as searcher:
# # find entries with the words 'first' AND 'last'
# query = QueryParser("content", ix.schema).parse("first last")
# results = searcher.search(query)
#
# # print all results
# for r in results:
# print(r)