Commit | Line | Data |
---|---|---|
c638d827 CR |
1 | #$Id: indexer_common.py,v 1.11 2008-09-11 19:41:07 schlatterbeck Exp $ |
2 | import re | |
3 | # Python 2.3 ... 2.6 compatibility: | |
4 | from roundup.anypy.sets_ import set | |
5 | ||
6 | from roundup import hyperdb | |
7 | ||
# Words that are too common to be worth indexing.  Entries are upper
# case; Indexer copies this list into its per-instance stopword set.
STOPWORDS = (
    "A AND ARE AS AT BE BUT BY "
    "FOR IF IN INTO IS IT "
    "NO NOT OF ON OR SUCH "
    "THAT THE THEIR THEN THERE THESE "
    "THEY THIS TO WAS WILL WITH"
).split()
15 | ||
def _isLink(propclass):
    """Return True if propclass is a hyperdb Link or Multilink instance."""
    link_types = (hyperdb.Link, hyperdb.Multilink)
    return isinstance(propclass, link_types)
19 | ||
class Indexer:
    """Backend-independent part of the full-text indexer.

    Holds the stopword set and the word-length limits, and converts raw
    index hits into node ids of a requested hyperdb class.

    This class does not define find(); getHits() calls self.find(), so
    whoever uses search()/getHits() must supply it (presumably a backend
    subclass -- verify against the concrete indexers).
    """

    def __init__(self, db):
        """Initialise stopwords and word-length limits.

        The stopword set starts as a copy of the module-level STOPWORDS
        and is extended with every entry of the
        ('main', 'indexer_stopwords') setting read from db.config.
        """
        self.stopwords = set(STOPWORDS)
        for word in db.config[('main', 'indexer_stopwords')]:
            self.stopwords.add(word)
        # Do not index anything shorter than 2 characters, nor anything
        # longer than 25 characters since that'll be gibberish (encoded
        # text or somesuch).
        self.minlength = 2
        self.maxlength = 25

    def is_stopword(self, word):
        """Return True if word is in the stopword set (exact match)."""
        return word in self.stopwords

    def getHits(self, search_terms, klass):
        """Return the raw index hits for search_terms.

        klass is accepted but not used here; the lookup is delegated
        unchanged to self.find(search_terms).
        """
        return self.find(search_terms)

    def search(self, search_terms, klass, ignore=None):
        """Return the nodes of hyperdb Class "klass" matching search_terms.

        Each hit returned by getHits() is indexed positionally as
        (classname, nodeid, propname).  Hits on klass itself match
        directly; hits on a class that klass links to (via Link or
        Multilink properties) are resolved to klass nodes with
        klass.find().

        "ignore" is a container of (classname, propname) pairs; hits on
        such a pair are skipped.  Only membership is tested, so a dict
        keyed by those pairs or a set of them both work.  The default is
        to ignore nothing.

        The result is a dict keyed by klass node id (as str).  The value
        is {} for a node that was hit directly; for a node found only
        through linked nodes it maps each link property name to the list
        of linked node ids that matched.  An empty dict is returned when
        there are no hits at all.
        """
        # None instead of a shared mutable {} default
        if ignore is None:
            ignore = {}

        # do the index lookup
        hits = self.getHits(search_terms, klass)
        if not hits:
            return {}

        # linked classname -> names of the klass properties linking to it
        # (.items()/.values() rather than iteritems()/itervalues() so this
        # runs on both Python 2 and Python 3)
        designator_propname = {}
        for nm, propclass in klass.getprops().items():
            if _isLink(propclass):
                designator_propname.setdefault(propclass.classname,
                                               []).append(nm)

        # build a dictionary of nodes and their associated messages
        # and files
        nodeids = {}        # this is the answer
        propspec = {}       # used to do the klass.find
        for propnames in designator_propname.values():
            for propname in propnames:
                propspec[propname] = {}     # used as a set (value doesn't matter)

        # don't unpack hits entries as sqlite3's Row can't be unpacked :(
        for entry in hits:
            # skip this result if we don't care about this class/property
            classname = entry[0]
            hit_propname = entry[2]
            if (classname, hit_propname) in ignore:
                continue

            # if it's a property on klass, it's easy
            # (make sure the nodeid is str() not unicode() as returned by
            # some backends as that can cause problems down the track)
            nodeid = str(entry[1])
            if classname == klass.classname:
                if nodeid not in nodeids:
                    nodeids[nodeid] = {}
                continue

            # make sure the class is a linked one, otherwise ignore
            if classname not in designator_propname:
                continue

            # it's a linked class - set up to do the klass.find
            for linkprop in designator_propname[classname]:
                propspec[linkprop][nodeid] = 1

        # retain only the meaningful entries; iterate over a copy because
        # entries are deleted while looping
        for propname, idset in list(propspec.items()):
            if not idset:
                del propspec[propname]

        # klass.find tells me the klass nodeids the linked nodes relate to
        propdefs = klass.getprops()
        for resid in klass.find(**propspec):
            resid = str(resid)
            if resid in nodeids:
                continue    # we ignore duplicate resids
            node_dict = nodeids[resid] = {}
            # now figure out where it came from
            for linkprop in propspec:
                v = klass.get(resid, linkprop)
                # the link might be a Link so deal with a single result
                # or None
                if isinstance(propdefs[linkprop], hyperdb.Link):
                    if v is None:
                        continue
                    v = [v]
                for nodeid in v:
                    if nodeid in propspec[linkprop]:
                        # OK, this node[propname] has a winner
                        node_dict.setdefault(linkprop, []).append(nodeid)
        return nodeids
112 |