Commit | Line | Data |
---|---|---|
c638d827 CR |
1 | #$Id: back_tsearch2.py,v 1.9 2005-01-08 16:16:59 jlgijsbers Exp $ |
2 | ||
3 | # Note: this backend is EXPERIMENTAL. Do not use if you value your data. | |
4 | import re | |
5 | ||
6 | import psycopg | |
7 | ||
8 | from roundup import hyperdb | |
9 | from roundup.support import ensureParentsExist | |
10 | from roundup.backends import back_postgresql, tsearch2_setup, indexer_rdbms | |
11 | from roundup.backends.back_postgresql import db_create, db_nuke, db_command | |
12 | from roundup.backends.back_postgresql import pg_command, db_exists, Class, IssueClass, FileClass | |
13 | from roundup.backends.indexer_common import _isLink, Indexer | |
14 | ||
15 | # XXX: Should probably be on the Class class. | |
def _indexedProps(spec):
    """Return the names of the String properties of 'spec' marked for indexing.

    'spec' must provide getprops(), returning a mapping of property name
    to property object.  Only hyperdb.String properties whose 'indexme'
    attribute is true are reported.
    """
    indexed = []
    for name, propclass in spec.getprops().items():
        # Only String properties can take part in full-text indexing.
        if not isinstance(propclass, hyperdb.String):
            continue
        if propclass.indexme:
            indexed.append(name)
    return indexed
20 | ||
def _getQueryDict(spec):
    """Build the substitution dictionary used by the tsearch2 SQL templates.

    Keys:
      classname      -- spec.classname
      indexedColumns -- indexed property names, each prefixed with '_'
      tablename      -- '_' + classname
      triggername    -- tablename + '_tsvectorupdate'
    """
    tablename = "_%s" % spec.classname
    return {
        'classname': spec.classname,
        'indexedColumns': ['_' + name for name in _indexedProps(spec)],
        'tablename': tablename,
        'triggername': "%s_tsvectorupdate" % tablename,
    }
28 | ||
class Database(back_postgresql.Database):
    """PostgreSQL backend variant that adds tsearch2 full-text indexing.

    Each class table gets an extra 'idxFTI' column of type tsvector, a
    GiST index on that column and, when the class has indexed String
    properties, a trigger that refreshes the column on INSERT/UPDATE.
    """

    def __init__(self, config, journaltag=None):
        """Initialise the base database and attach the tsearch2 Indexer."""
        back_postgresql.Database.__init__(self, config, journaltag)
        self.indexer = Indexer(self)

    def create_version_2_tables(self):
        """Create the base tables, then run tsearch2_setup.setup()."""
        back_postgresql.Database.create_version_2_tables(self)
        tsearch2_setup.setup(self.cursor)

    def create_class_table_indexes(self, spec):
        """Create the standard indexes plus the GiST index and trigger."""
        back_postgresql.Database.create_class_table_indexes(self, spec)
        self.cursor.execute("""CREATE INDEX _%(classname)s_idxFTI_idx
                               ON %(tablename)s USING gist(idxFTI);""" %
                            _getQueryDict(spec))

        self.create_tsearch2_trigger(spec)

    def create_tsearch2_trigger(self, spec):
        """Populate idxFTI for existing rows and install the update trigger.

        Does nothing when 'spec' has no indexed String properties.
        """
        d = _getQueryDict(spec)
        columns = d['indexedColumns']
        if not columns:
            return

        # In PostgreSQL "NULL || text" is NULL, so a single unset column
        # would blank the whole vector for that row.  coalesce() each
        # column to '' so the remaining columns are still indexed.
        d['joined'] = " || ' ' || ".join(
            ["coalesce(%s, '')" % column for column in columns])
        query = """UPDATE %(tablename)s
                   SET idxFTI = to_tsvector('default', %(joined)s)""" % d
        self.cursor.execute(query)

        # The tsearch2() trigger procedure takes the plain column names.
        d['joined'] = ", ".join(columns)
        query = """CREATE TRIGGER %(triggername)s
                   BEFORE UPDATE OR INSERT ON %(tablename)s
                   FOR EACH ROW EXECUTE PROCEDURE
                   tsearch2(idxFTI, %(joined)s);""" % d
        self.cursor.execute(query)

    def drop_tsearch2_trigger(self, spec):
        """Drop the tsvector update trigger for 'spec' if it exists."""
        # Check whether the trigger exists before trying to drop it.
        query_dict = _getQueryDict(spec)
        self.sql("""SELECT tgname FROM pg_catalog.pg_trigger
                    WHERE tgname = '%(triggername)s'""" % query_dict)
        if self.cursor.fetchall():
            self.sql("""DROP TRIGGER %(triggername)s ON %(tablename)s""" %
                     query_dict)

    def update_class(self, spec, old_spec, force=0):
        """Run the base class update, then rebuild the tsearch2 trigger.

        Returns whatever the base class update_class() returned.
        """
        result = back_postgresql.Database.update_class(self, spec, old_spec,
                                                       force)

        # Drop trigger...
        self.drop_tsearch2_trigger(spec)

        # and recreate if necessary.
        self.create_tsearch2_trigger(spec)

        return result

    def determine_all_columns(self, spec):
        """Return the base column list with the 'idxFTI' tsvector appended."""
        cols, mls = back_postgresql.Database.determine_all_columns(self, spec)
        cols.append(('idxFTI', 'tsvector'))
        return cols, mls
86 | ||
class Indexer(Indexer):
    """Indexer that answers searches from the idxFTI tsvector columns.

    Indexing itself happens in database triggers, so the methods that
    normally feed or rebuild an index are no-ops here.
    """

    def __init__(self, db):
        """Remember the database whose cursor is used for queries."""
        self.db = db

    # This indexer never needs to reindex.
    def should_reindex(self):
        return 0

    def getHits(self, search_terms, klass):
        """Alias for find()."""
        return self.find(search_terms, klass)

    def find(self, search_terms, klass):
        """Search 'klass' and every class it links to for 'search_terms'.

        Returns None when 'search_terms' is empty, otherwise a list of
        (classname, nodeid, 'XXX') tuples as produced by tsearchQuery().
        """
        if not search_terms:
            return None

        hits = self.tsearchQuery(klass.classname, search_terms)

        # Also search the classes this class links to.
        for propclass in klass.getprops().values():
            if _isLink(propclass):
                hits.extend(self.tsearchQuery(propclass.classname,
                                              search_terms))

        return hits

    def tsearchQuery(self, classname, search_terms):
        """Run a full-text query against the table of 'classname'.

        'search_terms' is a sequence of strings that are AND-ed together.
        Returns a list of (classname, nodeid, 'XXX') tuples.
        """
        # The table name cannot be a bound parameter, so it is interpolated
        # here.  The search terms come from the user, so they are passed as
        # a bound parameter and never become part of the SQL text.
        query = """SELECT id FROM _%s
                   WHERE idxFTI @@ to_tsquery('default', %%s)""" % classname
        self.db.cursor.execute(query, (' & '.join(search_terms),))
        klass = self.db.getclass(classname)
        nodeids = [str(row[0]) for row in self.db.cursor.fetchall()]

        # filter out files without text/plain mime type
        # XXX: files without text/plain shouldn't be indexed at all, we
        # should take care of this in the trigger
        if 'type' in klass.getprops():
            nodeids = [nodeid for nodeid in nodeids
                       if klass.get(nodeid, 'type') == 'text/plain']

        # XXX: We haven't implemented property-level search, so I'm just
        # faking it here with a property named 'XXX'. We still need to fix
        # the other backends and indexer_common.Indexer.search to only want
        # to unpack two values.
        return [(classname, nodeid, 'XXX') for nodeid in nodeids]

    # These only exist to satisfy the interface that's expected from indexers.
    def force_reindex(self):
        pass

    def add_text(self, identifier, text, mime_type=None):
        pass

    def close(self):
        pass
143 | ||
class FileClass(hyperdb.FileClass, Class):
    '''This class defines a large chunk of data. To support this, it has a
       mandatory String property "content" which is typically saved off
       externally to the hyperdb.

       However, this implementation just stores it in the hyperdb.
    '''
    def __init__(self, db, classname, **properties):
        '''The newly-created class automatically includes the "content"
           property, declared as an indexed String.
        '''
        properties['content'] = hyperdb.String(indexme='yes')
        Class.__init__(self, db, classname, **properties)

    # MIME type given to new nodes that do not specify one.
    default_mime_type = 'text/plain'

    def create(self, **propvalues):
        '''Create a node, defaulting "type" to default_mime_type when the
           class has a "type" property and no truthy value was supplied.
        '''
        # figure the mime type
        if 'type' in self.getprops() and not propvalues.get('type'):
            propvalues['type'] = self.default_mime_type
        return Class.create(self, **propvalues)

    def export_files(self, dirname, nodeid):
        '''Write the "content" of 'nodeid' to its export file under
           'dirname', creating parent directories as needed.
        '''
        dest = self.exportFilename(dirname, nodeid)
        ensureParentsExist(dest)
        fp = open(dest, "w")
        try:
            fp.write(self.get(nodeid, "content", default=''))
        finally:
            # Close the file even if fetching or writing the content fails.
            fp.close()

    def import_files(self, dirname, nodeid):
        '''Load the "content" of 'nodeid' from its export file under
           'dirname'.
        '''
        source = self.exportFilename(dirname, nodeid)

        fp = open(source, "r")
        try:
            content = fp.read()
        finally:
            # Close the file even if the read fails.
            fp.close()

        # Use Database.setnode instead of self.set or self.set_inner here, as
        # Database.setnode doesn't update the "activity" or "actor" properties.
        self.db.setnode(self.classname, nodeid, values={'content': content})