1 #$Id: tsearch2_setup.py,v 1.2 2005-01-08 11:25:23 jlgijsbers Exp $
3 # All the SQL in this module is taken from the tsearch2 module in the contrib
4 # tree of PostgreSQL 7.4.6. PostgreSQL, and this code, has the following
7 # PostgreSQL Data Base Management System
8 # (formerly known as Postgres, then as Postgres95).
10 # Portions Copyright (c) 1996-2003, The PostgreSQL Global Development Group
12 # Portions Copyright (c) 1994, The Regents of the University of California
14 # Permission to use, copy, modify, and distribute this software and its
15 # documentation for any purpose, without fee, and without a written agreement
16 # is hereby granted, provided that the above copyright notice and this
17 # paragraph and the following two paragraphs appear in all copies.
19 # IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
20 # DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
21 # LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
22 # DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE
23 # POSSIBILITY OF SUCH DAMAGE.
25 # THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
26 # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27 # AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
28 # ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO
29 # PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
31 tsearch_sql
= """ -- Adjust this setting to control where the objects get CREATEd.
32 SET search_path = public;
35 CREATE TABLE pg_ts_dict (
36 dict_name text not null primary key,
39 dict_lexize oid not null,
44 CREATE FUNCTION lexize(oid, text)
50 CREATE FUNCTION lexize(text, text)
52 as '$libdir/tsearch2', 'lexize_byname'
56 CREATE FUNCTION lexize(text)
58 as '$libdir/tsearch2', 'lexize_bycurrent'
62 CREATE FUNCTION set_curdict(int)
68 CREATE FUNCTION set_curdict(text)
70 as '$libdir/tsearch2', 'set_curdict_byname'
74 --built-in dictionaries
75 CREATE FUNCTION dex_init(text)
80 CREATE FUNCTION dex_lexize(internal,internal,int4)
86 insert into pg_ts_dict select
88 (select oid from pg_proc where proname='dex_init'),
90 (select oid from pg_proc where proname='dex_lexize'),
91 'Simple example of dictionary.'
94 CREATE FUNCTION snb_en_init(text)
99 CREATE FUNCTION snb_lexize(internal,internal,int4)
101 as '$libdir/tsearch2'
105 insert into pg_ts_dict select
107 (select oid from pg_proc where proname='snb_en_init'),
108 '/usr/share/postgresql/contrib/english.stop',
109 (select oid from pg_proc where proname='snb_lexize'),
110 'English Stemmer. Snowball.'
113 CREATE FUNCTION snb_ru_init(text)
115 as '$libdir/tsearch2'
118 insert into pg_ts_dict select
120 (select oid from pg_proc where proname='snb_ru_init'),
121 '/usr/share/postgresql/contrib/russian.stop',
122 (select oid from pg_proc where proname='snb_lexize'),
123 'Russian Stemmer. Snowball.'
126 CREATE FUNCTION spell_init(text)
128 as '$libdir/tsearch2'
131 CREATE FUNCTION spell_lexize(internal,internal,int4)
133 as '$libdir/tsearch2'
137 insert into pg_ts_dict select
139 (select oid from pg_proc where proname='spell_init'),
141 (select oid from pg_proc where proname='spell_lexize'),
142 'ISpell interface. Must have .dict and .aff files'
145 CREATE FUNCTION syn_init(text)
147 as '$libdir/tsearch2'
150 CREATE FUNCTION syn_lexize(internal,internal,int4)
152 as '$libdir/tsearch2'
156 insert into pg_ts_dict select
158 (select oid from pg_proc where proname='syn_init'),
160 (select oid from pg_proc where proname='syn_lexize'),
161 'Example of synonym dictionary'
165 CREATE TABLE pg_ts_parser (
166 prs_name text not null primary key,
167 prs_start oid not null,
168 prs_nexttoken oid not null,
169 prs_end oid not null,
170 prs_headline oid not null,
171 prs_lextype oid not null,
175 --sql-level interface
176 CREATE TYPE tokentype
177 as (tokid int4, alias text, descr text);
179 CREATE FUNCTION token_type(int4)
180 returns setof tokentype
181 as '$libdir/tsearch2'
185 CREATE FUNCTION token_type(text)
186 returns setof tokentype
187 as '$libdir/tsearch2', 'token_type_byname'
191 CREATE FUNCTION token_type()
192 returns setof tokentype
193 as '$libdir/tsearch2', 'token_type_current'
197 CREATE FUNCTION set_curprs(int)
199 as '$libdir/tsearch2'
203 CREATE FUNCTION set_curprs(text)
205 as '$libdir/tsearch2', 'set_curprs_byname'
210 as (tokid int4, token text);
212 CREATE FUNCTION parse(oid,text)
213 returns setof tokenout
214 as '$libdir/tsearch2'
218 CREATE FUNCTION parse(text,text)
219 returns setof tokenout
220 as '$libdir/tsearch2', 'parse_byname'
224 CREATE FUNCTION parse(text)
225 returns setof tokenout
226 as '$libdir/tsearch2', 'parse_current'
231 CREATE FUNCTION prsd_start(internal,int4)
233 as '$libdir/tsearch2'
236 CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
238 as '$libdir/tsearch2'
241 CREATE FUNCTION prsd_end(internal)
243 as '$libdir/tsearch2'
246 CREATE FUNCTION prsd_lextype(internal)
248 as '$libdir/tsearch2'
251 CREATE FUNCTION prsd_headline(internal,internal,internal)
253 as '$libdir/tsearch2'
256 insert into pg_ts_parser select
258 (select oid from pg_proc where proname='prsd_start'),
259 (select oid from pg_proc where proname='prsd_getlexeme'),
260 (select oid from pg_proc where proname='prsd_end'),
261 (select oid from pg_proc where proname='prsd_headline'),
262 (select oid from pg_proc where proname='prsd_lextype'),
263 'Parser from OpenFTS v0.34'
268 CREATE TABLE pg_ts_cfg (
269 ts_name text not null primary key,
270 prs_name text not null,
274 CREATE TABLE pg_ts_cfgmap (
275 ts_name text not null,
276 tok_alias text not null,
278 primary key (ts_name,tok_alias)
281 CREATE FUNCTION set_curcfg(int)
283 as '$libdir/tsearch2'
287 CREATE FUNCTION set_curcfg(text)
289 as '$libdir/tsearch2', 'set_curcfg_byname'
293 CREATE FUNCTION show_curcfg()
295 as '$libdir/tsearch2'
299 insert into pg_ts_cfg values ('default', 'default','C');
300 insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
301 insert into pg_ts_cfg values ('simple', 'default');
303 insert into pg_ts_cfgmap values ('default', 'lword', '{en_stem}');
304 insert into pg_ts_cfgmap values ('default', 'nlword', '{simple}');
305 insert into pg_ts_cfgmap values ('default', 'word', '{simple}');
306 insert into pg_ts_cfgmap values ('default', 'email', '{simple}');
307 insert into pg_ts_cfgmap values ('default', 'url', '{simple}');
308 insert into pg_ts_cfgmap values ('default', 'host', '{simple}');
309 insert into pg_ts_cfgmap values ('default', 'sfloat', '{simple}');
310 insert into pg_ts_cfgmap values ('default', 'version', '{simple}');
311 insert into pg_ts_cfgmap values ('default', 'part_hword', '{simple}');
312 insert into pg_ts_cfgmap values ('default', 'nlpart_hword', '{simple}');
313 insert into pg_ts_cfgmap values ('default', 'lpart_hword', '{en_stem}');
314 insert into pg_ts_cfgmap values ('default', 'hword', '{simple}');
315 insert into pg_ts_cfgmap values ('default', 'lhword', '{en_stem}');
316 insert into pg_ts_cfgmap values ('default', 'nlhword', '{simple}');
317 insert into pg_ts_cfgmap values ('default', 'uri', '{simple}');
318 insert into pg_ts_cfgmap values ('default', 'file', '{simple}');
319 insert into pg_ts_cfgmap values ('default', 'float', '{simple}');
320 insert into pg_ts_cfgmap values ('default', 'int', '{simple}');
321 insert into pg_ts_cfgmap values ('default', 'uint', '{simple}');
322 insert into pg_ts_cfgmap values ('default_russian', 'lword', '{en_stem}');
323 insert into pg_ts_cfgmap values ('default_russian', 'nlword', '{ru_stem}');
324 insert into pg_ts_cfgmap values ('default_russian', 'word', '{ru_stem}');
325 insert into pg_ts_cfgmap values ('default_russian', 'email', '{simple}');
326 insert into pg_ts_cfgmap values ('default_russian', 'url', '{simple}');
327 insert into pg_ts_cfgmap values ('default_russian', 'host', '{simple}');
328 insert into pg_ts_cfgmap values ('default_russian', 'sfloat', '{simple}');
329 insert into pg_ts_cfgmap values ('default_russian', 'version', '{simple}');
330 insert into pg_ts_cfgmap values ('default_russian', 'part_hword', '{simple}');
331 insert into pg_ts_cfgmap values ('default_russian', 'nlpart_hword', '{ru_stem}');
332 insert into pg_ts_cfgmap values ('default_russian', 'lpart_hword', '{en_stem}');
333 insert into pg_ts_cfgmap values ('default_russian', 'hword', '{ru_stem}');
334 insert into pg_ts_cfgmap values ('default_russian', 'lhword', '{en_stem}');
335 insert into pg_ts_cfgmap values ('default_russian', 'nlhword', '{ru_stem}');
336 insert into pg_ts_cfgmap values ('default_russian', 'uri', '{simple}');
337 insert into pg_ts_cfgmap values ('default_russian', 'file', '{simple}');
338 insert into pg_ts_cfgmap values ('default_russian', 'float', '{simple}');
339 insert into pg_ts_cfgmap values ('default_russian', 'int', '{simple}');
340 insert into pg_ts_cfgmap values ('default_russian', 'uint', '{simple}');
341 insert into pg_ts_cfgmap values ('simple', 'lword', '{simple}');
342 insert into pg_ts_cfgmap values ('simple', 'nlword', '{simple}');
343 insert into pg_ts_cfgmap values ('simple', 'word', '{simple}');
344 insert into pg_ts_cfgmap values ('simple', 'email', '{simple}');
345 insert into pg_ts_cfgmap values ('simple', 'url', '{simple}');
346 insert into pg_ts_cfgmap values ('simple', 'host', '{simple}');
347 insert into pg_ts_cfgmap values ('simple', 'sfloat', '{simple}');
348 insert into pg_ts_cfgmap values ('simple', 'version', '{simple}');
349 insert into pg_ts_cfgmap values ('simple', 'part_hword', '{simple}');
350 insert into pg_ts_cfgmap values ('simple', 'nlpart_hword', '{simple}');
351 insert into pg_ts_cfgmap values ('simple', 'lpart_hword', '{simple}');
352 insert into pg_ts_cfgmap values ('simple', 'hword', '{simple}');
353 insert into pg_ts_cfgmap values ('simple', 'lhword', '{simple}');
354 insert into pg_ts_cfgmap values ('simple', 'nlhword', '{simple}');
355 insert into pg_ts_cfgmap values ('simple', 'uri', '{simple}');
356 insert into pg_ts_cfgmap values ('simple', 'file', '{simple}');
357 insert into pg_ts_cfgmap values ('simple', 'float', '{simple}');
358 insert into pg_ts_cfgmap values ('simple', 'int', '{simple}');
359 insert into pg_ts_cfgmap values ('simple', 'uint', '{simple}');
362 CREATE FUNCTION tsvector_in(cstring)
364 AS '$libdir/tsearch2'
365 LANGUAGE 'C' with (isstrict);
367 CREATE FUNCTION tsvector_out(tsvector)
369 AS '$libdir/tsearch2'
370 LANGUAGE 'C' with (isstrict);
372 CREATE TYPE tsvector (
375 OUTPUT = tsvector_out,
379 CREATE FUNCTION length(tsvector)
381 AS '$libdir/tsearch2', 'tsvector_length'
382 LANGUAGE 'C' with (isstrict,iscachable);
384 CREATE FUNCTION to_tsvector(oid, text)
386 AS '$libdir/tsearch2'
387 LANGUAGE 'C' with (isstrict,iscachable);
389 CREATE FUNCTION to_tsvector(text, text)
391 AS '$libdir/tsearch2', 'to_tsvector_name'
392 LANGUAGE 'C' with (isstrict,iscachable);
394 CREATE FUNCTION to_tsvector(text)
396 AS '$libdir/tsearch2', 'to_tsvector_current'
397 LANGUAGE 'C' with (isstrict,iscachable);
399 CREATE FUNCTION strip(tsvector)
401 AS '$libdir/tsearch2'
402 LANGUAGE 'C' with (isstrict,iscachable);
404 CREATE FUNCTION setweight(tsvector,"char")
406 AS '$libdir/tsearch2'
407 LANGUAGE 'C' with (isstrict,iscachable);
409 CREATE FUNCTION concat(tsvector,tsvector)
411 AS '$libdir/tsearch2'
412 LANGUAGE 'C' with (isstrict,iscachable);
421 CREATE FUNCTION tsquery_in(cstring)
423 AS '$libdir/tsearch2'
424 LANGUAGE 'C' with (isstrict);
426 CREATE FUNCTION tsquery_out(tsquery)
428 AS '$libdir/tsearch2'
429 LANGUAGE 'C' with (isstrict);
431 CREATE TYPE tsquery (
437 CREATE FUNCTION querytree(tsquery)
439 AS '$libdir/tsearch2', 'tsquerytree'
440 LANGUAGE 'C' with (isstrict);
442 CREATE FUNCTION to_tsquery(oid, text)
444 AS '$libdir/tsearch2'
445 LANGUAGE 'c' with (isstrict,iscachable);
447 CREATE FUNCTION to_tsquery(text, text)
449 AS '$libdir/tsearch2','to_tsquery_name'
450 LANGUAGE 'c' with (isstrict,iscachable);
452 CREATE FUNCTION to_tsquery(text)
454 AS '$libdir/tsearch2','to_tsquery_current'
455 LANGUAGE 'c' with (isstrict,iscachable);
458 CREATE FUNCTION exectsq(tsvector, tsquery)
460 AS '$libdir/tsearch2'
461 LANGUAGE 'C' with (isstrict, iscachable);
463 COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
465 CREATE FUNCTION rexectsq(tsquery, tsvector)
467 AS '$libdir/tsearch2'
468 LANGUAGE 'C' with (isstrict, iscachable);
470 COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
483 PROCEDURE = rexectsq,
490 CREATE FUNCTION tsearch2()
492 AS '$libdir/tsearch2'
496 CREATE FUNCTION rank(float4[], tsvector, tsquery)
498 AS '$libdir/tsearch2'
499 LANGUAGE 'C' WITH (isstrict, iscachable);
501 CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
503 AS '$libdir/tsearch2'
504 LANGUAGE 'C' WITH (isstrict, iscachable);
506 CREATE FUNCTION rank(tsvector, tsquery)
508 AS '$libdir/tsearch2', 'rank_def'
509 LANGUAGE 'C' WITH (isstrict, iscachable);
511 CREATE FUNCTION rank(tsvector, tsquery, int4)
513 AS '$libdir/tsearch2', 'rank_def'
514 LANGUAGE 'C' WITH (isstrict, iscachable);
516 CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
518 AS '$libdir/tsearch2'
519 LANGUAGE 'C' WITH (isstrict, iscachable);
521 CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
523 AS '$libdir/tsearch2'
524 LANGUAGE 'C' WITH (isstrict, iscachable);
526 CREATE FUNCTION rank_cd(tsvector, tsquery)
528 AS '$libdir/tsearch2', 'rank_cd_def'
529 LANGUAGE 'C' WITH (isstrict, iscachable);
531 CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
533 AS '$libdir/tsearch2', 'rank_cd_def'
534 LANGUAGE 'C' WITH (isstrict, iscachable);
536 CREATE FUNCTION headline(oid, text, tsquery, text)
538 AS '$libdir/tsearch2', 'headline'
539 LANGUAGE 'C' WITH (isstrict, iscachable);
541 CREATE FUNCTION headline(oid, text, tsquery)
543 AS '$libdir/tsearch2', 'headline'
544 LANGUAGE 'C' WITH (isstrict, iscachable);
546 CREATE FUNCTION headline(text, text, tsquery, text)
548 AS '$libdir/tsearch2', 'headline_byname'
549 LANGUAGE 'C' WITH (isstrict, iscachable);
551 CREATE FUNCTION headline(text, text, tsquery)
553 AS '$libdir/tsearch2', 'headline_byname'
554 LANGUAGE 'C' WITH (isstrict, iscachable);
556 CREATE FUNCTION headline(text, tsquery, text)
558 AS '$libdir/tsearch2', 'headline_current'
559 LANGUAGE 'C' WITH (isstrict, iscachable);
561 CREATE FUNCTION headline(text, tsquery)
563 AS '$libdir/tsearch2', 'headline_current'
564 LANGUAGE 'C' WITH (isstrict, iscachable);
568 CREATE FUNCTION gtsvector_in(cstring)
570 AS '$libdir/tsearch2'
571 LANGUAGE 'C' with (isstrict);
573 CREATE FUNCTION gtsvector_out(gtsvector)
575 AS '$libdir/tsearch2'
576 LANGUAGE 'C' with (isstrict);
578 CREATE TYPE gtsvector (
580 INPUT = gtsvector_in,
581 OUTPUT = gtsvector_out
585 CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
587 AS '$libdir/tsearch2'
590 CREATE FUNCTION gtsvector_compress(internal)
592 AS '$libdir/tsearch2'
595 CREATE FUNCTION gtsvector_decompress(internal)
597 AS '$libdir/tsearch2'
600 CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
602 AS '$libdir/tsearch2'
603 LANGUAGE 'C' with (isstrict);
605 CREATE FUNCTION gtsvector_picksplit(internal, internal)
607 AS '$libdir/tsearch2'
610 CREATE FUNCTION gtsvector_union(bytea, internal)
612 AS '$libdir/tsearch2'
615 CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
617 AS '$libdir/tsearch2'
620 -- CREATE the OPERATOR class
621 CREATE OPERATOR CLASS gist_tsvector_ops
622 DEFAULT FOR TYPE tsvector USING gist
624 OPERATOR 1 @@ (tsvector, tsquery) RECHECK ,
625 FUNCTION 1 gtsvector_consistent (gtsvector, internal, int4),
626 FUNCTION 2 gtsvector_union (bytea, internal),
627 FUNCTION 3 gtsvector_compress (internal),
628 FUNCTION 4 gtsvector_decompress (internal),
629 FUNCTION 5 gtsvector_penalty (internal, internal, internal),
630 FUNCTION 6 gtsvector_picksplit (internal, internal),
631 FUNCTION 7 gtsvector_same (gtsvector, gtsvector, internal),
637 as (word text, ndoc int4, nentry int4);
639 --CREATE FUNCTION tsstat_in(cstring)
641 --AS '$libdir/tsearch2'
642 --LANGUAGE 'C' with (isstrict);
644 --CREATE FUNCTION tsstat_out(tsstat)
646 --AS '$libdir/tsearch2'
647 --LANGUAGE 'C' with (isstrict);
649 --CREATE TYPE tsstat (
650 -- INTERNALLENGTH = -1,
651 -- INPUT = tsstat_in,
652 -- OUTPUT = tsstat_out,
656 --CREATE FUNCTION ts_accum(tsstat,tsvector)
658 --AS '$libdir/tsearch2'
659 --LANGUAGE 'C' with (isstrict);
661 --CREATE FUNCTION ts_accum_finish(tsstat)
662 -- returns setof statinfo
663 -- as '$libdir/tsearch2'
667 --CREATE AGGREGATE stat (
668 -- BASETYPE=tsvector,
671 -- FINALFUNC = ts_accum_finish,
675 CREATE FUNCTION stat(text)
676 returns setof statinfo
677 as '$libdir/tsearch2', 'ts_stat'
681 --reset - just for debuging
682 CREATE FUNCTION reset_tsearch()
684 as '$libdir/tsearch2'
688 --get cover (debug for rank_cd)
689 CREATE FUNCTION get_covers(tsvector,tsquery)
691 as '$libdir/tsearch2'
696 create type tsdebug as (
705 create function _get_parser_from_curcfg()
707 ' select prs_name from pg_ts_cfg where oid = show_curcfg() '
708 language 'SQL' with(isstrict,iscachable);
710 create function ts_debug(text)
711 returns setof tsdebug as '
715 t.descr as description,
718 strip(to_tsvector(p.token)) as tsvector
720 parse( _get_parser_from_curcfg(), $1 ) as p,
726 t.alias = m.tok_alias and
727 m.ts_name=c.ts_name and
729 ' language 'SQL' with(isstrict);
733 sql
= '\n'.join([line
for line
in tsearch_sql
.split('\n')
734 if not line
.startswith('--')])
735 for query
in sql
.split(';'):
737 cursor
.execute(query
)