Commit | Line | Data |
---|---|---|
c638d827 CR |
1 | #! /usr/bin/env python |
2 | # -*- coding: iso-8859-1 -*- | |
3 | # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de> | |
4 | # Plural forms support added by alexander smishlajev <alex@tycobka.lv> | |
5 | ||
6 | """Generate binary message catalog from textual translation description. | |
7 | ||
8 | This program converts a textual Uniforum-style message catalog (.po file) into | |
9 | a binary GNU catalog (.mo file). This is essentially the same function as the | |
10 | GNU msgfmt program, however, it is a simpler implementation. | |
11 | ||
12 | Usage: msgfmt.py [OPTIONS] filename.po | |
13 | ||
14 | Options: | |
15 | -o file | |
16 | --output-file=file | |
17 | Specify the output file to write to. If omitted, output will go to a | |
18 | file named filename.mo (based off the input file name). | |
19 | ||
20 | -h | |
21 | --help | |
22 | Print this message and exit. | |
23 | ||
24 | -V | |
25 | --version | |
26 | Display version information and exit. | |
27 | """ | |
28 | ||
29 | import sys | |
30 | import os | |
31 | import getopt | |
32 | import struct | |
33 | import array | |
34 | ||
# Version string printed by the -V / --version command line option.
__version__ = "1.1"

# Catalog being built: maps each msgid to its msgstr.  make() resets it,
# add() fills it and generate() serializes it.
MESSAGES = {}
38 | ||
39 | ||
40 | \f | |
def usage(code, msg=''):
    """Print the module help text on stderr and terminate the process.

    code -- exit status handed to sys.exit().
    msg  -- optional extra message; when non-empty it is printed on its own
            line after the help text.
    """
    pieces = [__doc__]
    if msg:
        pieces.append(msg)
    for piece in pieces:
        # The 1-tuple keeps the formatting safe whatever object is passed.
        sys.stderr.write('%s\n' % (piece,))
    sys.exit(code)
46 | ||
47 | ||
48 | \f | |
def add(id, str, fuzzy):
    """Store a translation in MESSAGES unless the entry has to be skipped.

    id    -- the msgid (key of the catalog entry).
    str   -- the msgstr (translated text).
    fuzzy -- true when the entry carries a fuzzy mark.

    Nothing is stored for fuzzy entries, for empty translations, or for
    translations that begin with a NUL character.
    """
    if fuzzy:
        return
    # NOTE(review): a leading NUL presumably means the first plural form is
    # empty, i.e. the entry is untranslated -- confirm.
    if not str or str.startswith('\0'):
        return
    MESSAGES[id] = str
54 | ||
55 | ||
56 | \f | |
def generate(messages=None):
    """Return the binary .mo image for a message dictionary.

    messages -- mapping of msgid byte strings to msgstr byte strings.  When
                omitted, the module-level MESSAGES dictionary is used, which
                is what existing callers rely on.

    The result consists of a header of seven integers, the key index, the
    value index, the NUL-terminated keys and finally the NUL-terminated
    values.  No hash table is written.  Integers are packed in native byte
    order, and the offsets assume 4-byte integers.
    """
    if messages is None:
        messages = MESSAGES
    # The keys are sorted in the .mo file.
    keys = sorted(messages)

    # For each string we need its size and its position inside its own
    # string table.  Every string is NUL terminated; the NUL does not count
    # into the size.
    spans = []
    id_pos = str_pos = 0
    for key in keys:
        value = messages[key]
        spans.append((id_pos, len(key), str_pos, len(value)))
        id_pos += len(key) + 1
        str_pos += len(value) + 1
    # Build each table with a single join instead of repeated concatenation.
    ids = b''.join([key + b'\0' for key in keys])
    strs = b''.join([messages[key] + b'\0' for key in keys])

    # The header is 7 integers.  We don't use hash tables, so the keys start
    # right after the two index tables (8 bytes per entry in each table).
    header_size = 7 * 4
    index_size = 8 * len(keys)
    key_index = header_size
    value_index = key_index + index_size
    keystart = value_index + index_size
    # ... and the values start after the keys.
    valuestart = keystart + len(ids)

    # The index first lists all keys, then all values.  Each entry holds the
    # size of the string followed by its offset from the start of the file.
    koffsets = []
    voffsets = []
    for o1, l1, o2, l2 in spans:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    index = koffsets + voffsets

    header = struct.pack("Iiiiiii",
                         0x950412de,        # Magic
                         0,                 # Version
                         len(keys),         # # of entries
                         key_index,         # start of key index
                         value_index,       # start of value index
                         0, 0)              # size and offset of hash table
    # A repeat count packs the whole index as consecutive native ints.
    return header + struct.pack("%di" % len(index), *index) + ids + strs
97 | ||
98 | ||
99 | \f | |
def _syntax_error(infile, lno, text):
    """Report a catalog syntax error on stderr and exit with status 1."""
    sys.stderr.write('Syntax error on %s:%d before:\n' % (infile, lno))
    sys.stderr.write('%s\n' % (text,))
    sys.exit(1)


def make(filename, outfile):
    """Compile a .po catalog into a binary .mo file.

    filename -- path of the input catalog; '.po' is appended when the name
                does not already end with it.
    outfile  -- path of the output file, or None to use the input path with
                its extension replaced by '.mo'.

    The module-level MESSAGES dictionary is reset, filled through add() while
    the catalog is parsed, and serialized with generate().  Read errors,
    write errors and catalog syntax errors are reported on stderr and end the
    process with exit status 1.
    """
    ID = 1
    STR = 2
    global MESSAGES
    MESSAGES = {}

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile) as source:
            lines = source.readlines()
    except IOError as msg:
        sys.stderr.write('%s\n' % (msg,))
        sys.exit(1)

    # remove UTF-8 Byte Order Mark, if any.
    # (UCS2 BOMs are not handled because messages in UCS2 cannot be handled)
    # An empty file has no first line to inspect.
    if lines and lines[0].startswith('\xEF\xBB\xBF'):
        lines[0] = lines[0][3:]

    section = None
    fuzzy = 0
    # Defined up front so that a malformed catalog yields a syntax error
    # instead of a NameError.
    msgid = msgstr = ''

    # Parse the catalog
    for lno, line in enumerate(lines, 1):
        # If we get a comment line after a msgstr, this is a new entry
        if line[0] == '#' and section == STR:
            add(msgid, msgstr, fuzzy)
            section = None
            fuzzy = 0
        # Record a fuzzy mark
        if line[:2] == '#,' and 'fuzzy' in line:
            fuzzy = 1
        # Skip comments
        if line[0] == '#':
            continue
        # Start of msgid_plural section, separate from singular form with \0
        if line.startswith('msgid_plural'):
            # A plural id is only meaningful right after its singular msgid.
            if section != ID:
                _syntax_error(infile, lno, line.strip())
            msgid += '\0'
            line = line[12:]
        # Now we are in a msgid section, output previous section
        elif line.startswith('msgid'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # The fuzzy mark belongs to the entry just flushed; it must
                # not leak into an entry that follows without any comment.
                fuzzy = 0
            section = ID
            line = line[5:]
            msgid = msgstr = ''
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            section = STR
            line = line[6:]
            # Check for plural forms
            if line.startswith('['):
                # Separate plural forms with \0
                if not line.startswith('[0]'):
                    msgstr += '\0'
                # Ignore the index - must come in sequence
                line = line[line.index(']') + 1:]
        # Skip empty lines
        line = line.strip()
        if not line:
            continue
        # XXX: Does this always follow Python escape semantics?
        # SECURITY NOTE: eval() executes whatever expression the catalog
        # contains.  Only compile .po files from trusted sources.
        text = eval(line)
        if section == ID:
            msgid += text
        elif section == STR:
            msgstr += text
        else:
            _syntax_error(infile, lno, text)
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as target:
            target.write(output)
    except IOError as msg:
        sys.stderr.write('%s\n' % (msg,))
        # A catalog that could not be written is a failure, exactly like a
        # catalog that could not be read.
        sys.exit(1)
192 | ||
193 | \f | |
def main():
    """Command line entry point: parse the options and compile each catalog.

    Options are read from sys.argv: -h/--help prints the usage text,
    -V/--version prints the version, -o/--output-file selects the output
    path.  Every remaining argument is compiled with make().  An invalid
    option or a missing input file ends the process with exit status 1.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            sys.stderr.write("msgfmt.py %s\n" % (__version__,))
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        sys.stderr.write('No input file given\n')
        sys.stderr.write("Try `msgfmt --help' for more information.\n")
        # Nothing was compiled: report failure instead of exiting with 0.
        sys.exit(1)

    for filename in args:
        make(filename, outfile)


if __name__ == '__main__':
    main()
223 | ||
224 | # vim: set et sts=4 sw=4 : |