2 # -*- coding: iso-8859-1 -*-
3 # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
4 # Plural forms support added by alexander smishlajev <alex@tycobka.lv>
6 """Generate binary message catalog from textual translation description.
8 This program converts a textual Uniforum-style message catalog (.po file) into
9 a binary GNU catalog (.mo file). This is essentially the same function as the
10 GNU msgfmt program, however, it is a simpler implementation.
12 Usage: msgfmt.py [OPTIONS] filename.po
17 Specify the output file to write to. If omitted, output will go to a
18 file named filename.mo (based off the input file name).
22 Print this message and exit.
26 Display version information and exit.
41 def usage(code
, msg
=''):
42 print >> sys
.stderr
, __doc__
44 print >> sys
.stderr
, msg
49 def add(id, str, fuzzy
):
50 "Add a non-fuzzy translation to the dictionary."
52 if not fuzzy
and str and not str.startswith('\0'):
58 "Return the generated output."
60 keys
= MESSAGES
.keys()
61 # the keys are sorted in the .mo file
66 # For each string, we need size and file offset. Each string is NUL
67 # terminated; the NUL does not count into the size.
68 offsets
.append((len(ids
), len(id), len(strs
), len(MESSAGES
[id])))
70 strs
+= MESSAGES
[id] + '\0'
72 # The header is 7 32-bit unsigned integers. We don't use hash tables, so
73 # the keys start right after the index tables.
75 keystart
= 7*4+16*len(keys
)
76 # and the values start after the keys
77 valuestart
= keystart
+ len(ids
)
80 # The string table first has the list of keys, then the list of values.
81 # Each entry has first the size of the string, then the file offset.
82 for o1
, l1
, o2
, l2
in offsets
:
83 koffsets
+= [l1
, o1
+keystart
]
84 voffsets
+= [l2
, o2
+valuestart
]
85 offsets
= koffsets
+ voffsets
86 output
= struct
.pack("Iiiiiii",
89 len(keys
), # # of entries
90 7*4, # start of key index
91 7*4+len(keys
)*8, # start of value index
92 0, 0) # size and offset of hash table
93 output
+= array
.array("i", offsets
).tostring()
100 def make(filename
, outfile
):
106 # Compute .mo name from .po name and arguments
107 if filename
.endswith('.po'):
110 infile
= filename
+ '.po'
112 outfile
= os
.path
.splitext(infile
)[0] + '.mo'
115 lines
= open(infile
).readlines()
117 print >> sys
.stderr
, msg
120 # remove UTF-8 Byte Order Mark, if any.
121 # (UCS2 BOMs are not handled because messages in UCS2 cannot be handled)
122 if lines
[0].startswith('\xEF\xBB\xBF'):
123 lines
[0] = lines
[0][3:]
132 # If we get a comment line after a msgstr, this is a new entry
133 if l
[0] == '#' and section
== STR
:
134 add(msgid
, msgstr
, fuzzy
)
137 # Record a fuzzy mark
138 if l
[:2] == '#,' and (l
.find('fuzzy') >= 0):
143 # Start of msgid_plural section, separate from singular form with \0
144 if l
.startswith('msgid_plural'):
147 # Now we are in a msgid section, output previous section
148 elif l
.startswith('msgid'):
150 add(msgid
, msgstr
, fuzzy
)
154 # Now we are in a msgstr section
155 elif l
.startswith('msgstr'):
158 # Check for plural forms
159 if l
.startswith('['):
160 # Separate plural forms with \0
161 if not l
.startswith('[0]'):
163 # Ignore the index - must come in sequence
164 l
= l
[l
.index(']') + 1:]
169 # XXX: Does this always follow Python escape semantics?
176 print >> sys
.stderr
, 'Syntax error on %s:%d' % (infile
, lno
), \
178 print >> sys
.stderr
, l
182 add(msgid
, msgstr
, fuzzy
)
188 open(outfile
,"wb").write(output
)
190 print >> sys
.stderr
, msg
196 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'hVo:',
197 ['help', 'version', 'output-file='])
198 except getopt
.error
, msg
:
203 for opt
, arg
in opts
:
204 if opt
in ('-h', '--help'):
206 elif opt
in ('-V', '--version'):
207 print >> sys
.stderr
, "msgfmt.py", __version__
209 elif opt
in ('-o', '--output-file'):
213 print >> sys
.stderr
, 'No input file given'
214 print >> sys
.stderr
, "Try `msgfmt --help' for more information."
217 for filename
in args
:
218 make(filename
, outfile
)
221 if __name__
== '__main__':
224 # vim: set et sts=4 sw=4 :