#!/usr/bin/env python3

"""Checks and Unifies PO headers and rewraps PO files to 79 chars.

Usage:
./lint_po --help

Default is check mode, where the errors are listed but not fixed.
With --fix the files get changed and unified.

Run for all po files in the working directory (including subdirs):
./lint_po

Run with a list of files:
./lint_po file1.de.po file2.fr.po

Run for all po files that are staged for git commit:
./lint_po --cached

Run for all po files of one language in the current directory (recursively):
./lint_po --lang de

When modifying lint_po (this script), you should check if the current type
annotations match, using `mypy` (`apt install mypy`):

mypy lint_po
"""

import argparse
import contextlib
import copy
import functools
import glob
import itertools
import logging
import multiprocessing
import os.path
import re
import shutil
import subprocess
import sys
import tempfile
from typing import Dict, List, Tuple

try:
    import polib  # type: ignore
except ImportError:
    sys.exit("You need to install python3-polib to use this program.")

# i18nspector issues, that we accept
# (tags listed here are filtered out of the i18nspector report)
I18NSPECTOR_ACCEPT = [
    "boilerplate-in-date",
    "boilerplate-in-initial-comments",
    "boilerplate-in-language-team",
    "boilerplate-in-last-translator",
    "boilerplate-in-project-id-version",
    "codomain-error-in-plural-forms",
    "codomain-error-in-unused-plural-forms",
    "conflict-marker-in-header-entry",
    "fuzzy-header-entry",
    "incorrect-plural-forms",
    "invalid-content-transfer-encoding",
    "invalid-date",
    "invalid-language",
    "invalid-last-translator",
    "language-team-equal-to-last-translator",
    "no-language-header-field",
    "no-package-name-in-project-id-version",
    "no-plural-forms-header-field",
    "no-report-msgid-bugs-to-header-field",
    "no-version-in-project-id-version",
    "stray-previous-msgid",
    "unable-to-determine-language",
    "unknown-message-flag",
    "unknown-poedit-language",
    "unusual-plural-forms",
    "unusual-unused-plural-forms",
]

81

82
83
84
85
86
class NoLanguageError(Exception):
    """Raised when no language can be derived from a PO file name.

    The offending file name is available as the `fname` attribute.
    """

    def __init__(self, fname):
        # Hand fname to Exception as well, so that `args` is set explicitly.
        super().__init__(fname)
        self.fname = fname

    def __str__(self):
        return (
            "Can't detect expect file suffix .XX.po for '{fname}'."
            .format(fname=self.fname))

92

93
94
95
96
97
98
class PoFile:
    """A PO file on disk, with the header checks and fixes lint_po applies.

    Call open() before using check(), unifyKey() or write(); these work on
    the polib object stored in `self.pf`.
    """

    def __init__(self, fname: str) -> None:
        self.fname = fname
        self.wrapwidth = 79
        # Set by unifyKey() and needs_rewrap() when the file has to be
        # stored again. Initialised here so that write() is a no-op
        # (instead of an AttributeError) when nothing was changed.
        self.__changed = False

    def fixedHeaders(self) -> Dict[str, str]:
        """@returns: a dict of key,value parts that should be fixed
        within the po file"""
        return {"Language": self.lang(),
                "Content-Type": "text/plain; charset=UTF-8",
                "Project-Id-Version": "",
                "Language-Team": "Tails translators <tails-l10n@boum.org>",
                "Last-Translator": "Tails translators",
                }

    def lang(self) -> str:
        """@returns: language of filename, possibly ending with an underscore
        followed by the territory or script
        @raises NoLanguageError: if the file name does not end in "XX.po"
        """
        name = os.path.basename(self.fname)
        m = re.match(r"^(?:[^.].*\.)?(?P<lang>[A-Za-z0-9_@]+)\.po$", name)
        if not m:
            raise NoLanguageError(self.fname)
        return m.group("lang")

    def lang_without_script(self) -> str:
        """@returns: language of filename, with any script suffix stripped

        A trailing "@modifier" is stripped by the same expression.
        """
        lang = self.lang()
        # a script suffix (e.g. "_Latn") starts with an underscore,
        # followed by the script name in title case, which we
        # approximate as an upper case letter followed by a lower case one
        m = re.match(
            r"^(?P<lang_without_script>.*?)"
            r"(_[A-Z][a-z][A-Za-z]*)?(@[A-Za-z0-9]+)?$",
            lang)
        if m:
            return m.group("lang_without_script")
        return lang

    def check(self, key: str, value: str) -> bool:
        """check if there is "key: value\\n" in PO header"""
        try:
            return self.pf.metadata[key] == value
        except KeyError:
            return False

    def unifyKey(self, key: str, value: str) -> None:
        """ set value of PO header key to "key: value\\n" """
        if not self.check(key, value):
            self.pf.metadata[key] = value
            self.__changed = True

    def open(self) -> None:
        """read po file content

        @raises FileNotFoundError: if the file does not exist
        """
        if not os.path.exists(self.fname):
            raise FileNotFoundError(self.fname)
        self.pf = polib.pofile(self.fname)
        self.pf.wrapwidth = self.wrapwidth
        self.__changed = False

    def write(self) -> None:
        """write file, if content was changed

        The content is saved to a temporary file in the same directory,
        which then replaces the original, so that a failure while saving
        leaves the original file untouched.
        """
        if not self.__changed:
            return

        _prefix = os.path.basename(self.fname)
        _dir = os.path.dirname(self.fname)
        with tempfile.NamedTemporaryFile(
                prefix=_prefix, dir=_dir, delete=False) as fd:
            try:
                self.pf.save(fd.name)
                fd.flush()
                os.fdatasync(fd.fileno())
                # The temporary file is created readable and writable by
                # its owner only: carry over the permission bits of the
                # file we are about to replace.
                shutil.copymode(self.fname, fd.name)
            except Exception:
                os.unlink(fd.name)
                raise
            else:
                os.rename(fd.name, self.fname)

    def needs_rewrap(self) -> bool:
        """checks if lines are wrapped properly.

        As a side effect the file is marked as changed if it needs to be
        rewrapped, so that write() stores it.

        @returns: True if the file on disk differs from its rewrapped
                  form, False if the content is fine.
        """
        _pf = polib.pofile(self.fname)
        _pf.wrapwidth = self.wrapwidth
        with open(self.fname, 'r', encoding='utf-8') as f:
            content = f.read()
        if str(_pf) != content:
            self.__changed = True
            return True
        return False

    @staticmethod
    def _filter_i18nspector_output(output: str,
                                   accepted: List[str]) -> List[str]:
        """Turn i18nspector output into a list of issues.

        Every line is expected to look like "severity fname issue [content]".
        @output: the text i18nspector wrote to stdout
        @accepted: issue names that must not be reported
        @returns: "severity issue [content]" for each issue not in @accepted
        """
        issues = []
        for line in output.strip().split("\n"):
            # No output at all gives a single empty line: nothing to report.
            if not line.strip():
                continue
            fields = line.split(" ")
            if len(fields) < 3:
                # Not the expected format: report the line as it is rather
                # than failing on it or hiding it.
                issues.append(line)
                continue
            severity, _fname, issue, *content = fields
            if issue not in accepted:
                issues.append(" ".join([severity, issue, *content]))
        return issues

    def i18nspector(self, i18nspector_env: Dict[str, str]) -> List[str]:
        """i18nspector_env sets the environment variables for i18nspector
           @returns a list of issues raised by i18nspector, with the
                    allowed issues from I18NSPECTOR_ACCEPT removed.
           @raises subprocess.CalledProcessError: if i18nspector exits
                    with a non-zero return code
        """
        cmd = ["i18nspector", "--language",
               self.lang_without_script(), self.fname]
        process = subprocess.run(
            cmd,
            env=i18nspector_env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            check=True)
        return self._filter_i18nspector_output(process.stdout,
                                               I18NSPECTOR_ACCEPT)

@contextlib.contextmanager
def pofile_readonly(fname: str):
    """Context manager yielding an opened PoFile for `fname`.

    Nothing is written back when the `with` block is left.
    """
    po_file = PoFile(fname)
    po_file.open()
    yield po_file


@contextlib.contextmanager
def pofile_writable(fname: str):
    """Context manager yielding an opened PoFile for `fname`.

    PoFile.write() is called once the `with` block has finished.
    """
    po_file = PoFile(fname)
    po_file.open()
    yield po_file
    # Only reached if the with-block did not raise.
    po_file.write()


218
def check_po_file(fname: str, extended: bool,
                  i18nspector_env: Dict[str, str]
                  ) -> Tuple[str, List[str]]:
    """check PO file for issues.

    @fname: name of the PO file to check.
    @extended: is used to check the header fields in more detail.
    @i18nspector_env: environment variables for i18nspector; a copy with
                      XDG_CACHE_HOME added is passed on, the dict itself
                      is not modified.
    @returns: a tuple (fname, errors); errors is the list of error
              messages and is empty if no issue was found.
    """
    errors = []  # type: List[str]
    with pofile_readonly(fname) as poFile:
        try:
            # Make sure to have a specific subdirectory for each call to
            # avoid race conditions regarding the rply cache (#17359):
            with tempfile.TemporaryDirectory() as tmpdir:
                my_env = copy.copy(i18nspector_env)
                my_env["XDG_CACHE_HOME"] = tmpdir

                issues = poFile.i18nspector(my_env)
                if issues:
                    errors.append(
                        "i18nspector is not happy:\n\t"+"\n\t".join(issues))
        except subprocess.CalledProcessError as e:
            # i18nspector itself failed: report it as an error of this file
            errors.append("i18nspector exited with {e.returncode} - stderr:\n"
                          "{e.stderr}".format(e=e))

        if extended:
            for key, value in poFile.fixedHeaders().items():
                if not poFile.check(key, value):
                    errors.append("{key} is not '{value}'."
                                  .format(key=key, value=value))

    return (fname, errors)


def unify_po_file(fname: str) -> None:
    """unify PO header and rewrap the file named `fname`"""
    with pofile_writable(fname) as poFile:
        for key, value in poFile.fixedHeaders().items():
            poFile.unifyKey(key, value)
        # As a side-effect this updates the store flag,
        # if the file is not properly wrapped:
        poFile.needs_rewrap()
257
258
259
260
261


def main(logger) -> None:
    """Parse the command line and check (or, with --fix, unify) PO files.

    The files to process are those given on the command line, plus the
    ones selected by --lang and --cached; without any of these, all
    *.po files below the current directory are used.

    @logger: logger used to report issues and warnings.
    Exits with an error message if a checked file is not clean or if
    i18nspector is not installed.
    """
    parser = argparse.ArgumentParser(description='Unify PO files')
    parser.add_argument('--fix', dest='fix', action='store_true',
                        help='Fixes issues found in PO headers.')
    parser.add_argument('--check-extended', dest='extended',
                        action='store_true',
                        help='Do extended checks of PO headers.')
    parser.add_argument('--lang', dest='lang',
                        help='Check all PO files of the specified language.')
    parser.add_argument('--cached', dest='cached', action='store_true',
                        help='Only check PO files staged with Git.')
    parser.add_argument('files', metavar='file', type=str, nargs='*',
                        help='List of files to process.')
    args = parser.parse_args()

    if args.lang:
        args.files += glob.glob("**/*.{lang}.po".format(lang=args.lang),
                                recursive=True)
        args.files += glob.glob("**/{lang}.po".format(lang=args.lang),
                                recursive=True)

    if args.cached:
        # get top level directory of the current git repository
        # git diff returns always relative paths to the top level directory
        toplevel = subprocess.check_output(
            ["git", "rev-parse", "--show-toplevel"],
            universal_newlines=True).rstrip()

        # get a list of changes and added files in stage for the next commit
        output = subprocess.check_output(
            ["git", "diff", "--name-only", "--cached",
             "--ignore-submodules", "--diff-filter=d"],
            universal_newlines=True)

        # add all po files to list to unify
        args.files += [os.path.join(toplevel, f) for f in output.splitlines()
                       if f.endswith(".po")]

    if not args.files and not args.cached and not args.lang:
        args.files += glob.glob("**/*.po", recursive=True)

    # Skip files in ./tmp/ and in the torbrowser-launcher submodule
    skipped_prefixes = ('tmp/', 'submodules/torbrowser-launcher/')
    files = [f for f in args.files if not f.startswith(skipped_prefixes)]

    if not files:
        if not args.cached:
            logger.warning("No file to process.\n"
                           "You may want to add files to operate on."
                           " See --help for further information.")
        sys.exit(0)

    for prog in ("i18nspector",):
        if shutil.which(prog) is None:
            sys.exit("{prog}: command not found\n"
                     "You need to install {prog} first.\n"
                     "See /contribute/l10n_tricks."
                     .format(prog=prog))

    fine = True
    # A single pool serves both modes; leaving the with-block shuts its
    # worker processes down once all results have been collected.
    with multiprocessing.Pool() as pool:
        if args.fix:
            # unify PO headers for a list of files
            pool.map(unify_po_file, files)
        else:
            # check only the headers

            # The environment is no longer set here, but let's keep the
            # signatures unchanged, in case we need to tweak some more
            # things later on:
            i18nspector_env = {}  # type: Dict[str, str]
            _check_po_file = functools.partial(
                check_po_file,
                extended=args.extended,
                i18nspector_env=i18nspector_env)
            for fname, issues in pool.imap_unordered(_check_po_file,
                                                     files, 10):
                if issues:
                    fine = False
                    # indent sub-issues:
                    issues = [i.replace("\n", "\n\t") for i in issues]
                    logger.error("{fname}:\n\t{issues}"
                                 .format(fname=fname,
                                         issues="\n\t".join(issues)))
                else:
                    logger.debug("{fname} - No issue found."
                                 .format(fname=fname))

    if not fine:
        sys.exit("checked files are not clean.")


# Script entry point: log to the root logger at INFO level
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO,
                        format='%(levelname)s: %(message)s')
    main(logging.getLogger())