lint_po 12.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#!/usr/bin/env python3

"""Checks and Unifies PO headers and rewraps PO files to 79 chars.

Usage:
./lint_po --help

Default is check mode, where the errors are listed but not fixed.
With --fix the files get changed and unified.

Run for all po files in the working directory (including subdirs):
./lint_po

Run with a list of files:
./lint_po file1.de.po file2.fr.po

Run for all po files that are staged for git commit:
./lint_po --cached

Run for all po files of one language in the current directory (recursively):
./lint_po --lang de

When modifying lint_po (this script), you should check if the current type
annotations match, using `mypy` (`apt install mypy`):

mypy lint_po
"""

import argparse
import contextlib
31
import copy
32
33
import functools
import glob
34
import itertools
35
36
37
38
39
40
41
42
43
44
import logging
import multiprocessing
import os.path
import re
import shutil
import subprocess
import sys
import tempfile

try:
45
    import polib  # type: ignore
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
except ImportError:
    sys.exit("You need to install python3-polib to use this program.")

from typing import Dict, List, Tuple

# Issue tags reported by i18nspector that we tolerate: PoFile.i18nspector()
# drops any issue whose tag is listed here.
I18NSPECTOR_ACCEPT = [
    "boilerplate-in-date",
    "boilerplate-in-initial-comments",
    "boilerplate-in-language-team",
    "boilerplate-in-last-translator",
    "boilerplate-in-project-id-version",
    "codomain-error-in-plural-forms",
    "codomain-error-in-unused-plural-forms",
    "conflict-marker-in-header-entry",
    "fuzzy-header-entry",
    "incorrect-plural-forms",
    "invalid-content-transfer-encoding",
    "invalid-date",
    "invalid-language",
    "invalid-last-translator",
    "language-team-equal-to-last-translator",
    "no-language-header-field",
    "no-package-name-in-project-id-version",
    "no-plural-forms-header-field",
    "no-report-msgid-bugs-to-header-field",
    "no-version-in-project-id-version",
    "stray-previous-msgid",
    "unable-to-determine-language",
    "unknown-poedit-language",
    "unusual-plural-forms",
    "unusual-unused-plural-forms",
]

80

81
82
83
84
85
class NoLanguageError(Exception):
    """Raised when no language can be derived from a PO file name.

    @fname: the file name whose suffix could not be interpreted.
    """

    def __init__(self, fname):
        # Hand the file name to Exception as well, so that `args` is
        # populated the usual way.
        super().__init__(fname)
        self.fname = fname

    def __str__(self):
        return (
            "Can't detect expect file suffix .XX.po for '{fname}'."
            .format(fname=self.fname))

91

92
93
94
95
96
97
class PoFile:
    """A PO file whose header and line wrapping can be checked and unified.

    Call open() before using check(), unifyKey() or write().
    """

    def __init__(self, fname: str) -> None:
        self.fname = fname
        self.wrapwidth = 79
        # Reset by open(); initialised here so that the attribute always
        # exists, even if needs_rewrap() or write() is called first.
        self.__changed = False

    def fixedHeaders(self) -> Dict[str, str]:
        """@returns: a dict of key,value parts that should be fixed
        within the po file"""
        return {"Language": self.lang(),
                "Content-Type": "text/plain; charset=UTF-8",
                "Project-Id-Version": "",
                "Language-Team": "Tails translators <tails-l10n@boum.org>",
                "Last-Translator": "Tails translators",
                }

    def lang(self) -> str:
        """@returns: language of filename, possibly ending with an underscore
        followed by the territory or script
        @raises NoLanguageError: if the file name does not end in a
        language suffix followed by ".po"
        """
        name = os.path.basename(self.fname)
        m = re.match(r"^(?:[^.].*\.)?(?P<lang>[A-Za-z0-9_@]+)\.po$", name)
        if not m:
            raise NoLanguageError(self.fname)
        return m.group("lang")

    def lang_without_script(self) -> str:
        """@returns: language of filename, with any script suffix stripped"""
        lang = self.lang()
        # a script suffix (e.g. "_Latn") starts with an underscore,
        # followed by the script name in title case, which we
        # approximate as an upper case letter followed by a lower case one
        m = re.match(
            r"^(?P<lang_without_script>.*?)"
            r"(_[A-Z][a-z][A-Za-z]*)?(@[A-Za-z0-9]+)?$",
            lang)
        if m:
            return m.group("lang_without_script")
        return lang

    def check(self, key: str, value: str) -> bool:
        """check if there is "key: value\\n" in PO header"""
        try:
            return self.pf.metadata[key] == value
        except KeyError:
            return False

    def unifyKey(self, key: str, value: str) -> None:
        """ set value of PO header key to "key: value\\n" """
        if not self.check(key, value):
            self.pf.metadata[key] = value
            self.__changed = True

    def open(self) -> None:
        """read po file content

        @raises FileNotFoundError: if the file does not exist
        """
        if not os.path.exists(self.fname):
            raise FileNotFoundError(self.fname)
        self.pf = polib.pofile(self.fname)
        self.pf.wrapwidth = self.wrapwidth
        self.__changed = False

    def write(self) -> None:
        """write file, if content was changed"""
        if not self.__changed:
            return

        _prefix = os.path.basename(self.fname)
        _dir = os.path.dirname(self.fname)
        # Save to a temporary file next to the target and rename it into
        # place, so that a failed save never leaves a half-written PO file.
        # NOTE(review): the temporary file is created by tempfile, so the
        # renamed file presumably ends up with tempfile's restrictive
        # permissions instead of the original mode -- confirm whether the
        # mode should be preserved.
        with tempfile.NamedTemporaryFile(
                prefix=_prefix, dir=_dir, delete=False) as fd:
            try:
                self.pf.save(fd.name)
                fd.flush()
                os.fdatasync(fd.fileno())
            except Exception:
                os.unlink(fd.name)
                raise
            else:
                os.rename(fd.name, self.fname)

    def needs_rewrap(self) -> bool:
        """checks if lines are wrapped properly.

        As a side effect, the file is marked as changed if it is not
        wrapped properly, so that a later write() stores it.

        @returns: True if the file content differs from its rewrapped
                  form (i.e. it needs a rewrap), False if content is fine.
        """
        _pf = polib.pofile(self.fname)
        _pf.wrapwidth = self.wrapwidth
        with open(self.fname, 'r', encoding='utf-8') as f:
            content = f.read()
        if str(_pf) != content:
            self.__changed = True
            return True
        return False

    @staticmethod
    def _unaccepted_issues(output: str, accepted: List[str]) -> List[str]:
        """Filter the output of i18nspector.

        Every non-blank line is split on single spaces into severity,
        file name, issue tag and remaining content.

        @output: the text printed by i18nspector on stdout
        @accepted: issue tags that are to be dropped
        @returns: "severity tag content" strings (the file name is left
                  out) for all issues whose tag is not in @accepted
        """
        issues = []
        for line in output.strip().split("\n"):
            # No output at all yields a single empty line, which must not
            # be unpacked.
            if not line.strip():
                continue
            severity, _fname, issue, *content = line.split(" ")
            if issue not in accepted:
                issues.append(" ".join([severity, issue, *content]))
        return issues

    def i18nspector(self, i18nspector_env: Dict[str, str]) -> List[str]:
        """i18nspector_env sets the environment variables for i18nspector
           @returns a list of issues raised by i18nspector, with the
                    allowed issues from I18NSPECTOR_ACCEPT removed.
           @raises subprocess.CalledProcessError: if i18nspector exits
                    with a non-zero status.
        """
        cmd = ["i18nspector", "--language",
               self.lang_without_script(), self.fname]
        process = subprocess.run(
            cmd,
            env=i18nspector_env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            check=True)
        return self._unaccepted_issues(process.stdout, I18NSPECTOR_ACCEPT)

@contextlib.contextmanager
def pofile_readonly(fname: str):
    """Context manager yielding an opened PoFile for `fname`.

    Nothing is written back when the context is left.
    """
    po_file = PoFile(fname)
    po_file.open()
    yield po_file


@contextlib.contextmanager
def pofile_writable(fname: str):
    """Context manager yielding an opened PoFile for `fname`.

    When the body finishes without raising, PoFile.write() is called,
    which stores the file if its content was changed.
    """
    po_file = PoFile(fname)
    po_file.open()
    yield po_file
    # Only reached if the with-body did not raise.
    po_file.write()


217
def check_po_file(fname: str, extended: bool,
                  i18nspector_env: Dict[str, str]
                  ) -> Tuple[str, List[str]]:
    """check PO file for issues.
    @fname: path of the PO file to check.
    @extended: is used to check the header fields in more detail.
    @i18nspector_env: environment variables for i18nspector; it is copied
                      and XDG_CACHE_HOME is overridden for each call.
    @returns: a tuple (fname, errors), errors being a (possibly empty)
              list of error messages.
    """
    errors = []  # type: List[str]
    with pofile_readonly(fname) as poFile:
        try:
            # Make sure to have a specific subdirectory for each call to
            # avoid race conditions regarding the rply cache (#17359):
            with tempfile.TemporaryDirectory() as tmpdir:
                my_env = copy.copy(i18nspector_env)
                my_env["XDG_CACHE_HOME"] = tmpdir

                issues = poFile.i18nspector(my_env)
                if issues:
                    errors.append(
                        "i18nspector is not happy:\n\t"+"\n\t".join(issues))
        except subprocess.CalledProcessError as e:
            errors.append("i18nspector exited with {e.returncode} - stderr:\n"
                          "{e.stderr}".format(e=e))

        if extended:
            for key, value in poFile.fixedHeaders().items():
                if not poFile.check(key, value):
                    errors.append("{key} is not '{value}'."
                                  .format(key=key, value=value))

    return (fname, errors)


def unify_po_file(fname: str) -> None:
    """unify PO header and rewrap the file named `fname`"""
    with pofile_writable(fname) as poFile:
        for key, value in poFile.fixedHeaders().items():
            poFile.unifyKey(key, value)
        # As a side-effect this updates the store flag,
        # if the file is not properly wrapped:
        poFile.needs_rewrap()
256
257
258
259
260


def main(logger) -> None:
    """Parse the command line, collect the PO files and check or fix them.

    @logger: logger used to report issues and warnings.

    Exits with a non-zero status if i18nspector is not installed or if,
    in check mode, any file has issues.
    """
    parser = argparse.ArgumentParser(description='Unify PO files')
    parser.add_argument('--fix', dest='fix', action='store_true',
                        help='Fixes issues found in PO headers.')
    parser.add_argument('--check-extended', dest='extended',
                        action='store_true',
                        help='Do extended checks of PO headers.')
    parser.add_argument('--lang', dest='lang',
                        help='Check all PO files of the specified language.')
    parser.add_argument('--cached', dest='cached', action='store_true',
                        help='Only check PO files staged with Git.')
    parser.add_argument('files', metavar='file', type=str, nargs='*',
                        help='List of files to process.')
    args = parser.parse_args()

    if args.lang:
        args.files += glob.glob("**/*.{lang}.po".format(lang=args.lang),
                                recursive=True)
        args.files += glob.glob("**/{lang}.po".format(lang=args.lang),
                                recursive=True)

    if args.cached:
        # get top level directory of the current git repository
        # git diff returns always relative paths to the top level directory
        toplevel = subprocess.check_output(
            ["git", "rev-parse", "--show-toplevel"],
            universal_newlines=True).rstrip()

        # get a list of changes and added files in stage for the next commit
        output = subprocess.check_output(
            ["git", "diff", "--name-only", "--cached",
             "--ignore-submodules", "--diff-filter=d"],
            universal_newlines=True)

        # add all po files to list to unify
        args.files += [os.path.join(toplevel, f) for f in output.splitlines()
                       if f.endswith(".po")]

    if not args.files and not args.cached and not args.lang:
        args.files += glob.glob("**/*.po", recursive=True)

    # Skip files in ./tmp/ and in the torbrowser-launcher submodule.
    # NOTE(review): the paths added for --cached are prefixed with the
    # repository top level, so these relative prefixes presumably never
    # match them -- confirm whether that is intended.
    skipped_prefixes = ('tmp/', 'submodules/torbrowser-launcher/')
    files = [f for f in args.files if not f.startswith(skipped_prefixes)]

    if not files:
        if not args.cached:
            logger.warning("No file to process.\n"
                           "You may want to add files to operate on."
                           " See --help for further information.")
        sys.exit(0)

    for prog in ("i18nspector",):
        if shutil.which(prog) is None:
            sys.exit("{prog}: command not found\n"
                     "You need to install {prog} first.\n"
                     "See /contribute/l10n_tricks."
                     .format(prog=prog))

    if args.fix:
        # unify PO headers for a list of files
        with multiprocessing.Pool() as pool:
            pool.map(unify_po_file, files)
    else:
        fine = True

        # The environment is no longer set here, but let's keep the
        # signatures unchanged, in case we need to tweak some more
        # things later on:
        i18nspector_env = {}  # type: Dict[str, str]
        _check_po_file = functools.partial(check_po_file,
                                           extended=args.extended,
                                           i18nspector_env=i18nspector_env)
        # A single pool, released again once all results are consumed.
        with multiprocessing.Pool() as pool:
            for fname, issues in pool.imap_unordered(_check_po_file,
                                                     files, 10):
                if issues:
                    fine = False
                    # indent sub-issues:
                    issues = [i.replace("\n", "\n\t") for i in issues]
                    logger.error("{fname}:\n\t{issues}"
                                 .format(fname=fname,
                                         issues="\n\t".join(issues)))
                else:
                    logger.debug("{fname} - No issue found."
                                 .format(fname=fname))

        if not fine:
            sys.exit("checked files are not clean.")


if __name__ == '__main__':
    # Report INFO and above, prefixed with the level name.
    logging.basicConfig(level=logging.INFO,
                        format='%(levelname)s: %(message)s')
    main(logging.getLogger())