2022-01-16 08:24:05 +08:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2015-11-14 04:34:37 +08:00
|
|
|
import argparse
|
2024-04-02 23:11:58 +08:00
|
|
|
import sys
|
2024-10-12 07:30:07 +08:00
|
|
|
from collections.abc import Sequence
|
2019-02-01 11:19:10 +08:00
|
|
|
from typing import IO
|
2020-02-06 03:10:42 +08:00
|
|
|
from typing import NamedTuple
|
2015-11-14 04:34:37 +08:00
|
|
|
|
2019-05-16 01:04:18 +08:00
|
|
|
# Pragma used when the caller does not supply one (see ``--pragma``).
DEFAULT_PRAGMA = b'# -*- coding: utf-8 -*-'
|
2015-11-14 04:34:37 +08:00
|
|
|
|
|
|
|
|
2020-02-06 03:10:42 +08:00
|
|
|
def has_coding(line: bytes) -> bool:
    """Heuristically decide whether ``line`` is an encoding pragma comment.

    A line qualifies when its first non-whitespace byte is ``#`` and it
    contains at least one of the markers ``unicode``, ``encoding``,
    ``coding:`` or ``coding=``.  Empty and whitespace-only lines never
    qualify.
    """
    # A blank line strips down to b'' and therefore fails this check too.
    if not line.lstrip().startswith(b'#'):
        return False
    markers = (b'unicode', b'encoding', b'coding:', b'coding=')
    return any(marker in line for marker in markers)
|
|
|
|
|
|
|
|
|
2020-02-06 03:10:42 +08:00
|
|
|
class ExpectedContents(NamedTuple):
    """Parsed view of a file's header, as needed to rewrite its pragma."""

    # The shebang line, or b'' when the file does not start with one.
    shebang: bytes
    # The file contents that follow the shebang / pragma lines.
    rest: bytes
    # Tri-state describing the coding pragma found in the file:
    #   True  -> exactly the expected pragma is present
    #   False -> there is no coding pragma at all
    #   None  -> a coding pragma is present but differs from the expected one
    pragma_status: bool | None
    # Line terminator to use when writing a pragma line.
    ending: bytes

    @property
    def has_any_pragma(self) -> bool:
        """Whether some coding pragma (matching or not) is present."""
        return not (self.pragma_status is False)

    def is_expected_pragma(self, remove: bool) -> bool:
        """Whether the file is already in the desired state.

        When ``remove`` is true the desired state is "no pragma"; otherwise
        it is "exactly the expected pragma".  A mismatching pragma
        (``pragma_status is None``) satisfies neither.
        """
        return self.pragma_status is (not remove)
|
2015-11-14 04:34:37 +08:00
|
|
|
|
|
|
|
|
2020-02-06 03:10:42 +08:00
|
|
|
def _get_expected_contents(
        first_line: bytes,
        second_line: bytes,
        rest: bytes,
        expected_pragma: bytes,
) -> ExpectedContents:
    """Split a file's leading lines into shebang, pragma status and body.

    Args:
        first_line: The first line of the file, terminator included.
        second_line: The second line of the file, terminator included.
        rest: Everything after the second line.
        expected_pragma: The pragma the file is supposed to carry, without
            a line terminator.

    Returns:
        An ``ExpectedContents`` whose ``rest`` holds every line that is
        neither the shebang nor a coding pragma.
    """
    if first_line.startswith(b'#!'):
        # With a shebang, the pragma can only live on the second line.
        shebang, candidate, body = first_line, second_line, rest
    else:
        # Without one, the first line is the candidate and the second line
        # is ordinary content.
        shebang, candidate, body = b'', first_line, second_line + rest

    status: bool | None
    if candidate.rstrip(b'\r\n') == expected_pragma:
        status = True
    elif has_coding(candidate):
        status = None
    else:
        status = False
        # The candidate was regular content after all: give it back.
        body = candidate + body

    return ExpectedContents(
        shebang=shebang,
        rest=body,
        pragma_status=status,
        # Use CRLF only when the first line itself ends with CRLF.
        ending=b'\r\n' if first_line.endswith(b'\r\n') else b'\n',
    )
|
|
|
|
|
|
|
|
|
2020-02-06 03:10:42 +08:00
|
|
|
def fix_encoding_pragma(
        f: IO[bytes],
        remove: bool = False,
        expected_pragma: bytes = DEFAULT_PRAGMA,
) -> int:
    """Rewrite ``f`` in place so that its encoding pragma is as requested.

    Args:
        f: A binary file object opened for reading and writing, positioned
            at the start of the file.
        remove: When true, strip any encoding pragma instead of adding or
            correcting one.
        expected_pragma: The pragma line (without terminator) to enforce
            when ``remove`` is false.

    Returns:
        1 if the file was modified, 0 if it was left untouched.
    """
    expected = _get_expected_contents(
        f.readline(), f.readline(), f.read(), expected_pragma,
    )

    # Special case: nothing but whitespace besides the shebang / pragma.
    if not expected.rest.strip():
        if not (expected.has_any_pragma or expected.shebang):
            return 0
        # A file holding only a shebang or a coding pragma is emptied.
        # Truncating at offset 0 is sufficient; no write is needed.
        f.seek(0)
        f.truncate()
        return 1

    if expected.is_expected_pragma(remove):
        return 0

    # Otherwise, write out the new file: shebang, optional pragma, body.
    f.seek(0)
    f.truncate()
    f.write(expected.shebang)
    if not remove:
        f.write(expected_pragma + expected.ending)
    f.write(expected.rest)

    return 1
|
|
|
|
|
|
|
|
|
2020-02-06 03:10:42 +08:00
|
|
|
def _normalize_pragma(pragma: str) -> bytes:
|
|
|
|
return pragma.encode().rstrip()
|
2016-08-12 13:56:54 +08:00
|
|
|
|
|
|
|
|
2022-01-16 08:24:05 +08:00
|
|
|
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point: fix the encoding pragma of each given file.

    Args:
        argv: Argument list to parse; ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        1 if at least one file was rewritten, otherwise 0.
    """
    print(
        'warning: this hook is deprecated and will be removed in a future '
        'release because py2 is EOL. instead, use '
        'https://github.com/asottile/pyupgrade',
        file=sys.stderr,
    )

    # The text has to be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would make the usage
    # line show this sentence as the program name.
    parser = argparse.ArgumentParser(
        description='Fixes the encoding pragma of python files',
    )
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    parser.add_argument(
        '--pragma', default=DEFAULT_PRAGMA, type=_normalize_pragma,
        help=(
            'The encoding pragma to use. '
            f'Default: {DEFAULT_PRAGMA.decode()}'
        ),
    )
    parser.add_argument(
        '--remove', action='store_true',
        help='Remove the encoding pragma (Useful in a python3-only codebase)',
    )
    args = parser.parse_args(argv)

    retv = 0

    if args.remove:
        fmt = 'Removed encoding pragma from {filename}'
    else:
        fmt = 'Added `{pragma}` to {filename}'

    for filename in args.filenames:
        # 'r+b' so the file can be read and then rewritten in place.
        with open(filename, 'r+b') as f:
            file_ret = fix_encoding_pragma(
                f, remove=args.remove, expected_pragma=args.pragma,
            )
            retv |= file_ret
            if file_ret:
                print(
                    fmt.format(pragma=args.pragma.decode(), filename=filename),
                )

    return retv
|
|
|
|
|
2016-12-01 01:56:42 +08:00
|
|
|
|
2019-02-12 11:57:37 +08:00
|
|
|
if __name__ == '__main__':
    # Run the hook and use its return value as the process exit status.
    sys.exit(main())
|