diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 4b4d0cf..456a4f2 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -144,12 +144,6 @@ entry: fix-byte-order-marker language: python types: [text] -- id: fix-encoding-pragma - name: fix python encoding pragma (deprecated) - description: 'adds # -*- coding: utf-8 -*- to the top of python files.' - language: python - entry: fix-encoding-pragma - types: [python] - id: forbid-new-submodules name: forbid new submodules description: prevents addition of new git submodules. diff --git a/README.md b/README.md index 4992baf..9bcda1b 100644 --- a/README.md +++ b/README.md @@ -126,13 +126,6 @@ The following arguments are available: #### `fix-byte-order-marker` removes UTF-8 byte order marker -#### `fix-encoding-pragma` - -_Deprecated since py2 is EOL - use [pyupgrade](https://github.com/asottile/pyupgrade) instead._ - -Add `# -*- coding: utf-8 -*-` to the top of python files. - - To remove the coding pragma pass `--remove` (useful in a python3-only codebase) - #### `forbid-new-submodules` Prevent addition of new git submodules. @@ -210,6 +203,7 @@ Trims trailing whitespace. ### Deprecated / replaced hooks - `check-byte-order-marker`: instead use fix-byte-order-marker +- `fix-encoding-pragma`: instead use [`pyupgrade`](https://github.com/asottile/pyupgrade) ### As a standalone package diff --git a/pre_commit_hooks/fix_encoding_pragma.py b/pre_commit_hooks/fix_encoding_pragma.py deleted file mode 100644 index eee6705..0000000 --- a/pre_commit_hooks/fix_encoding_pragma.py +++ /dev/null @@ -1,157 +0,0 @@ -from __future__ import annotations - -import argparse -import sys -from typing import IO -from typing import NamedTuple -from typing import Sequence - -DEFAULT_PRAGMA = b'# -*- coding: utf-8 -*-' - - -def has_coding(line: bytes) -> bool: - if not line.strip(): - return False - return ( - line.lstrip()[:1] == b'#' and ( - b'unicode' in line or - b'encoding' in line or - b'coding:' in line or - b'coding=' in line - ) - ) - - -class ExpectedContents(NamedTuple): - shebang: bytes - rest: bytes - # True: has exactly the coding pragma expected - # False: missing coding pragma entirely - # None: has a coding pragma, but it does not match - pragma_status: bool | None - ending: bytes - - @property - def has_any_pragma(self) -> bool: - return self.pragma_status is not False - - def is_expected_pragma(self, remove: bool) -> bool: - expected_pragma_status = not remove - return self.pragma_status is expected_pragma_status - - -def _get_expected_contents( - first_line: bytes, - second_line: bytes, - rest: bytes, - expected_pragma: bytes, -) -> ExpectedContents: - ending = b'\r\n' if first_line.endswith(b'\r\n') else b'\n' - - if first_line.startswith(b'#!'): - shebang = first_line - potential_coding = second_line - else: - shebang = b'' - potential_coding = first_line - rest = second_line + rest - - if potential_coding.rstrip(b'\r\n') == expected_pragma: - pragma_status: bool | None = True - elif has_coding(potential_coding): - pragma_status = None - else: - pragma_status = False - rest = potential_coding + rest - - return ExpectedContents( - shebang=shebang, rest=rest, pragma_status=pragma_status, ending=ending, - ) - - -def fix_encoding_pragma( - f: IO[bytes], - remove: bool = False, - expected_pragma: bytes = DEFAULT_PRAGMA, -) -> int: - expected = _get_expected_contents( - f.readline(), f.readline(), f.read(), expected_pragma, - ) - - # Special cases for empty files - if not expected.rest.strip(): - # If a file only has a shebang or a coding pragma, remove it - if expected.has_any_pragma or expected.shebang: - f.seek(0) - f.truncate() - f.write(b'') - return 1 - else: - return 0 - - if expected.is_expected_pragma(remove): - return 0 - - # Otherwise, write out the new file - f.seek(0) - f.truncate() - f.write(expected.shebang) - if not remove: - f.write(expected_pragma + expected.ending) - f.write(expected.rest) - - return 1 - - -def _normalize_pragma(pragma: str) -> bytes: - return pragma.encode().rstrip() - - -def main(argv: Sequence[str] | None = None) -> int: - print( - 'warning: this hook is deprecated and will be removed in a future ' - 'release because py2 is EOL. instead, use ' - 'https://github.com/asottile/pyupgrade', - file=sys.stderr, - ) - - parser = argparse.ArgumentParser( - 'Fixes the encoding pragma of python files', - ) - parser.add_argument('filenames', nargs='*', help='Filenames to fix') - parser.add_argument( - '--pragma', default=DEFAULT_PRAGMA, type=_normalize_pragma, - help=( - f'The encoding pragma to use. ' - f'Default: {DEFAULT_PRAGMA.decode()}' - ), - ) - parser.add_argument( - '--remove', action='store_true', - help='Remove the encoding pragma (Useful in a python3-only codebase)', - ) - args = parser.parse_args(argv) - - retv = 0 - - if args.remove: - fmt = 'Removed encoding pragma from {filename}' - else: - fmt = 'Added `{pragma}` to {filename}' - - for filename in args.filenames: - with open(filename, 'r+b') as f: - file_ret = fix_encoding_pragma( - f, remove=args.remove, expected_pragma=args.pragma, - ) - retv |= file_ret - if file_ret: - print( - fmt.format(pragma=args.pragma.decode(), filename=filename), - ) - - return retv - - -if __name__ == '__main__': - raise SystemExit(main()) diff --git a/tests/fix_encoding_pragma_test.py b/tests/fix_encoding_pragma_test.py deleted file mode 100644 index 98557e9..0000000 --- a/tests/fix_encoding_pragma_test.py +++ /dev/null @@ -1,161 +0,0 @@ -from __future__ import annotations - -import io - -import pytest - -from pre_commit_hooks.fix_encoding_pragma import _normalize_pragma -from pre_commit_hooks.fix_encoding_pragma import fix_encoding_pragma -from pre_commit_hooks.fix_encoding_pragma import main - - -def test_integration_inserting_pragma(tmpdir): - path = tmpdir.join('foo.py') - path.write_binary(b'import httplib\n') - - assert main((str(path),)) == 1 - - assert path.read_binary() == ( - b'# -*- coding: utf-8 -*-\n' - b'import httplib\n' - ) - - -def test_integration_ok(tmpdir): - path = tmpdir.join('foo.py') - path.write_binary(b'# -*- coding: utf-8 -*-\nx = 1\n') - assert main((str(path),)) == 0 - - -def test_integration_remove(tmpdir): - path = tmpdir.join('foo.py') - path.write_binary(b'# -*- coding: utf-8 -*-\nx = 1\n') - - assert main((str(path), '--remove')) == 1 - - assert path.read_binary() == b'x = 1\n' - - -def test_integration_remove_ok(tmpdir): - path = tmpdir.join('foo.py') - path.write_binary(b'x = 1\n') - assert main((str(path), '--remove')) == 0 - - -@pytest.mark.parametrize( - 'input_str', - ( - b'', - ( - b'# -*- coding: utf-8 -*-\n' - b'x = 1\n' - ), - ( - b'#!/usr/bin/env python\n' - b'# -*- coding: utf-8 -*-\n' - b'foo = "bar"\n' - ), - ), -) -def test_ok_inputs(input_str): - bytesio = io.BytesIO(input_str) - assert fix_encoding_pragma(bytesio) == 0 - bytesio.seek(0) - assert bytesio.read() == input_str - - -@pytest.mark.parametrize( - ('input_str', 'output'), - ( - ( - b'import httplib\n', - b'# -*- coding: utf-8 -*-\n' - b'import httplib\n', - ), - ( - b'#!/usr/bin/env python\n' - b'x = 1\n', - b'#!/usr/bin/env python\n' - b'# -*- coding: utf-8 -*-\n' - b'x = 1\n', - ), - ( - b'#coding=utf-8\n' - b'x = 1\n', - b'# -*- coding: utf-8 -*-\n' - b'x = 1\n', - ), - ( - b'#!/usr/bin/env python\n' - b'#coding=utf8\n' - b'x = 1\n', - b'#!/usr/bin/env python\n' - b'# -*- coding: utf-8 -*-\n' - b'x = 1\n', - ), - # These should each get truncated - (b'#coding: utf-8\n', b''), - (b'# -*- coding: utf-8 -*-\n', b''), - (b'#!/usr/bin/env python\n', b''), - (b'#!/usr/bin/env python\n#coding: utf8\n', b''), - (b'#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n', b''), - ), -) -def test_not_ok_inputs(input_str, output): - bytesio = io.BytesIO(input_str) - assert fix_encoding_pragma(bytesio) == 1 - bytesio.seek(0) - assert bytesio.read() == output - - -def test_ok_input_alternate_pragma(): - input_s = b'# coding: utf-8\nx = 1\n' - bytesio = io.BytesIO(input_s) - ret = fix_encoding_pragma(bytesio, expected_pragma=b'# coding: utf-8') - assert ret == 0 - bytesio.seek(0) - assert bytesio.read() == input_s - - -def test_not_ok_input_alternate_pragma(): - bytesio = io.BytesIO(b'x = 1\n') - ret = fix_encoding_pragma(bytesio, expected_pragma=b'# coding: utf-8') - assert ret == 1 - bytesio.seek(0) - assert bytesio.read() == b'# coding: utf-8\nx = 1\n' - - -@pytest.mark.parametrize( - ('input_s', 'expected'), - ( - ('# coding: utf-8', b'# coding: utf-8'), - # trailing whitespace - ('# coding: utf-8\n', b'# coding: utf-8'), - ), -) -def test_normalize_pragma(input_s, expected): - assert _normalize_pragma(input_s) == expected - - -def test_integration_alternate_pragma(tmpdir, capsys): - f = tmpdir.join('f.py') - f.write('x = 1\n') - - pragma = '# coding: utf-8' - assert main((str(f), '--pragma', pragma)) == 1 - assert f.read() == '# coding: utf-8\nx = 1\n' - out, _ = capsys.readouterr() - assert out == f'Added `# coding: utf-8` to {str(f)}\n' - - -def test_crlf_ok(tmpdir): - f = tmpdir.join('f.py') - f.write_binary(b'# -*- coding: utf-8 -*-\r\nx = 1\r\n') - assert not main((str(f),)) - - -def test_crfl_adds(tmpdir): - f = tmpdir.join('f.py') - f.write_binary(b'x = 1\r\n') - assert main((str(f),)) - assert f.read_binary() == b'# -*- coding: utf-8 -*-\r\nx = 1\r\n'