2017-07-03 12:00:28 +08:00
|
|
|
"""Check that executable text files have a shebang."""
|
2022-01-16 08:24:05 +08:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2017-07-03 12:00:28 +08:00
|
|
|
import argparse
|
2020-02-06 03:10:42 +08:00
|
|
|
import shlex
|
2017-07-03 12:00:28 +08:00
|
|
|
import sys
|
2024-10-12 07:30:07 +08:00
|
|
|
from collections.abc import Generator
|
|
|
|
from collections.abc import Sequence
|
2021-01-08 23:27:16 +08:00
|
|
|
from typing import NamedTuple
|
2017-07-03 12:00:28 +08:00
|
|
|
|
2020-05-17 11:59:08 +08:00
|
|
|
from pre_commit_hooks.util import cmd_output
|
2020-08-03 02:25:07 +08:00
|
|
|
from pre_commit_hooks.util import zsplit
|
2017-07-03 12:00:28 +08:00
|
|
|
|
2020-05-17 11:59:08 +08:00
|
|
|
# Octal permission digits that have the execute bit (value 1) set.
EXECUTABLE_VALUES = frozenset('1357')
|
|
|
|
|
|
|
|
|
2022-01-16 08:24:05 +08:00
|
|
|
def check_executables(paths: list[str]) -> int:
    """Check that each of *paths* starts with a shebang.

    When ``git config core.fileMode`` reports ``false``, the check is
    delegated to ``_check_git_filemode``, which consults the mode recorded
    by git.  Otherwise every given path is inspected directly.

    Returns 1 if at least one offending file was reported, else 0.
    """
    # retcode=None is passed through to cmd_output; presumably it tolerates
    # a non-zero exit (e.g. the key being unset) -- confirm in util.
    file_mode_setting = cmd_output(
        'git', 'config', 'core.fileMode', retcode=None,
    ).strip()
    if file_mode_setting == 'false':  # pragma: win32 cover
        return _check_git_filemode(paths)
    else:  # pragma: win32 no cover
        status = 0
        for candidate in paths:
            if has_shebang(candidate):
                continue
            _message(candidate)
            status = 1

        return status
|
|
|
|
|
|
|
|
|
2021-01-08 23:27:16 +08:00
|
|
|
class GitLsFile(NamedTuple):
    """One entry parsed from ``git ls-files --stage`` output."""

    # Mode field as printed by git (a string of digits, e.g. '100755').
    mode: str
    # Path of the file as printed by git.
    filename: str
|
|
|
|
|
|
|
|
|
2024-07-30 06:00:04 +08:00
|
|
|
def git_ls_files(paths: Sequence[str]) -> Generator[GitLsFile]:
    """Yield the git-recorded mode and filename for each of *paths*.

    On Windows all files are "executable", so whether a file is meant to be
    executed has to come from the attributes git recorded for it.  Those are
    obtained by running ``git ls-files --stage`` in a subprocess.

    The ``-z`` option makes git separate entries with the null character
    instead of a newline.  Without it, a filename containing unusual
    characters is escaped (e.g. ``\\303\\261``) and wrapped in double
    quotes, and the quoted name cannot be opened.  Stripping the quotes
    would break other cases (a double quote is a valid filename character
    on Linux), so the unescaped ``-z`` output is parsed instead.

    Each entry is expected to look like ``<mode> <object> <stage>\\t<file>``.

    Raises:
        ValueError: if an entry does not have that shape.
    """
    outs = cmd_output('git', 'ls-files', '-z', '--stage', '--', *paths)
    # zsplit presumably splits the output on the null separator -- see util.
    for out in zsplit(outs):
        # Split on the first tab only: with -z the filename is printed
        # verbatim, so it may itself contain tab characters.
        metadata, filename = out.split('\t', 1)
        mode, _, _ = metadata.split()
        yield GitLsFile(mode, filename)
|
2020-05-17 11:59:08 +08:00
|
|
|
|
2021-01-08 23:27:16 +08:00
|
|
|
|
|
|
|
def _check_git_filemode(paths: Sequence[str]) -> int:
    """Check shebangs using the file modes recorded by git.

    A file is treated as executable when any of the last three characters
    of its git mode is in ``EXECUTABLE_VALUES``.  Every executable file
    without a shebang is reported through ``_message``.

    Returns 1 if at least one file was reported, else 0.
    """
    offenders: set[str] = set()
    for mode, filename in git_ls_files(paths):
        permission_digits = mode[-3:]
        if EXECUTABLE_VALUES.isdisjoint(permission_digits):
            # No execute bit recorded for this file: nothing to check.
            continue
        if has_shebang(filename):
            continue
        _message(filename)
        offenders.add(filename)

    return 1 if offenders else 0
|
|
|
|
|
|
|
|
|
2021-01-08 23:27:16 +08:00
|
|
|
def has_shebang(path: str) -> bool:
    """Return whether the file at *path* begins with the bytes ``#!``.

    The file is opened in binary mode and only its first two bytes are
    read.  A file shorter than two bytes (including an empty one) is
    reported as having no shebang.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(path, 'rb') as f:
        first_bytes = f.read(2)

    return first_bytes == b'#!'
|
|
|
|
|
|
|
|
|
|
|
|
def _message(path: str) -> None:
    """Print to stderr that *path* is marked executable but lacks a shebang.

    The report suggests the ``chmod`` and ``git add --chmod`` commands that
    clear the executable bit; the path is shell-quoted in those commands.
    """
    quoted_path = shlex.quote(path)
    report = ''.join((
        f'{path}: marked executable but has no (or invalid) shebang!\n',
        " If it isn't supposed to be executable, try: ",
        f'`chmod -x {quoted_path}`\n',
        ' If on Windows, you may also need to: ',
        f'`git add --chmod=-x {quoted_path}`\n',
        ' If it is supposed to be executable, double-check its shebang.',
    ))
    print(report, file=sys.stderr)
|
2017-07-03 12:00:28 +08:00
|
|
|
|
|
|
|
|
2022-01-16 08:24:05 +08:00
|
|
|
def main(argv: Sequence[str] | None = None) -> int:
    """Parse the command line and run the shebang check.

    *argv* is handed to ``ArgumentParser.parse_args``; the positional
    ``filenames`` (zero or more) are passed to ``check_executables``,
    whose result is returned.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument('filenames', nargs='*')
    options = arg_parser.parse_args(argv)

    return check_executables(options.filenames)
|
2019-02-01 11:19:10 +08:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|