Fix #518, provide --enforce-all option to check_added_large_files

When provided, the --enforce-all option ensures that all files passed
on the command line are checked against the size limit.  The default
behaviour remains unchanged.
This commit is contained in:
Marcus Shawcroft 2020-09-16 06:26:11 +01:00 committed by Anthony Sottile
parent 31d41ff291
commit 012bb0691f
3 changed files with 47 additions and 4 deletions

View File

@ -26,8 +26,11 @@ Add this to your `.pre-commit-config.yaml`
#### `check-added-large-files`
Prevent giant files from being committed.
- Specify what is "too large" with `args: ['--maxkb=123']` (default=500kB).
- Limits checked files to those indicated as staged for addition by git.
- If `git-lfs` is installed, lfs files will be skipped
(requires `git-lfs>=2.2.1`)
- `--enforce-all` - Check all listed files, not just those staged for
addition.
#### `check-ast`
Simply check whether files parse as valid python.

View File

@ -21,11 +21,20 @@ def lfs_files() -> Set[str]:
return set(json.loads(lfs_ret)['files'])
def find_large_added_files(filenames: Sequence[str], maxkb: int) -> int:
def find_large_added_files(
filenames: Sequence[str],
maxkb: int,
*,
enforce_all: bool = False,
) -> int:
# Find all added files that are also in the list of files pre-commit tells
# us about
retv = 0
for filename in (added_files() & set(filenames)) - lfs_files():
filenames_filtered = set(filenames) - lfs_files()
if not enforce_all:
filenames_filtered &= added_files()
for filename in filenames_filtered:
kb = int(math.ceil(os.stat(filename).st_size / 1024))
if kb > maxkb:
print(f'{filename} ({kb} KB) exceeds {maxkb} KB.')
@ -40,13 +49,21 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
'filenames', nargs='*',
help='Filenames pre-commit believes are changed.',
)
parser.add_argument(
'--enforce-all', action='store_true',
help='Enforce all files are checked, not just staged files.',
)
parser.add_argument(
'--maxkb', type=int, default=500,
help='Maxmimum allowable KB for added files',
)
args = parser.parse_args(argv)
return find_large_added_files(args.filenames, args.maxkb)
return find_large_added_files(
args.filenames,
args.maxkb,
enforce_all=args.enforce_all,
)
if __name__ == '__main__':

View File

@ -40,6 +40,17 @@ def test_add_something_giant(temp_git_dir):
assert find_large_added_files(['f.py'], 10) == 0
def test_enforce_all(temp_git_dir):
    """An unstaged oversized file is only reported when enforce_all is set."""
    candidates = ['f.py']
    with temp_git_dir.as_cwd():
        oversized = temp_git_dir.join('f.py')
        oversized.write('a' * 10000)
        # The file is written but never `git add`ed in this test, so the
        # two calls differ only in the enforce_all flag.
        # Should fail, when not staged with enforce_all
        flagged = find_large_added_files(candidates, 0, enforce_all=True)
        assert flagged == 1
        # Should pass, when not staged without enforce_all
        ignored = find_large_added_files(candidates, 0, enforce_all=False)
        assert ignored == 0
def test_added_file_not_in_pre_commits_list(temp_git_dir):
with temp_git_dir.as_cwd():
temp_git_dir.join('f.py').write("print('hello world')")
@ -97,3 +108,15 @@ def test_moves_with_gitlfs(temp_git_dir, monkeypatch): # pragma: no cover
# Now move it and make sure the hook still succeeds
cmd_output('git', 'mv', 'a.bin', 'b.bin')
assert main(('--maxkb', '9', 'b.bin')) == 0
@xfailif_no_gitlfs
def test_enforce_allows_gitlfs(temp_git_dir, monkeypatch):  # pragma: no cover
    """A large file tracked by git-lfs still passes under --enforce-all."""
    argv = ('--enforce-all', '--maxkb', '9', 'f.py')
    with temp_git_dir.as_cwd():
        # HOME is redirected to the temp dir before `git lfs install` runs
        # (presumably so the real user git config is left untouched).
        monkeypatch.setenv('HOME', str(temp_git_dir))
        cmd_output('git', 'lfs', 'install')
        lfs_file = temp_git_dir.join('f.py')
        lfs_file.write('a' * 10000)
        cmd_output('git', 'lfs', 'track', 'f.py')
        cmd_output('git', 'add', '--', '.')
        # With --enforce-all large files on git lfs should succeed
        exit_code = main(argv)
        assert exit_code == 0