Fix check-added-large-files --enforce-all to correctly consider all git-lfs files.

`git lfs status` only outputs status for files that are pending some git-lfs related operation.
For usage with --enforce-all, we need the list of all files that are tracked, which can be
achived by `git lfs ls-files`.

Fixes: https://github.com/pre-commit/pre-commit-hooks/issues/560
This commit is contained in:
Alex Martani 2021-10-21 15:29:54 -07:00 committed by Anthony Sottile
parent b8d76787ff
commit 03a65ca357
2 changed files with 35 additions and 11 deletions

View File

@ -1,24 +1,33 @@
import argparse
import json
import math
import os
import subprocess
from typing import Optional
from typing import Sequence
from typing import Set
from pre_commit_hooks.util import added_files
from pre_commit_hooks.util import CalledProcessError
from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit
def lfs_files() -> Set[str]:
try:
# Introduced in git-lfs 2.2.0, first working in 2.2.1
lfs_ret = cmd_output('git', 'lfs', 'status', '--json')
except CalledProcessError: # pragma: no cover (with git-lfs)
lfs_ret = '{"files":{}}'
def filter_lfs_files(filenames: Set[str]) -> None: # pragma: no cover (lfs)
"""Remove files tracked by git-lfs from the set."""
if not filenames:
return
return set(json.loads(lfs_ret)['files'])
check_attr = subprocess.run(
('git', 'check-attr', 'filter', '-z', '--stdin'),
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
encoding='utf-8',
check=True,
input='\0'.join(filenames),
)
stdout = zsplit(check_attr.stdout)
for i in range(0, len(stdout), 3):
filename, filter_tag = stdout[i], stdout[i + 2]
if filter_tag == 'lfs':
filenames.remove(filename)
def find_large_added_files(
@ -30,7 +39,9 @@ def find_large_added_files(
# Find all added files that are also in the list of files pre-commit tells
# us about
retv = 0
filenames_filtered = set(filenames) - lfs_files()
filenames_filtered = set(filenames)
filter_lfs_files(filenames_filtered)
if not enforce_all:
filenames_filtered &= added_files()

View File

@ -121,3 +121,16 @@ def test_enforce_allows_gitlfs(temp_git_dir, monkeypatch): # pragma: no cover
cmd_output('git', 'add', '--', '.')
# With --enforce-all large files on git lfs should succeed
assert main(('--enforce-all', '--maxkb', '9', 'f.py')) == 0
@xfailif_no_gitlfs # pragma: no cover
def test_enforce_allows_gitlfs_after_commit(temp_git_dir, monkeypatch):
with temp_git_dir.as_cwd():
monkeypatch.setenv('HOME', str(temp_git_dir))
cmd_output('git', 'lfs', 'install')
temp_git_dir.join('f.py').write('a' * 10000)
cmd_output('git', 'lfs', 'track', 'f.py')
cmd_output('git', 'add', '--', '.')
git_commit('-am', 'foo')
# With --enforce-all large files on git lfs should succeed
assert main(('--enforce-all', '--maxkb', '9', 'f.py')) == 0