pre-commit-hooks/pre_commit_hooks/requirements_txt_fixer.py

166 lines
4.7 KiB
Python
Raw Permalink Normal View History

from __future__ import annotations
2014-12-17 04:22:37 +08:00
import argparse
2020-05-17 11:58:27 +08:00
import re
from collections.abc import Sequence
from typing import IO
2014-12-17 04:22:37 +08:00
2017-06-26 01:14:58 +08:00
# Return codes used by fix_requirements() and main():
# PASS - the file was left untouched; FAIL - the file was rewritten.
PASS = 0
FAIL = 1
2020-02-06 03:10:42 +08:00
class Requirement:
    """One logical entry of a requirements file.

    ``value`` holds the raw requirement text (several physical lines when
    backslash continuations are used) and ``comments`` holds the comment and
    blank lines collected for it.  Instances order themselves by package
    name so that a list of them can be passed straight to ``sorted()``.
    """

    # Version comparison operators: ==, ===, !=, ~=, >, >=, <, <=.
    UNTIL_COMPARISON = re.compile(b'={2,3}|!=|~=|>=?|<=?')
    # Leading run of characters up to the first ';' or whitespace.
    UNTIL_SEP = re.compile(rb'[^;\s]+')

    def __init__(self) -> None:
        # Raw requirement text; None until the first value line is appended.
        self.value: bytes | None = None
        # Comment / blank lines attached to this requirement.
        self.comments: list[bytes] = []

    @property
    def name(self) -> bytes:
        """Return the lower-cased name used as the sort key.

        For values containing ``#egg=`` / ``&egg=`` this is everything after
        that marker.  Otherwise it is the text before the first ``;`` or
        whitespace, cut at the first version comparison operator.  Returns
        ``b''`` when the value holds nothing name-like.
        """
        assert self.value is not None, self.value
        name = self.value.lower()
        for egg in (b'#egg=', b'&egg='):
            if egg in self.value:
                return name.partition(egg)[-1]

        # Indented requirement lines are stored verbatim, so leading
        # whitespace has to be dropped first: UNTIL_SEP only matches at
        # position 0 and can never match a whitespace character.
        name = name.lstrip()
        m = self.UNTIL_SEP.match(name)
        if m is None:
            # Whitespace-only value or one starting with ';'.
            return b''

        name = m.group()
        m = self.UNTIL_COMPARISON.search(name)
        if not m:
            return name

        return name[:m.start()]

    def __lt__(self, requirement: Requirement) -> bool:
        """Return True when this requirement sorts before *requirement*."""
        # \n means top of file comment, so always return True,
        # otherwise just do a string comparison with value.
        assert self.value is not None, self.value
        if self.value == b'\n':
            return True
        if requirement.value == b'\n':
            return False

        # if 2 requirements have the same name, the one with comments
        # needs to go first (so that when removing duplicates, the one
        # with comments is kept)
        if self.name == requirement.name:
            return bool(self.comments) > bool(requirement.comments)
        return self.name < requirement.name

    def is_complete(self) -> bool:
        """Return True once a value is set and it has no pending continuation.

        A value whose last line (ignoring the line terminator) ends with a
        backslash continues on the next physical line.
        """
        return (
            self.value is not None and
            not self.value.rstrip(b'\r\n').endswith(b'\\')
        )

    def append_value(self, value: bytes) -> None:
        """Set ``value``, or append *value* to it when one is already set."""
        if self.value is not None:
            self.value += value
        else:
            self.value = value
2014-12-17 04:22:37 +08:00
2020-02-06 03:10:42 +08:00
def fix_requirements(f: IO[bytes]) -> int:
    """Sort and de-duplicate the requirements in *f*, rewriting it in place.

    Comment and blank lines stay attached to the requirement that follows
    them, a trailing comment block stays at the end of the file, and the
    bogus ``pkg-resources==0.0.0`` entry is dropped.

    Args:
        f: Binary file object open for reading and writing.

    Returns:
        ``PASS`` when the file is blank or already in its fixed form,
        ``FAIL`` when the file was rewritten.
    """
    requirements: list[Requirement] = []
    before = list(f)
    after: list[bytes] = []

    before_string = b''.join(before)

    # adds new line in case one is missing
    # AND a change to the requirements file is needed regardless:
    if before and not before[-1].endswith(b'\n'):
        before[-1] += b'\n'

    # If the file is empty (i.e. only whitespace/newlines) exit early
    if before_string.strip() == b'':
        return PASS

    for line in before:
        # If the most recent requirement object is complete (has a value
        # with no pending line continuation), then it's time to start
        # building the next requirement object.
        if not requirements or requirements[-1].is_complete():
            requirements.append(Requirement())

        requirement = requirements[-1]

        # If we see a newline before any requirements, then this is a
        # top of file comment.
        if len(requirements) == 1 and line.strip() == b'':
            if (
                    requirement.comments and
                    requirement.comments[0].startswith(b'#')
            ):
                requirement.value = b'\n'
            else:
                requirement.comments.append(line)
        elif line.lstrip().startswith(b'#') or line.strip() == b'':
            requirement.comments.append(line)
        else:
            requirement.append_value(line)

    # if a file ends in a comment, preserve it at the end
    if requirements[-1].value is None:
        rest = requirements.pop().comments
    else:
        rest = []

    # find and remove pkg-resources==0.0.0
    # which is automatically added by broken pip package under Debian.
    # The line terminator is stripped before comparing so that files with
    # CRLF line endings are handled as well (the file is read as bytes).
    bogus_entries = (b'pkg-resources==0.0.0', b'pkg_resources==0.0.0')
    requirements = [
        req for req in requirements
        if req.value is None or
        req.value.rstrip(b'\r\n') not in bogus_entries
    ]

    # sort the requirements and remove duplicates
    prev = None
    for requirement in sorted(requirements):
        after.extend(requirement.comments)
        assert requirement.value, requirement.value
        if prev is None or requirement.value != prev.value:
            after.append(requirement.value)
            prev = requirement

    after.extend(rest)

    after_string = b''.join(after)

    if before_string == after_string:
        return PASS

    f.seek(0)
    f.write(after_string)
    f.truncate()
    return FAIL
2014-12-17 04:22:37 +08:00
def main(argv: Sequence[str] | None = None) -> int:
    """Fix every requirements file named on the command line.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The bitwise OR of the per-file results of ``fix_requirements``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    parsed = arg_parser.parse_args(argv)

    status = PASS
    for filename in parsed.filenames:
        with open(filename, 'rb+') as handle:
            file_status = fix_requirements(handle)
            # Report each file that had to be rewritten.
            if file_status:
                print(f'Sorting {filename}')
            status = status | file_status

    return status
# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())