2017-06-24 07:26:00 +08:00
|
|
|
"""Sort a simple YAML file, keeping blocks of comments and definitions
together.

We assume a strict subset of YAML that looks like:

    # block of header comments
    # here that should always
    # be at the top of the file

    # optional comments
    # can go here
    key: value
    key: value

    key: value

In other words, we don't sort deeper than the top layer, and might corrupt
complicated YAML files.
"""
|
2022-01-16 08:24:05 +08:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2017-06-24 07:26:00 +08:00
|
|
|
import argparse
|
2024-10-12 07:30:07 +08:00
|
|
|
from collections.abc import Sequence
|
2017-06-24 07:26:00 +08:00
|
|
|
|
|
|
|
|
|
|
|
# Quote characters that `first_key` strips from the start of a line when it
# computes a block's sort key.
QUOTES = ["'", '"']
|
|
|
|
|
|
|
|
|
2022-01-16 08:24:05 +08:00
|
|
|
def sort(lines: list[str]) -> list[str]:
    """Return the lines of a simple YAML file with its blocks sorted.

    The leading run of comment lines (the header) stays at the top. Every
    following block is ordered by its sort key (see `first_key`), and
    blocks are separated from each other by exactly one empty line.

    :param lines: array of strings (without newlines); it is not modified
    :return: sorted array of strings
    """
    # The parsing helpers consume their input, so work on a private copy.
    remaining = [*lines]

    result = parse_block(remaining, header=True)

    blocks = parse_blocks(remaining)
    blocks.sort(key=first_key)

    for block in blocks:
        # Only emit a separator when something already precedes this block.
        if result:
            result.append('')
        result.extend(block)

    return result
|
|
|
|
|
|
|
|
|
2022-01-16 08:24:05 +08:00
|
|
|
def parse_block(lines: list[str], header: bool = False) -> list[str]:
    """Parse and return a single block, removing it from the start of `lines`.

    A block ends at the first empty line or at the end of `lines`. When
    parsing a header block, it additionally ends at the first line that is
    not a comment (does not start with ``#``). The terminating line itself
    is left in `lines`.

    :param lines: list of lines; the returned block is deleted from its
        front in place
    :param header: whether we are parsing a header block
    :return: list of lines that form the single block (possibly empty)
    """
    # Find where the block ends first, then cut once. Popping the lines off
    # the front one at a time would shift the whole remaining list for every
    # single line consumed.
    end = 0
    for line in lines:
        if not line or (header and not line.startswith('#')):
            break
        end += 1

    block_lines = lines[:end]
    del lines[:end]
    return block_lines
|
|
|
|
|
|
|
|
|
2022-01-16 08:24:05 +08:00
|
|
|
def parse_blocks(lines: list[str]) -> list[list[str]]:
    """Split `lines` into blocks, consuming `lines` until it is empty.

    Empty lines between blocks are discarded; every other run of lines is
    handed to `parse_block`.

    :param lines: list of lines; emptied in place
    :return: list of blocks, where each block is a list of lines
    """
    found = []

    while lines:
        if lines[0] != '':
            found.append(parse_block(lines))
            continue
        # A blank separator line: drop it and look at the next line.
        del lines[0]

    return found
|
|
|
|
|
|
|
|
|
2022-01-16 08:24:05 +08:00
|
|
|
def first_key(lines: list[str]) -> str:
    """Return the string used as the sort key of a block.

    The sort key is the first line of the block that is not a comment
    (does not start with ``#``). If that line starts with one of the
    characters in `QUOTES`, that single leading character is dropped; the
    rest of the line is returned unchanged.

    >>> first_key(['# some comment', "'foo': true"])
    "foo': true"

    :param lines: the lines of one block
    :return: the sort key, or an empty string when the block contains no
        non-comment line
    """
    content = next(
        (candidate for candidate in lines if not candidate.startswith('#')),
        None,
    )
    if content is None:
        # Block holds nothing but comments (or is empty).
        return ''
    if content.startswith(tuple(QUOTES)):
        return content[1:]
    return content
|
2017-06-24 07:26:00 +08:00
|
|
|
|
|
|
|
|
2022-01-16 08:24:05 +08:00
|
|
|
def main(argv: Sequence[str] | None = None) -> int:
    """Sort each file named on the command line, rewriting it if needed.

    Each file is read with trailing whitespace stripped from every line and
    passed through `sort`. When the sorted lines differ from the stripped
    lines, the file is overwritten in place and its name is printed.

    :param argv: command-line arguments; passed straight to
        `ArgumentParser.parse_args`
    :return: 1 if at least one file was rewritten, otherwise 0
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('filenames', nargs='*', help='Filenames to fix')
    options = cli.parse_args(argv)

    exit_code = 0

    for filename in options.filenames:
        with open(filename, 'r+') as handle:
            original = [raw.rstrip() for raw in handle]
            reordered = sort(original)

            if original == reordered:
                continue

            print(f'Fixing file `{filename}`')
            # Rewind, overwrite, then cut off whatever is left of the old
            # contents past the new end of the file.
            handle.seek(0)
            handle.write('\n'.join(reordered) + '\n')
            handle.truncate()
            exit_code = 1

    return exit_code
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: exit with the status code returned by main().
if __name__ == '__main__':
    raise SystemExit(main())
|