pre-commit-hooks/pre_commit_hooks/string_fixer.py

from __future__ import annotations

import argparse
import io
import re
import sys
import tokenize
from collections.abc import Sequence

# Python 3.12+ tokenizes f-strings into FSTRING_START/.../FSTRING_END parts;
# on older versions, -1 is a sentinel that never equals a real token type.
if sys.version_info >= (3, 12):  # pragma: >=3.12 cover
    FSTRING_START = tokenize.FSTRING_START
    FSTRING_END = tokenize.FSTRING_END
else:  # pragma: <3.12 cover
    FSTRING_START = FSTRING_END = -1

START_QUOTE_RE = re.compile('^[a-zA-Z]*"')
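# START_QUOTE_RE matches any letter prefix plus the opening double quote,
# e.g. '"' in '"foo"' and 'rb"' in 'rb"foo"'.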


def handle_match(token_text: str) -> str:
    if '"""' in token_text or "'''" in token_text:
        return token_text

    match = START_QUOTE_RE.match(token_text)
    if match is not None:
        meat = token_text[match.end():-1]
        if '"' in meat or "'" in meat:
            return token_text
        else:
            return match.group().replace('"', "'") + meat + "'"
    else:
        return token_text
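
# Illustrative examples (not part of the original file):
#   handle_match('"foo"')    -> "'foo'"
#   handle_match('b"foo"')   -> "b'foo'"
#   handle_match("'foo'")    -> "'foo'"      (already single-quoted: no match)
#   handle_match('"don\'t"') -> '"don\'t"'   (contains a quote: left alone)
#   handle_match("'''x'''")  -> "'''x'''"    (triple-quoted: left alone)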


def get_line_offsets_by_line_no(src: str) -> list[int]:
    # Padded so we can index with line number
    offsets = [-1, 0]
    for line in src.splitlines(True):
        offsets.append(offsets[-1] + len(line))
    return offsets
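
# e.g. get_line_offsets_by_line_no('ab\ncd\n') == [-1, 0, 3, 6], so
# offsets[1] is the character offset where line 1 starts and offsets[2]
# where line 2 starts, matching tokenize's 1-based row numbers.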


def fix_strings(filename: str) -> int:
    with open(filename, encoding='UTF-8', newline='') as f:
        contents = f.read()
    line_offsets = get_line_offsets_by_line_no(contents)

    # Basically a mutable string
    splitcontents = list(contents)

    fstring_depth = 0

    # Iterate in reverse so the offsets are always correct
    tokens_l = list(tokenize.generate_tokens(io.StringIO(contents).readline))
    tokens = reversed(tokens_l)
    for token_type, token_text, (srow, scol), (erow, ecol), _ in tokens:
        # Track f-string nesting (3.12+) so string tokens that are part of
        # an f-string are left untouched.
        if token_type == FSTRING_START:  # pragma: >=3.12 cover
            fstring_depth += 1
        elif token_type == FSTRING_END:  # pragma: >=3.12 cover
            fstring_depth -= 1
        elif fstring_depth == 0 and token_type == tokenize.STRING:
            new_text = handle_match(token_text)
            # Assigning a string to a list slice splices in its characters,
            # replacing exactly the token's character range.
            splitcontents[
                line_offsets[srow] + scol:
                line_offsets[erow] + ecol
            ] = new_text

    new_contents = ''.join(splitcontents)
    if contents != new_contents:
        with open(filename, 'w', encoding='UTF-8', newline='') as f:
            f.write(new_contents)
        return 1
    else:
        return 0
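
# A quick manual check (hypothetical scratch file, not part of the hook):
#   open('t.py', 'w').write('x = "hi"\n')
#   fix_strings('t.py')   # returns 1; t.py now reads: x = 'hi'
#   fix_strings('t.py')   # returns 0; already single-quoted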


def main(argv: Sequence[str] | None = None) -> int:
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    retv = 0

    for filename in args.filenames:
        return_value = fix_strings(filename)
        if return_value != 0:
            print(f'Fixing strings in {filename}')
        retv |= return_value

    return retv
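

# Registered as the `string-fixer` hook in pre-commit-hooks; it can also be
# run directly, e.g. `python -m pre_commit_hooks.string_fixer foo.py`
# (assuming the package is installed). Exit status 1 means at least one
# file was rewritten.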
if __name__ == '__main__':
    raise SystemExit(main())