fluent_widgets_pyside6/qfluentwidgets/common/auto_wrap.py

from enum import Enum, auto
from functools import lru_cache
from re import sub
from typing import List, Optional, Tuple
from unicodedata import east_asian_width


class CharType(Enum):
    SPACE = auto()
    ASIAN = auto()
    LATIN = auto()


class TextWrap:
    """Text wrap"""

    EAST_ASAIN_WIDTH_TABLE = {
        "F": 2,
        "H": 1,
        "W": 2,
        "A": 1,
        "N": 1,
        "Na": 1,
    }

    @classmethod
    @lru_cache(maxsize=128)
    def get_width(cls, char: str) -> int:
        """Returns the width of the char"""
        return cls.EAST_ASAIN_WIDTH_TABLE.get(east_asian_width(char), 1)

    @classmethod
    @lru_cache(maxsize=32)
    def get_text_width(cls, text: str) -> int:
        """Returns the width of the text"""
        return sum(cls.get_width(char) for char in text)

    @classmethod
    @lru_cache(maxsize=128)
    def get_char_type(cls, char: str) -> CharType:
        """Returns the type of the char"""

        if char.isspace():
            return CharType.SPACE

        if cls.get_width(char) == 1:
            return CharType.LATIN

        return CharType.ASIAN

    @classmethod
    def process_text_whitespace(cls, text: str) -> str:
        """Process whitespace and leading and trailing spaces in strings"""
        return sub(pattern=r"\s+", repl=" ", string=text).strip()

    @classmethod
    @lru_cache(maxsize=32)
    def split_long_token(cls, token: str, width: int) -> List[str]:
        """Split long token into smaller chunks."""
        return [token[i : i + width] for i in range(0, len(token), width)]

    @classmethod
    def tokenizer(cls, text: str):
        """tokenize line"""

        buffer = ""
        last_char_type: Optional[CharType] = None

        for char in text:
            char_type = cls.get_char_type(char)

            if buffer and (char_type != last_char_type or char_type != CharType.LATIN):
                yield buffer
                buffer = ""

            buffer += char
            last_char_type = char_type

        yield buffer

    @classmethod
    def wrap(cls, text: str, width: int, once: bool = True) -> Tuple[str, bool]:
        """Wrap according to string length

        Parameters
        ----------
        text: str
            the text to be wrapped

        width: int
            the maximum length of a single line, the length of Chinese characters is 2

        once: bool
            whether to wrap only once

        Returns
        -------
        wrap_text: str
            text after auto word wrap process

        is_wrapped: bool
            whether a line break occurs in the text
        """

        width = int(width)
        lines = text.splitlines()
        is_wrapped = False
        wrapped_lines = []

        for line in lines:
            line = cls.process_text_whitespace(line)

            if cls.get_text_width(line) > width:
                wrapped_line, is_wrapped = cls._wrap_line(line, width, once)
                wrapped_lines.append(wrapped_line)

                if once:
                    wrapped_lines.append(text[len(wrapped_line) :].rstrip())
                    return "".join(wrapped_lines), is_wrapped

            else:
                wrapped_lines.append(line)

        return "\n".join(wrapped_lines), is_wrapped

    @classmethod
    def _wrap_line(cls, text: str, width: int, once: bool = True) -> Tuple[str, bool]:
        line_buffer = ""
        wrapped_lines = []
        current_width = 0

        for token in cls.tokenizer(text):
            token_width = cls.get_text_width(token)

            if token == " " and current_width == 0:
                continue

            if current_width + token_width <= width:
                line_buffer += token
                current_width += token_width

                if current_width == width:
                    wrapped_lines.append(line_buffer.rstrip())
                    line_buffer = ""
                    current_width = 0
            else:
                if current_width != 0:
                    wrapped_lines.append(line_buffer.rstrip())

                chunks = cls.split_long_token(token, width)

                for chunk in chunks[:-1]:
                    wrapped_lines.append(chunk.rstrip())

                line_buffer = chunks[-1]
                current_width = cls.get_text_width(chunks[-1])

        if current_width != 0:
            wrapped_lines.append(line_buffer.rstrip())

        if once:
            return "\n".join([wrapped_lines[0], " ".join(wrapped_lines[1:])]), True

        return "\n".join(wrapped_lines), True