165 lines
4.5 KiB
Python
165 lines
4.5 KiB
Python
from enum import Enum, auto
|
|
from functools import lru_cache
|
|
from re import sub
|
|
from typing import List, Optional, Tuple
|
|
from unicodedata import east_asian_width
|
|
|
|
|
|
class CharType(Enum):
    """Coarse character classes used when tokenizing text for wrapping."""

    SPACE = auto()  # whitespace according to ``str.isspace``
    ASIAN = auto()  # non-space character whose display width is not 1
    LATIN = auto()  # non-space character whose display width is 1


class TextWrap:
    """Width-aware text wrapping.

    Widths are measured in display columns: characters whose East Asian
    Width category maps to 2 in ``EAST_ASAIN_WIDTH_TABLE`` occupy two
    columns, everything else occupies one.
    """

    # Display width per ``unicodedata.east_asian_width`` category.
    # The attribute name is misspelled ("ASAIN"); it is kept as-is because
    # it is part of the class's public surface.
    EAST_ASAIN_WIDTH_TABLE = {
        "F": 2,
        "H": 1,
        "W": 2,
        "A": 1,
        "N": 1,
        "Na": 1,
    }

    @classmethod
    @lru_cache(maxsize=128)
    def get_width(cls, char: str) -> int:
        """Return the display width of a single character.

        Categories missing from ``EAST_ASAIN_WIDTH_TABLE`` count as 1.
        """
        return cls.EAST_ASAIN_WIDTH_TABLE.get(east_asian_width(char), 1)

    @classmethod
    @lru_cache(maxsize=32)
    def get_text_width(cls, text: str) -> int:
        """Return the display width of ``text`` (sum of its character widths)."""
        return sum(cls.get_width(char) for char in text)

    @classmethod
    @lru_cache(maxsize=128)
    def get_char_type(cls, char: str) -> CharType:
        """Classify a single character as SPACE, LATIN (width 1) or ASIAN."""
        if char.isspace():
            return CharType.SPACE

        if cls.get_width(char) == 1:
            return CharType.LATIN

        return CharType.ASIAN

    @classmethod
    def process_text_whitespace(cls, text: str) -> str:
        """Collapse every whitespace run to one space and strip both ends."""
        return sub(pattern=r"\s+", repl=" ", string=text).strip()

    @classmethod
    def split_long_token(cls, token: str, width: int) -> List[str]:
        """Split ``token`` into consecutive chunks of at most ``width`` characters.

        The result is deliberately not cached: it is a mutable list, and a
        cached list would be shared between callers, so one caller mutating
        it would corrupt the value every later caller receives.
        """
        return [token[i : i + width] for i in range(0, len(token), width)]

    @classmethod
    def tokenizer(cls, text: str):
        """Yield the wrapping tokens of ``text``.

        Consecutive LATIN characters form one token; every SPACE or ASIAN
        character is a token of its own. Nothing is yielded for empty text.
        """
        buffer = ""
        last_char_type: Optional[CharType] = None

        for char in text:
            char_type = cls.get_char_type(char)

            # Only a run of LATIN characters may grow the current token.
            if buffer and (char_type != last_char_type or char_type != CharType.LATIN):
                yield buffer
                buffer = ""

            buffer += char
            last_char_type = char_type

        # Guard against emitting an empty token for empty input.
        if buffer:
            yield buffer

    @classmethod
    def wrap(cls, text: str, width: int, once: bool = True) -> Tuple[str, bool]:
        """Wrap according to string length

        Each input line has its whitespace normalised (runs collapsed to a
        single space, ends stripped) before it is measured and wrapped.

        Parameters
        ----------
        text: str
            the text to be wrapped

        width: int
            the maximum length of a single line, the length of Chinese characters is 2

        once: bool
            whether to wrap only once; if so, a single line break is inserted
            into the first overlong line and every following input line is
            appended unprocessed (only trailing whitespace is removed)

        Returns
        -------
        wrap_text: str
            text after auto word wrap process

        is_wrapped: bool
            whether a line break occurs in the text
        """
        width = int(width)
        lines = text.splitlines()
        is_wrapped = False
        wrapped_lines: List[str] = []

        for index, raw_line in enumerate(lines):
            line = cls.process_text_whitespace(raw_line)

            if cls.get_text_width(line) <= width:
                wrapped_lines.append(line)
                continue

            wrapped_line, line_wrapped = cls._wrap_line(line, width, once)
            wrapped_lines.append(wrapped_line)
            is_wrapped = is_wrapped or line_wrapped

            if once:
                # Take the untouched remainder from the remaining *lines*
                # rather than slicing ``text`` by len(wrapped_line): that
                # offset is wrong whenever earlier lines exist or whitespace
                # was collapsed.
                remainder = "\n".join(lines[index + 1 :]).rstrip()
                if remainder:
                    wrapped_lines.append(remainder)
                break

        return "\n".join(wrapped_lines), is_wrapped

    @classmethod
    def _wrap_line(cls, text: str, width: int, once: bool = True) -> Tuple[str, bool]:
        """Wrap one line of text to ``width`` display columns.

        Parameters
        ----------
        text: str
            a single line of text

        width: int
            the maximum display width of a line

        once: bool
            if true, keep only the first line break and leave the rest of
            ``text`` on one line exactly as written

        Returns
        -------
        wrap_text: str
            the wrapped line(s) joined with a newline

        is_wrapped: bool
            whether at least one line break was inserted
        """
        wrapped_lines: List[str] = []
        line_buffer = ""
        current_width = 0

        for token in cls.tokenizer(text):
            # A space never starts a line.
            if token == " " and current_width == 0:
                continue

            token_width = cls.get_text_width(token)

            if current_width + token_width <= width:
                line_buffer += token
                current_width += token_width
            else:
                # The token does not fit: close the pending line first.
                if current_width != 0:
                    wrapped_lines.append(line_buffer.rstrip())
                line_buffer = ""
                current_width = 0

                if token == " ":
                    # The line break takes the place of the space.
                    continue

                # A token wider than a whole line is cut into chunks; all
                # but the last chunk become complete lines of their own.
                chunks = cls.split_long_token(token, width)
                wrapped_lines.extend(chunk.rstrip() for chunk in chunks[:-1])
                line_buffer = chunks[-1]
                current_width = cls.get_text_width(line_buffer)

            # Close a full line immediately so that a following space is
            # dropped by the check at the top of the loop instead of
            # leading the next line.
            if current_width >= width:
                wrapped_lines.append(line_buffer.rstrip())
                line_buffer = ""
                current_width = 0

        if current_width != 0:
            wrapped_lines.append(line_buffer.rstrip())

        if len(wrapped_lines) < 2:
            # Nothing had to be broken.
            return "".join(wrapped_lines), False

        if once:
            # The first line is a prefix of the text once the skipped
            # leading spaces are removed. Slice the remainder from the
            # text itself: re-joining the later lines with spaces would
            # insert spaces into split words and between CJK characters.
            first_line = wrapped_lines[0]
            remainder = text.lstrip(" ")[len(first_line) :].strip()
            return "\n".join([first_line, remainder]), True

        return "\n".join(wrapped_lines), True