blob: e824ea2a6df91e2a6af08e5d5cc2f6703f8853d8 [file] [log] [blame]
Yi Kong878f9942023-12-13 12:55:04 +09001from functools import lru_cache
2import re
3from typing import Dict, List
4
5from ._cell_widths import CELL_WIDTHS
6from ._lru_cache import LRUCache
7
8# Regex to match sequence of the most common character ranges
9_is_single_cell_widths = re.compile("^[\u0020-\u006f\u00a0\u02ff\u0370-\u0482]*$").match
10
11
def cell_len(text: str, _cache: Dict[str, int] = LRUCache(1024 * 4)) -> int:
    """Measure how many terminal cells are needed to display text.

    Args:
        text (str): Text to display.
        _cache (Dict[str, int]): Memo of previously measured strings. The
            default is created once at definition time and is therefore
            shared by every call that does not pass its own mapping.

    Returns:
        int: Total number of cells occupied by ``text``.
    """
    # Fast path: text drawn only from the common single-cell ranges is as
    # wide as it is long.
    if _is_single_cell_widths(text):
        return len(text)

    width = _cache.get(text, None)
    if width is None:
        width = sum(map(get_character_cell_size, text))
        # Only short strings are memoized.
        if len(text) <= 64:
            _cache[text] = width
    return width
33
34
@lru_cache(maxsize=4096)
def get_character_cell_size(character: str) -> int:
    """Return the display width, in cells, of one character.

    Args:
        character (str): A single character.

    Returns:
        int: Number of cells (0, 1 or 2) occupied by that character.
    """
    # Characters in the common ranges are one cell wide; everything else is
    # resolved by codepoint.
    is_common = _is_single_cell_widths(character)
    return 1 if is_common else _get_codepoint_cell_size(ord(character))
49
50
@lru_cache(maxsize=4096)
def _get_codepoint_cell_size(codepoint: int) -> int:
    """Get the cell size of a Unicode codepoint.

    Binary-searches ``CELL_WIDTHS``, whose entries are unpacked here as
    ``(start, end, width)`` with ``start``/``end`` an inclusive codepoint
    range. The search assumes the entries are sorted and non-overlapping.

    Args:
        codepoint (int): Codepoint of a character, e.g. the result of ``ord``.

    Returns:
        int: Number of cells (0, 1 or 2) occupied by that character.
    """
    table = CELL_WIDTHS
    low = 0
    high = len(table) - 1
    # ``low <= high`` is checked before indexing, so an empty table falls
    # through to the default instead of raising IndexError.
    while low <= high:
        mid = (low + high) // 2
        start, end, width = table[mid]
        if codepoint < start:
            high = mid - 1
        elif codepoint > end:
            low = mid + 1
        else:
            # A table width of -1 is reported as zero cells.
            return 0 if width == -1 else width
    # Codepoints not covered by any table entry default to a single cell.
    return 1
78
79
def set_cell_size(text: str, total: int) -> str:
    """Set the length of a string to fit within given number of cells.

    Text narrower than ``total`` cells is padded on the right with spaces;
    wider text is cropped. When the crop point falls in the middle of a
    double-width character, that character is dropped and a single space is
    appended in its place.

    Args:
        text (str): Text to pad or crop.
        total (int): Desired width in cells. Zero or a negative value yields
            an empty string.

    Returns:
        str: ``text`` padded or cropped to ``total`` cells.
    """
    # Guard first: a negative total would otherwise slice characters off the
    # end in the fast path (``text[:total]``) and would never terminate in
    # the binary search below, since no prefix has a negative width.
    if total <= 0:
        return ""

    # Fast path: one cell per character, so plain length arithmetic works.
    if _is_single_cell_widths(text):
        size = len(text)
        if size < total:
            return text + " " * (total - size)
        return text[:total]

    cell_size = cell_len(text)
    if cell_size == total:
        return text
    if cell_size < total:
        return text + " " * (total - cell_size)

    start = 0
    end = len(text)

    # Binary search until we find the right size
    while True:
        pos = (start + end) // 2
        before = text[: pos + 1]
        before_len = cell_len(before)
        if before_len == total + 1 and cell_len(before[-1]) == 2:
            # The last character is double-width and overshoots by one cell:
            # replace it with a space to land exactly on ``total``.
            return before[:-1] + " "
        if before_len == total:
            return before
        if before_len > total:
            end = pos
        else:
            start = pos
112 start = pos
113
114
# TODO: This is inefficient
# TODO: This might not work with ZWJ (zero-width joiner) type characters
def chop_cells(text: str, max_size: int, position: int = 0) -> List[str]:
    """Break text in to equal (cell) length strings.

    Args:
        text (str): Text to split.
        max_size (int): Maximum number of cells per resulting string.
        position (int): Number of cells counted as already used on the first
            string. Defaults to 0.

    Returns:
        List[str]: The pieces of ``text`` in order. The first piece is empty
            when the first character does not fit in the remaining space.
    """
    measure = get_character_cell_size
    current: List[str] = []
    lines: List[List[str]] = [current]
    used = position
    for character in text:
        size = measure(character)
        if used + size > max_size:
            # Character does not fit: start a new line with it.
            current = [character]
            lines.append(current)
            used = size
        else:
            current.append(character)
            used += size
    return ["".join(line) for line in lines]
138
139
if __name__ == "__main__":  # pragma: no cover
    # Manual demo: print a character width, then chop and resize a sample of
    # double-width text.
    sample = """这是对亚洲语言支持的测试。面对模棱两可的想法,拒绝猜测的诱惑。"""

    print(get_character_cell_size("😽"))
    for line in chop_cells(sample, 8):
        print(line)
    for n in range(80, 1, -1):
        print(set_cell_size(sample, n) + "|")
        print("x" * n)