Yi Kong | 878f994 | 2023-12-13 12:55:04 +0900 | [diff] [blame^] | 1 | from functools import lru_cache |
| 2 | import re |
| 3 | from typing import Dict, List |
| 4 | |
| 5 | from ._cell_widths import CELL_WIDTHS |
| 6 | from ._lru_cache import LRUCache |
| 7 | |
# Fast-path matcher: truthy only when *every* character of the string lies in
# one of the ranges below, which are treated as occupying a single cell each.
# ``fullmatch`` is used instead of ``^...$`` with ``match`` because ``$`` also
# matches just before a trailing newline, which let strings ending in "\n"
# (and "\n" itself) pass even though newline is not in the character class.
# The first range covers all printable ASCII (U+0020-U+007E).
# NOTE(review): ``\u00a0\u02ff`` lists two individual code points; a range
# ``\u00a0-\u02ff`` may have been intended -- confirm against CELL_WIDTHS
# before widening it.
_is_single_cell_widths = re.compile(
    "[\u0020-\u007e\u00a0\u02ff\u0370-\u0482]*"
).fullmatch
| 10 | |
| 11 | |
def cell_len(text: str, _cache: Dict[str, int] = LRUCache(1024 * 4)) -> int:
    """Measure how many terminal cells a string occupies.

    Args:
        text (str): Text to measure.
        _cache (Dict[str, int]): Internal memo of previously measured strings.
            The default instance is created once and shared between calls.

    Returns:
        int: Total cell width of ``text``.
    """
    # Fast path: every character is known to be one cell wide.
    if _is_single_cell_widths(text):
        return len(text)

    known_width = _cache.get(text, None)
    if known_width is not None:
        return known_width

    measure = get_character_cell_size
    width = sum(map(measure, text))
    # Only short strings are memoised, so long one-off strings do not
    # occupy cache slots.
    if len(text) <= 64:
        _cache[text] = width
    return width
| 33 | |
| 34 | |
@lru_cache(maxsize=4096)
def get_character_cell_size(character: str) -> int:
    """Return the cell width of one character.

    Args:
        character (str): A single character.

    Returns:
        int: Number of cells (0, 1 or 2) occupied by that character.
    """
    # Anything outside the single-cell ranges needs the width table lookup.
    if not _is_single_cell_widths(character):
        return _get_codepoint_cell_size(ord(character))
    return 1
| 49 | |
| 50 | |
@lru_cache(maxsize=4096)
def _get_codepoint_cell_size(codepoint: int) -> int:
    """Look up the cell width of a code point in ``CELL_WIDTHS``.

    Args:
        codepoint (int): Code point of a single character (as from ``ord``).

    Returns:
        int: Number of cells (0, 1 or 2) occupied by that character.
    """
    ranges = CELL_WIDTHS
    low, high = 0, len(ranges) - 1

    # Binary search over (start, end, width) entries; relies on the table
    # being ordered by code point.
    while True:
        middle = (low + high) // 2
        first, last, cells = ranges[middle]
        if first <= codepoint <= last:
            # A stored width of -1 is reported as zero cells.
            return 0 if cells == -1 else cells
        if codepoint < first:
            high = middle - 1
        else:
            low = middle + 1
        if high < low:
            # Not covered by any entry: default to a single cell.
            return 1
| 78 | |
| 79 | |
def set_cell_size(text: str, total: int) -> str:
    """Pad or crop a string so it occupies exactly ``total`` cells.

    Args:
        text (str): Text to resize.
        total (int): Desired width in cells. Zero or a negative value
            produces an empty string.

    Returns:
        str: ``text`` padded with trailing spaces, or cropped. When cropping
            would split a double-width character, that character is dropped
            and replaced by a single space.
    """
    # Guard first: a negative total would slice from the end of the string in
    # the fast path, and the search below could never reach a matching width.
    if total <= 0:
        return ""

    if _is_single_cell_widths(text):
        # One cell per character, so plain length arithmetic is enough.
        size = len(text)
        if size < total:
            return text + " " * (total - size)
        return text[:total]

    cell_size = cell_len(text)
    if cell_size == total:
        return text
    if cell_size < total:
        return text + " " * (total - cell_size)

    start = 0
    end = len(text)

    # Binary search for the prefix whose cell width equals ``total``.
    while True:
        pos = (start + end) // 2
        before = text[: pos + 1]
        before_len = cell_len(before)
        if before_len == total + 1 and cell_len(before[-1]) == 2:
            # The prefix overshoots by one cell because its last character is
            # double width: drop that character and pad with a space.
            return before[:-1] + " "
        if before_len == total:
            return before
        if before_len > total:
            end = pos
        else:
            start = pos
| 113 | |
| 114 | |
# TODO: This is inefficient
# TODO: This might not work with CWJ type characters
def chop_cells(text: str, max_size: int, position: int = 0) -> List[str]:
    """Split text into pieces that each fit within ``max_size`` cells.

    Args:
        text (str): Text to split.
        max_size (int): Maximum cell width of each piece.
        position (int): Cells counted as already used on the first piece.

    Returns:
        List[str]: The pieces, in order. The first piece is empty when the
            first character does not fit after ``position``.
    """
    measure = get_character_cell_size
    used = position
    current: List[str] = []
    rows: List[List[str]] = [current]

    for character in text:
        width = measure(character)
        if used + width > max_size:
            # Character does not fit: it opens a new piece.
            current = [character]
            rows.append(current)
            used = width
        else:
            current.append(character)
            used += width

    return ["".join(row) for row in rows]
| 138 | |
| 139 | |
if __name__ == "__main__":  # pragma: no cover
    # Manual demo: print a wide character's size, then wrap and resize a
    # sample of double-width text.
    sample = """这是对亚洲语言支持的测试。面对模棱两可的想法,拒绝猜测的诱惑。"""

    print(get_character_cell_size("😽"))

    for line in chop_cells(sample, 8):
        print(line)

    for n in range(80, 1, -1):
        resized = set_cell_size(sample, n)
        print(resized + "|")
        # Ruler of the same width, for visual comparison.
        print("x" * n)