-
Notifications
You must be signed in to change notification settings - Fork 125
Improve support of emoji/flags/skin tones in monospaced_width() #227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,11 +6,7 @@ | |
| the 'wcwidth' library. | ||
| """ | ||
|
|
||
| from unicodedata import normalize | ||
|
|
||
| from wcwidth import wcswidth, wcwidth | ||
|
|
||
| from ftfy.fixes import remove_terminal_escapes | ||
| import wcwidth | ||
|
|
||
|
|
||
| def character_width(char: str) -> int: | ||
|
|
@@ -31,7 +27,7 @@ def character_width(char: str) -> int: | |
| >>> character_width('\n') | ||
| -1 | ||
| """ | ||
| return int(wcwidth(char)) | ||
| return wcwidth.wcwidth(char) | ||
|
|
||
|
|
||
| def monospaced_width(text: str) -> int: | ||
|
|
@@ -43,16 +39,12 @@ def monospaced_width(text: str) -> int: | |
| This can be useful for formatting text that may contain non-spacing | ||
| characters, or CJK characters that take up two character cells. | ||
|
|
||
| Returns -1 if the string contains a non-printable or control character. | ||
|
|
||
| >>> monospaced_width('ちゃぶ台返し') | ||
| 12 | ||
| >>> len('ちゃぶ台返し') | ||
| 6 | ||
| >>> monospaced_width('owl\N{SOFT HYPHEN}flavored') | ||
| 11 | ||
| >>> monospaced_width('example\x80') | ||
| -1 | ||
| 12 | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Related to SOFT HYPHEN: #226 (comment)
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| A more complex example: The Korean word 'ibnida' can be written with 3 | ||
| pre-composed characters or 7 jamo. Either way, it *looks* the same and | ||
|
|
@@ -67,13 +59,16 @@ def monospaced_width(text: str) -> int: | |
| 4 characters, when shown as intended. | ||
| >>> monospaced_width('\x1b[34mblue\x1b[m') | ||
| 4 | ||
|
|
||
| Emoji ZWJ sequences are treated as single grapheme clusters with width 2. | ||
| >>> monospaced_width('👨👩👧') | ||
| 2 | ||
|
|
||
| Control characters are parsed and treated as zero-width. | ||
| >>> monospaced_width('example\x80') | ||
| 7 | ||
| """ | ||
| # NFC-normalize the text first, so that we don't need special cases for | ||
| # Hangul jamo. | ||
| # | ||
| # Remove terminal escapes before calculating width, because if they are | ||
| # displayed as intended, they will have zero width. | ||
| return int(wcswidth(remove_terminal_escapes(normalize("NFC", text)))) | ||
| return wcwidth.width(text, control_codes="parse") | ||
|
|
||
|
|
||
| def display_ljust(text: str, width: int, fillchar: str = " ") -> str: | ||
|
|
@@ -102,13 +97,7 @@ def display_ljust(text: str, width: int, fillchar: str = " ") -> str: | |
| msg = "The padding character must have display width 1" | ||
| raise ValueError(msg) | ||
|
|
||
| text_width = monospaced_width(text) | ||
| if text_width == -1: | ||
| # There's a control character here, so just don't add padding | ||
| return text | ||
|
|
||
| padding = max(0, width - text_width) | ||
| return text + fillchar * padding | ||
| return wcwidth.ljust(text, width, fillchar=fillchar) | ||
|
|
||
|
|
||
| def display_rjust(text: str, width: int, fillchar: str = " ") -> str: | ||
|
|
@@ -133,12 +122,7 @@ def display_rjust(text: str, width: int, fillchar: str = " ") -> str: | |
| msg = "The padding character must have display width 1" | ||
| raise ValueError(msg) | ||
|
|
||
| text_width = monospaced_width(text) | ||
| if text_width == -1: | ||
| return text | ||
|
|
||
| padding = max(0, width - text_width) | ||
| return fillchar * padding + text | ||
| return wcwidth.rjust(text, width, fillchar=fillchar) | ||
|
|
||
|
|
||
| def display_center(text: str, width: int, fillchar: str = " ") -> str: | ||
|
|
@@ -159,11 +143,4 @@ def display_center(text: str, width: int, fillchar: str = " ") -> str: | |
| msg = "The padding character must have display width 1" | ||
| raise ValueError(msg) | ||
|
|
||
| text_width = monospaced_width(text) | ||
| if text_width == -1: | ||
| return text | ||
|
|
||
| padding = max(0, width - text_width) | ||
| left_padding = padding // 2 | ||
| right_padding = padding - left_padding | ||
| return fillchar * left_padding + text + fillchar * right_padding | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a bug here (see https://jazcap53.github.io/pythons-eccentric-strcenter.html), but you never would have guessed it; I made the same mistake in jquast/wcwidth#188.
|
||
| return wcwidth.center(text, width, fillchar=fillchar) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| import pytest | ||
|
|
||
| from ftfy.formatting import ( | ||
| character_width, | ||
| display_center, | ||
| display_ljust, | ||
| display_rjust, | ||
| monospaced_width, | ||
| ) | ||
|
|
||
|
|
||
| class TestMonospacedWidth: | ||
| def test_ascii_and_cjk(self): | ||
| assert monospaced_width("hello") == 5 | ||
| assert monospaced_width("中文") == 4 | ||
| assert monospaced_width("ちゃぶ台返し") == 12 | ||
| assert monospaced_width("Hello 中文 👍") == 13 | ||
|
|
||
| def test_grapheme_clusters(self): | ||
| assert monospaced_width("cafe\u0301") == 4 | ||
| assert monospaced_width("\u200d") == 0 | ||
| assert monospaced_width("👨👩👧") == 2 | ||
| assert monospaced_width("👩🏻💻") == 2 | ||
| assert monospaced_width("🇨🇦") == 2 | ||
| assert monospaced_width("❤️") == 2 | ||
|
|
||
| def test_ansi_escape_sequences(self): | ||
| assert monospaced_width("\x1b[31mred\x1b[0m") == 3 | ||
| assert monospaced_width("\x1b[34mblue\x1b[m") == 4 | ||
| assert monospaced_width("\x1b[31;1mBold Red\x1b[0m") == 8 | ||
|
|
||
| def test_osc8_hyperlinks(self): | ||
| assert monospaced_width("\x1b]8;;https://example.com\x07Click here\x1b]8;;\x07") == 10 | ||
| assert monospaced_width( | ||
| "\x1b]8;;https://example.com\x07\x1b[34mBlue Link\x1b[0m\x1b]8;;\x07" | ||
| ) == 9 | ||
|
|
||
| def test_control_characters(self): | ||
| assert monospaced_width("example\x80") == 7 | ||
| assert monospaced_width("aaa\b\b\bxxx") == 3 | ||
| assert monospaced_width("hello\b\bXX") == 5 | ||
|
|
||
|
|
||
| class TestCharacterWidth: | ||
| def test_character_widths(self): | ||
| assert character_width("A") == 1 | ||
| assert character_width("車") == 2 | ||
| assert character_width("\N{ZERO WIDTH JOINER}") == 0 | ||
| assert character_width("\n") == -1 | ||
|
|
||
|
|
||
| class TestDisplayJustify: | ||
| def test_ljust(self): | ||
| assert display_ljust("hello", 10) == "hello " | ||
| assert display_ljust("中", 4) == "中 " | ||
| assert display_ljust("👍", 4) == "👍 " | ||
| assert display_ljust("hello", 3) == "hello" | ||
| assert display_ljust("hi", 5, ".") == "hi..." | ||
|
|
||
| def test_rjust(self): | ||
| assert display_rjust("hello", 10) == " hello" | ||
| assert display_rjust("中", 4) == " 中" | ||
| assert display_rjust("👍", 4) == " 👍" | ||
|
|
||
| def test_center(self): | ||
| assert display_center("hi", 6) == " hi " | ||
| assert display_center("中", 6) == " 中 " | ||
| assert display_center("hi", 5) == " hi " | ||
|
|
||
| def test_invalid_fillchar(self): | ||
| with pytest.raises(ValueError, match="display width 1"): | ||
| display_ljust("hi", 10, "中") | ||
| with pytest.raises(ValueError, match="display width 1"): | ||
| display_ljust("hi", 10, "\u200d") | ||
| with pytest.raises(ValueError, match="display width 1"): | ||
| display_rjust("hi", 10, "中") | ||
| with pytest.raises(ValueError, match="display width 1"): | ||
| display_center("hi", 10, "中") | ||
|
|
||
|
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `int()` wrapper was probably needed for an earlier version of wcwidth that was not typed, but it is typed now.