Skip to content

Commit 75687c9

Browse files
authored
Fix UTF-8 encoding for special characters (emoji, accented letters, etc.) by encoding JSON as UTF-8 bytes (#452)
1 parent 00e3f81 commit 75687c9

File tree

3 files changed

+169
-21
lines changed

3 files changed

+169
-21
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
nylas-python Changelog
22
======================
33

4+
Unreleased
5+
----------
6+
* Fix UTF-8 encoding for special characters (emoji, accented letters, etc.) by encoding JSON as UTF-8 bytes
7+
48
v6.14.1
59
----------
610
* Fix attachment id to not be a requirement

nylas/handler/http_client.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import sys
21
import json
2+
import sys
33
from typing import Union, Tuple, Dict
44
from urllib.parse import urlparse, quote
55

@@ -91,16 +91,17 @@ def _execute(
9191
timeout = overrides["timeout"]
9292

9393
# Serialize request_body to JSON with ensure_ascii=False to keep non-ASCII characters unescaped
94-
# This ensures special characters (accented letters, emoji, etc.) are not escaped
94+
# and allow_nan=True to support NaN/Infinity values (matching default json.dumps behavior).
95+
# Encode as UTF-8 bytes to avoid Latin-1 encoding errors with special characters.
9596
json_data = None
9697
if request_body is not None and data is None:
97-
json_data = json.dumps(request_body, ensure_ascii=False)
98+
json_data = json.dumps(request_body, ensure_ascii=False, allow_nan=True).encode("utf-8")
9899
try:
99100
response = requests.request(
100101
request["method"],
101102
request["url"],
102103
headers=request["headers"],
103-
data=json_data or data,
104+
data=json_data if json_data is not None else data,
104105
timeout=timeout,
105106
)
106107
except requests.exceptions.Timeout as exc:

tests/handler/test_http_client.py

Lines changed: 160 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ def test_execute(self, http_client, patched_version_and_sys, patched_request):
302302
"Content-type": "application/json; charset=utf-8",
303303
"test": "header",
304304
},
305-
data='{"foo": "bar"}',
305+
data=b'{"foo": "bar"}',
306306
timeout=30,
307307
)
308308

@@ -336,7 +336,7 @@ def test_execute_override_timeout(
336336
"Content-type": "application/json; charset=utf-8",
337337
"test": "header",
338338
},
339-
data='{"foo": "bar"}',
339+
data=b'{"foo": "bar"}',
340340
timeout=60,
341341
)
342342

@@ -426,7 +426,7 @@ def test_execute_with_headers(self, http_client, patched_version_and_sys, patche
426426
"Content-type": "application/json; charset=utf-8",
427427
"test": "header",
428428
},
429-
data='{"foo": "bar"}',
429+
data=b'{"foo": "bar"}',
430430
timeout=30,
431431
)
432432

@@ -452,17 +452,19 @@ def test_execute_with_utf8_characters(self, http_client, patched_version_and_sys
452452
)
453453

454454
assert response_json == {"success": True}
455-
# Verify that the data sent preserves UTF-8 characters (not escaped)
455+
# Verify that the data is sent as UTF-8 encoded bytes
456456
call_kwargs = patched_request.call_args[1]
457457
assert "data" in call_kwargs
458458
sent_data = call_kwargs["data"]
459459

460-
# The JSON should contain actual UTF-8 characters, not escape sequences
461-
assert "Réunion d'équipe" in sent_data
462-
assert "De l'idée à la post-prod" in sent_data
463-
assert "café" in sent_data
460+
# The data should be bytes with actual UTF-8 characters (not escape sequences)
461+
assert isinstance(sent_data, bytes)
462+
decoded_data = sent_data.decode("utf-8")
463+
assert "Réunion d'équipe" in decoded_data
464+
assert "De l'idée à la post-prod, sans friction" in decoded_data
465+
assert "café" in decoded_data
464466
# Should NOT contain unicode escape sequences
465-
assert "\\u" not in sent_data
467+
assert "\\u" not in decoded_data
466468

467469
def test_execute_with_none_request_body(self, http_client, patched_version_and_sys, patched_request):
468470
"""Test that None request_body is handled correctly."""
@@ -479,9 +481,33 @@ def test_execute_with_none_request_body(self, http_client, patched_version_and_s
479481
)
480482

481483
assert response_json == {"success": True}
482-
# Verify that data is None when request_body is None
484+
# Verify that the data branch is used when request_body is None
483485
call_kwargs = patched_request.call_args[1]
486+
# Should use data= parameter, not json= parameter
484487
assert "data" in call_kwargs
488+
assert "json" not in call_kwargs
489+
assert call_kwargs["data"] is None
490+
491+
def test_execute_with_none_request_body_and_none_data(self, http_client, patched_version_and_sys, patched_request):
492+
"""Test that both None request_body and None data are handled correctly."""
493+
mock_response = Mock()
494+
mock_response.json.return_value = {"success": True}
495+
mock_response.headers = {"X-Test-Header": "test"}
496+
mock_response.status_code = 200
497+
patched_request.return_value = mock_response
498+
499+
response_json, response_headers = http_client._execute(
500+
method="DELETE",
501+
path="/events/123",
502+
request_body=None,
503+
data=None,
504+
)
505+
506+
assert response_json == {"success": True}
507+
call_kwargs = patched_request.call_args[1]
508+
# Should use data= parameter with None value
509+
assert "data" in call_kwargs
510+
assert "json" not in call_kwargs
485511
assert call_kwargs["data"] is None
486512

487513
def test_execute_with_emoji_and_international_characters(self, http_client, patched_version_and_sys, patched_request):
@@ -511,13 +537,130 @@ def test_execute_with_emoji_and_international_characters(self, http_client, patc
511537
call_kwargs = patched_request.call_args[1]
512538
sent_data = call_kwargs["data"]
513539

514-
# All characters should be preserved
515-
assert "🎉 Party time! 🥳" in sent_data
516-
assert "こんにちは" in sent_data
517-
assert "你好" in sent_data
518-
assert "Привет" in sent_data
519-
assert "Größe" in sent_data
520-
assert "¿Cómo estás?" in sent_data
540+
# All characters should be preserved as UTF-8 encoded bytes
541+
assert isinstance(sent_data, bytes)
542+
decoded_data = sent_data.decode("utf-8")
543+
assert "🎉 Party time! 🥳" in decoded_data
544+
assert "こんにちは" in decoded_data
545+
assert "你好" in decoded_data
546+
assert "Привет" in decoded_data
547+
assert "Größe" in decoded_data
548+
assert "¿Cómo estás?" in decoded_data
549+
550+
def test_execute_with_right_single_quotation_mark(self, http_client, patched_version_and_sys, patched_request):
551+
"""Test that right single quotation mark (\\u2019) is handled correctly.
552+
553+
This character caused UnicodeEncodeError: 'latin-1' codec can't encode character '\\u2019'.
554+
"""
555+
mock_response = Mock()
556+
mock_response.json.return_value = {"success": True}
557+
mock_response.headers = {"X-Test-Header": "test"}
558+
mock_response.status_code = 200
559+
patched_request.return_value = mock_response
560+
561+
# The \u2019 character is the right single quotation mark (’)
562+
# This was the exact character that caused the original encoding error
563+
request_body = {
564+
"subject": "It’s a test", # Contains \u2019 (right single quotation mark)
565+
"body": "Here’s another example with curly apostrophe",
566+
}
567+
568+
response_json, response_headers = http_client._execute(
569+
method="POST",
570+
path="/messages/send",
571+
request_body=request_body,
572+
)
573+
574+
assert response_json == {"success": True}
575+
call_kwargs = patched_request.call_args[1]
576+
sent_data = call_kwargs["data"]
577+
578+
# The data should be UTF-8 encoded bytes with the \u2019 character preserved
579+
assert isinstance(sent_data, bytes)
580+
decoded_data = sent_data.decode("utf-8")
581+
assert "’" in decoded_data # \u2019 right single quotation mark
582+
assert "It’s a test" in decoded_data
583+
assert "Here’s another" in decoded_data
584+
585+
def test_execute_with_emojis(self, http_client, patched_version_and_sys, patched_request):
586+
"""Test that emojis are handled correctly in request bodies.
587+
588+
Emojis are multi-byte UTF-8 characters that could cause encoding issues
589+
if not handled properly.
590+
"""
591+
mock_response = Mock()
592+
mock_response.json.return_value = {"success": True}
593+
mock_response.headers = {"X-Test-Header": "test"}
594+
mock_response.status_code = 200
595+
patched_request.return_value = mock_response
596+
597+
request_body = {
598+
"subject": "Hello 👋 World 🌍",
599+
"body": "Great job! 🎉 Keep up the good work 💪 See you soon 😊",
600+
"emoji_only": "🔥🚀✨💯",
601+
"mixed": "Meeting at 3pm 📅 Don't forget! ⏰",
602+
}
603+
604+
response_json, response_headers = http_client._execute(
605+
method="POST",
606+
path="/messages/send",
607+
request_body=request_body,
608+
)
609+
610+
assert response_json == {"success": True}
611+
call_kwargs = patched_request.call_args[1]
612+
sent_data = call_kwargs["data"]
613+
614+
# All emojis should be preserved in UTF-8 encoded bytes
615+
assert isinstance(sent_data, bytes)
616+
decoded_data = sent_data.decode("utf-8")
617+
assert "Hello 👋 World 🌍" in decoded_data
618+
assert "🎉" in decoded_data
619+
assert "💪" in decoded_data
620+
assert "😊" in decoded_data
621+
assert "🔥🚀✨💯" in decoded_data
622+
assert "📅" in decoded_data
623+
assert "⏰" in decoded_data
624+
625+
def test_execute_with_nan_and_infinity(self, http_client, patched_version_and_sys, patched_request):
626+
"""Test that NaN and Infinity float values are handled correctly.
627+
628+
The requests library's json= parameter serializes with allow_nan=False, so
629+
NaN/Infinity are rejected (json.dumps raises ValueError, which requests surfaces as InvalidJSONError). Our implementation uses json.dumps with
630+
allow_nan=True to maintain backward compatibility.
631+
"""
632+
mock_response = Mock()
633+
mock_response.json.return_value = {"success": True}
634+
mock_response.headers = {"X-Test-Header": "test"}
635+
mock_response.status_code = 200
636+
patched_request.return_value = mock_response
637+
638+
request_body = {
639+
"nan_value": float("nan"),
640+
"infinity": float("inf"),
641+
"neg_infinity": float("-inf"),
642+
"normal": 42.5,
643+
}
644+
645+
# This should NOT raise ValueError
646+
response_json, response_headers = http_client._execute(
647+
method="POST",
648+
path="/data",
649+
request_body=request_body,
650+
)
651+
652+
assert response_json == {"success": True}
653+
call_kwargs = patched_request.call_args[1]
654+
sent_data = call_kwargs["data"]
655+
656+
# The data should be UTF-8 encoded bytes with NaN/Infinity serialized
657+
assert isinstance(sent_data, bytes)
658+
decoded_data = sent_data.decode("utf-8")
659+
# json.dumps with allow_nan=True produces NaN, Infinity, -Infinity (JS-style)
660+
assert "NaN" in decoded_data
661+
assert "Infinity" in decoded_data
662+
assert "-Infinity" in decoded_data
663+
assert "42.5" in decoded_data
521664

522665
def test_execute_with_multipart_data_not_affected(self, http_client, patched_version_and_sys, patched_request):
523666
"""Test that multipart/form-data is not affected by the change."""

0 commit comments

Comments
 (0)