@@ -48,10 +48,8 @@ read_py_str(
4848 uintptr_t address ,
4949 Py_ssize_t max_len
5050) {
51- PyObject * result = NULL ;
52- char * buf = NULL ;
53-
54- // Read the entire PyUnicodeObject at once
51+ // Read the entire PyUnicodeObject at once; for short strings the data
52+ // is inline right after the header and we'll already have (some of) it.
5553 char unicode_obj [SIZEOF_UNICODE_OBJ ];
5654 int res = _Py_RemoteDebug_PagedReadRemoteMemory (
5755 & unwinder -> handle ,
@@ -61,7 +59,7 @@ read_py_str(
6159 );
6260 if (res < 0 ) {
6361 set_exception_cause (unwinder , PyExc_RuntimeError , "Failed to read PyUnicodeObject" );
64- goto err ;
62+ return NULL ;
6563 }
6664
6765 Py_ssize_t len = GET_MEMBER (Py_ssize_t , unicode_obj , unwinder -> debug_offsets .unicode_object .length );
@@ -72,36 +70,94 @@ read_py_str(
7270 return NULL ;
7371 }
7472
75- buf = (char * )PyMem_RawMalloc (len + 1 );
76- if (buf == NULL ) {
77- PyErr_NoMemory ();
78- set_exception_cause (unwinder , PyExc_MemoryError , "Failed to allocate buffer for string reading" );
73+ // Inspect state to pick the right data offset and character width.
74+ // We rely on the remote process sharing this Python version's
75+ // PyASCIIObject layout, the same assumption already used for `length`.
76+ struct _PyUnicodeObject_state state = GET_MEMBER (
77+ struct _PyUnicodeObject_state ,
78+ unicode_obj ,
79+ unwinder -> debug_offsets .unicode_object .state );
80+
81+ if (!state .compact ) {
82+ PyErr_Format (PyExc_RuntimeError ,
83+ "Cannot read non-compact Unicode object at 0x%lx" , address );
84+ set_exception_cause (unwinder , PyExc_RuntimeError ,
85+ "Legacy (non-compact) Unicode objects are not supported" );
7986 return NULL ;
8087 }
8188
82- size_t offset = (size_t )unwinder -> debug_offsets .unicode_object .asciiobject_size ;
83- res = _Py_RemoteDebug_PagedReadRemoteMemory (& unwinder -> handle , address + offset , len , buf );
84- if (res < 0 ) {
85- set_exception_cause (unwinder , PyExc_RuntimeError , "Failed to read string data from remote memory" );
86- goto err ;
89+ int kind = (int )state .kind ;
90+ Py_UCS4 max_char ;
91+ switch (kind ) {
92+ case PyUnicode_1BYTE_KIND :
93+ max_char = state .ascii ? 0x7F : 0xFF ;
94+ break ;
95+ case PyUnicode_2BYTE_KIND :
96+ max_char = 0xFFFF ;
97+ break ;
98+ case PyUnicode_4BYTE_KIND :
99+ max_char = 0x10FFFF ;
100+ break ;
101+ default :
102+ PyErr_Format (PyExc_RuntimeError ,
103+ "Invalid Unicode kind %d at 0x%lx" , kind , address );
104+ set_exception_cause (unwinder , PyExc_RuntimeError ,
105+ "Invalid kind in remote Unicode object" );
106+ return NULL ;
87107 }
88- buf [len ] = '\0' ;
89108
90- result = PyUnicode_FromStringAndSize (buf , len );
109+ size_t header_size = state .ascii
110+ ? (size_t )unwinder -> debug_offsets .unicode_object .asciiobject_size
111+ : (size_t )unwinder -> debug_offsets .unicode_object .compactunicodeobject_size ;
112+
113+ // len * kind is bounded by max_len * 4 (kind <= 4, len <= max_len), so
114+ // the multiplication can't overflow for any caller-sane max_len, but the
115+ // explicit cap here keeps a corrupted remote `length` from later turning
116+ // into a giant allocation.
117+ size_t nbytes = (size_t )len * (size_t )kind ;
118+ if ((size_t )len > (SIZE_MAX / 4 ) || nbytes > (size_t )max_len * 4 ) {
119+ PyErr_Format (PyExc_RuntimeError ,
120+ "Implausible Unicode byte size %zu at 0x%lx" , nbytes , address );
121+ set_exception_cause (unwinder , PyExc_RuntimeError ,
122+ "Garbage byte size in remote Unicode object" );
123+ return NULL ;
124+ }
125+
126+ PyObject * result = PyUnicode_New (len , max_char );
91127 if (result == NULL ) {
92- set_exception_cause (unwinder , PyExc_RuntimeError , "Failed to create PyUnicode from remote string data" );
93- goto err ;
128+ set_exception_cause (unwinder , PyExc_RuntimeError , "Failed to allocate PyUnicode for remote string" );
129+ return NULL ;
130+ }
131+ if (nbytes == 0 ) {
132+ return result ;
94133 }
95134
96- PyMem_RawFree (buf );
97- assert (result != NULL );
98- return result ;
135+ void * data = PyUnicode_DATA (result );
99136
100- err :
101- if (buf != NULL ) {
102- PyMem_RawFree (buf );
137+ // Reuse data already present in the header read; only round-trip for
138+ // whatever spills past it.
139+ size_t inline_avail = (header_size < SIZEOF_UNICODE_OBJ )
140+ ? SIZEOF_UNICODE_OBJ - header_size
141+ : 0 ;
142+ size_t inline_bytes = nbytes < inline_avail ? nbytes : inline_avail ;
143+ if (inline_bytes > 0 ) {
144+ memcpy (data , unicode_obj + header_size , inline_bytes );
103145 }
104- return NULL ;
146+
147+ if (nbytes > inline_bytes ) {
148+ res = _Py_RemoteDebug_PagedReadRemoteMemory (
149+ & unwinder -> handle ,
150+ address + header_size + inline_bytes ,
151+ nbytes - inline_bytes ,
152+ (char * )data + inline_bytes );
153+ if (res < 0 ) {
154+ Py_DECREF (result );
155+ set_exception_cause (unwinder , PyExc_RuntimeError , "Failed to read string data from remote memory" );
156+ return NULL ;
157+ }
158+ }
159+
160+ return result ;
105161}
106162
107163PyObject *
0 commit comments