To: vim_dev@googlegroups.com Subject: Patch 8.2.4695 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.2.4695 Problem: JSON encoding could be faster. Solution: Optimize encoding JSON strings. (closes #10086) Files: src/json.c, src/testdir/test_json.vim *** ../vim-8.2.4694/src/json.c 2022-04-04 15:16:50.738014123 +0100 --- src/json.c 2022-04-05 15:02:26.659250519 +0100 *************** *** 114,150 **** } #endif static void write_string(garray_T *gap, char_u *str) { char_u *res = str; char_u numbuf[NUMBUFLEN]; if (res == NULL) - ga_concat(gap, (char_u *)"\"\""); - else { ! #if defined(USE_ICONV) ! vimconv_T conv; ! char_u *converted = NULL; ! if (!enc_utf8) ! { ! // Convert the text from 'encoding' to utf-8, the JSON string is ! // always utf-8. ! conv.vc_type = CONV_NONE; ! convert_setup(&conv, p_enc, (char_u*)"utf-8"); ! if (conv.vc_type != CONV_NONE) ! converted = res = string_convert(&conv, res, NULL); ! convert_setup(&conv, NULL, NULL); ! } #endif ! ga_append(gap, '"'); ! while (*res != NUL) { ! int c; ! // always use utf-8 encoding, ignore 'encoding' ! c = utf_ptr2char(res); switch (c) { --- 114,185 ---- } #endif + /* + * Lookup table to quickly know if the given ASCII character must be escaped. + */ + static const char ascii_needs_escape[128] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0. + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1. + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + }; + + /* + * Encode the utf-8 encoded string "str" into "gap". + */ static void write_string(garray_T *gap, char_u *str) { char_u *res = str; char_u numbuf[NUMBUFLEN]; + char_u *from; + #if defined(USE_ICONV) + vimconv_T conv; + char_u *converted = NULL; + #endif + int c; if (res == NULL) { ! ga_concat(gap, (char_u *)"\"\""); ! return; ! } ! #if defined(USE_ICONV) ! if (!enc_utf8) ! { ! // Convert the text from 'encoding' to utf-8, because a JSON string is ! // always utf-8. ! conv.vc_type = CONV_NONE; ! convert_setup(&conv, p_enc, (char_u*)"utf-8"); ! if (conv.vc_type != CONV_NONE) ! converted = res = string_convert(&conv, res, NULL); ! convert_setup(&conv, NULL, NULL); ! } #endif ! ga_append(gap, '"'); ! // `from` is the beginning of a sequence of bytes we can directly copy from ! // the input string, avoiding the overhead associated to decoding/encoding ! // them. ! from = res; ! while ((c = *res) != NUL) ! { ! // always use utf-8 encoding, ignore 'encoding' ! if (c < 0x80) { ! if (!ascii_needs_escape[c]) ! { ! res += 1; ! continue; ! } ! ! if (res != from) ! ga_concat_len(gap, from, res - from); ! from = res + 1; switch (c) { *************** *** 164,188 **** ga_append(gap, c); break; default: ! if (c >= 0x20) ! { ! numbuf[utf_char2bytes(c, numbuf)] = NUL; ! ga_concat(gap, numbuf); ! } ! else ! { ! vim_snprintf((char *)numbuf, NUMBUFLEN, ! "\\u%04lx", (long)c); ! ga_concat(gap, numbuf); ! } } ! res += utf_ptr2len(res); } ! ga_append(gap, '"'); #if defined(USE_ICONV) ! vim_free(converted); #endif - } } /* --- 199,241 ---- ga_append(gap, c); break; default: ! vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx", ! (long)c); ! ga_concat(gap, numbuf); } ! ! res += 1; } ! else ! { ! int l = utf_ptr2len(res); ! ! if (l > 1) ! { ! res += l; ! continue; ! } ! ! // Invalid utf-8 sequence, replace it with the Unicode replacement ! // character U+FFFD. ! if (res != from) ! ga_concat_len(gap, from, res - from); ! from = res + 1; ! ! numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL; ! ga_concat(gap, numbuf); ! ! res += l; ! } ! } ! ! if (res != from) ! ga_concat_len(gap, from, res - from); ! ! ga_append(gap, '"'); #if defined(USE_ICONV) ! vim_free(converted); #endif } /* *** ../vim-8.2.4694/src/testdir/test_json.vim 2021-02-08 20:53:05.592963320 +0000 --- src/testdir/test_json.vim 2022-04-05 14:55:22.151639261 +0100 *************** *** 107,112 **** --- 107,115 ---- call assert_equal('"café"', json_encode("caf\xe9")) let &encoding = save_encoding + " Invalid utf-8 sequences are replaced with U+FFFD (replacement character) + call assert_equal('"foo' . "\ufffd" . '"', json_encode("foo\xAB")) + call assert_fails('echo json_encode(function("tr"))', 'E1161: Cannot json encode a func') call assert_fails('echo json_encode([function("tr")])', 'E1161: Cannot json encode a func') *** ../vim-8.2.4694/src/version.c 2022-04-05 15:07:08.210791582 +0100 --- src/version.c 2022-04-05 14:56:43.963566990 +0100 *************** *** 748,749 **** --- 748,751 ---- { /* Add new patch number below this line */ + /**/ + 4695, /**/ -- There are only two hard things in programming: Cache invalidation, naming things and off-by-one errors. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// \\\ \\\ sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///