To: vim_dev@googlegroups.com Subject: Patch 8.2.1461 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.2.1461 Problem: Vim9: string indexes are counted in bytes. Solution: Use character indexes. (closes #6574) Files: runtime/doc/eval.txt, src/eval.c, src/proto/eval.pro, src/vim9execute.c, src/eval.c, src/testdir/test_vim9_expr.vim *** ../vim-8.2.1460/runtime/doc/eval.txt 2020-08-09 14:03:51.541367942 +0200 --- runtime/doc/eval.txt 2020-08-15 18:24:40.799380817 +0200 *************** *** 1128,1146 **** expr8[expr1] item of String or |List| *expr-[]* *E111* *E909* *subscript* If expr8 is a Number or String this results in a String that contains the ! expr1'th single byte from expr8. expr8 is used as a String, expr1 as a ! Number. This doesn't recognize multi-byte encodings, see `byteidx()` for ! an alternative, or use `split()` to turn the string into a list of characters. ! ! Index zero gives the first byte. This is like it works in C. Careful: ! text column numbers start with one! Example, to get the byte under the ! cursor: > :let c = getline(".")[col(".") - 1] If the length of the String is less than the index, the result is an empty String. A negative index always results in an empty string (reason: backward ! compatibility). Use [-1:] to get the last byte. If expr8 is a |List| then it results the item at index expr1. See |list-index| for possible index values. If the index is out of range this results in an --- 1131,1155 ---- expr8[expr1] item of String or |List| *expr-[]* *E111* *E909* *subscript* + In legacy Vim script: If expr8 is a Number or String this results in a String that contains the ! expr1'th single byte from expr8. expr8 is used as a String (a number is ! automatically converted to a String), expr1 as a Number. This doesn't ! recognize multi-byte encodings, see `byteidx()` for an alternative, or use ! `split()` to turn the string into a list of characters. 
Example, to get the ! byte under the cursor: > :let c = getline(".")[col(".") - 1] + In Vim9 script: + If expr8 is a String this results in a String that contains the expr1'th + single character from expr8. To use byte indexes use |strpart()|. + + Index zero gives the first byte or character. Careful: text column numbers + start with one! + If the length of the String is less than the index, the result is an empty String. A negative index always results in an empty string (reason: backward ! compatibility). Use [-1:] to get the last byte or character. If expr8 is a |List| then it results the item at index expr1. See |list-index| for possible index values. If the index is out of range this results in an *************** *** 1154,1163 **** expr8[expr1a : expr1b] substring or sublist *expr-[:]* ! If expr8 is a Number or String this results in the substring with the bytes ! from expr1a to and including expr1b. expr8 is used as a String, expr1a and ! expr1b are used as a Number. This doesn't recognize multi-byte encodings, see ! |byteidx()| for computing the indexes. If expr1a is omitted zero is used. If expr1b is omitted the length of the string minus one is used. --- 1163,1178 ---- expr8[expr1a : expr1b] substring or sublist *expr-[:]* ! If expr8 is a String this results in the substring with the bytes or characters ! from expr1a to and including expr1b. expr8 is used as a String, expr1a and expr1b are ! used as a Number. ! ! In legacy Vim script the indexes are byte indexes. This doesn't recognize ! multi-byte encodings, see |byteidx()| for computing the indexes. If expr8 is ! a Number it is first converted to a String. ! ! In Vim9 script the indexes are character indexes. To use byte indexes use ! |strpart()|. If expr1a is omitted zero is used. If expr1b is omitted the length of the string minus one is used. 
*** ../vim-8.2.1460/src/eval.c 2020-08-15 16:33:24.501747305 +0200 --- src/eval.c 2020-08-15 18:32:40.363506218 +0200 *************** *** 3718,3723 **** --- 3718,3727 ---- else s = vim_strnsave(s + n1, n2 - n1 + 1); } + else if (in_vim9script()) + { + s = char_from_string(s, n1); + } else { // The resulting variable is a string of a single *************** *** 5285,5290 **** --- 5289,5318 ---- } /* + * Return the character "str[index]" where "index" is the character index. If + * "index" is out of range NULL is returned. + */ + char_u * + char_from_string(char_u *str, varnumber_T index) + { + size_t nbyte = 0; + varnumber_T nchar = index; + size_t slen; + + if (str == NULL || index < 0) + return NULL; + slen = STRLEN(str); + while (nchar > 0 && nbyte < slen) + { + nbyte += MB_CPTR2LEN(str + nbyte); + --nchar; + } + if (nbyte >= slen) + return NULL; + return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte)); + } + + /* * Handle: * - expr[expr], expr[expr:expr] subscript * - ".name" lookup *** ../vim-8.2.1460/src/proto/eval.pro 2020-07-27 21:43:24.137946109 +0200 --- src/proto/eval.pro 2020-08-15 17:08:27.359565655 +0200 *************** *** 59,64 **** --- 59,65 ---- int eval_isnamec(int c); int eval_isnamec1(int c); int eval_isdictc(int c); + char_u *char_from_string(char_u *str, varnumber_T index); int handle_subscript(char_u **arg, typval_T *rettv, evalarg_T *evalarg, int verbose); int item_copy(typval_T *from, typval_T *to, int deep, int copyID); void echo_one(typval_T *rettv, int with_space, int *atstart, int *needclr); *** ../vim-8.2.1460/src/vim9execute.c 2020-08-15 16:33:24.497747330 +0200 --- src/vim9execute.c 2020-08-15 17:11:00.345567711 +0200 *************** *** 2233,2239 **** case ISN_STRINDEX: { - char_u *s; varnumber_T n; char_u *res; --- 2233,2238 ---- *************** *** 2245,2251 **** emsg(_(e_stringreq)); goto on_error; } - s = tv->vval.v_string; tv = STACK_TV_BOT(-1); if (tv->v_type != VAR_NUMBER) --- 2244,2249 ---- *************** *** 2259,2270 
**** // The resulting variable is a string of a single // character. If the index is too big or negative the // result is empty. - if (n < 0 || n >= (varnumber_T)STRLEN(s)) - res = NULL; - else - res = vim_strnsave(s + n, 1); --ectx.ec_stack.ga_len; tv = STACK_TV_BOT(-1); vim_free(tv->vval.v_string); tv->vval.v_string = res; } --- 2257,2265 ---- // The resulting variable is a string of a single // character. If the index is too big or negative the // result is empty. --ectx.ec_stack.ga_len; tv = STACK_TV_BOT(-1); + res = char_from_string(tv->vval.v_string, n); vim_free(tv->vval.v_string); tv->vval.v_string = res; } *** ../vim-8.2.1460/src/eval.c 2020-08-15 16:33:24.501747305 +0200 --- src/eval.c 2020-08-15 18:32:40.363506218 +0200 *************** *** 3718,3723 **** --- 3718,3727 ---- else s = vim_strnsave(s + n1, n2 - n1 + 1); } + else if (in_vim9script()) + { + s = char_from_string(s, n1); + } else { // The resulting variable is a string of a single *************** *** 5285,5290 **** --- 5289,5318 ---- } /* + * Return the character "str[index]" where "index" is the character index. If + * "index" is out of range NULL is returned. + */ + char_u * + char_from_string(char_u *str, varnumber_T index) + { + size_t nbyte = 0; + varnumber_T nchar = index; + size_t slen; + + if (str == NULL || index < 0) + return NULL; + slen = STRLEN(str); + while (nchar > 0 && nbyte < slen) + { + nbyte += MB_CPTR2LEN(str + nbyte); + --nchar; + } + if (nbyte >= slen) + return NULL; + return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte)); + } + + /* * Handle: * - expr[expr], expr[expr:expr] subscript * - ".name" lookup *** ../vim-8.2.1460/src/testdir/test_vim9_expr.vim 2020-08-15 16:33:24.501747305 +0200 --- src/testdir/test_vim9_expr.vim 2020-08-15 17:16:48.649794487 +0200 *************** *** 2075,2086 **** enddef def Test_expr7_subscript() ! let text = 'abcdef' ! assert_equal('', text[-1]) ! assert_equal('a', text[0]) ! assert_equal('e', text[4]) ! assert_equal('f', text[5]) ! 
assert_equal('', text[6]) enddef def Test_expr7_subscript_linebreak() --- 2075,2102 ---- enddef def Test_expr7_subscript() ! let lines =<< trim END ! let text = 'abcdef' ! assert_equal('', text[-1]) ! assert_equal('a', text[0]) ! assert_equal('e', text[4]) ! assert_equal('f', text[5]) ! assert_equal('', text[6]) ! ! text = 'ábçdëf' ! assert_equal('', text[-999]) ! assert_equal('', text[-1]) ! assert_equal('á', text[0]) ! assert_equal('b', text[1]) ! assert_equal('ç', text[2]) ! assert_equal('d', text[3]) ! assert_equal('ë', text[4]) ! assert_equal('f', text[5]) ! assert_equal('', text[6]) ! assert_equal('', text[999]) ! END ! CheckDefSuccess(lines) ! CheckScriptSuccess(['vim9script'] + lines) enddef def Test_expr7_subscript_linebreak() *** ../vim-8.2.1460/src/version.c 2020-08-15 16:33:24.505747282 +0200 --- src/version.c 2020-08-15 17:08:22.507633662 +0200 *************** *** 756,757 **** --- 756,759 ---- { /* Add new patch number below this line */ + /**/ + 1461, /**/ -- hundred-and-one symptoms of being an internet addict: 210. When you get a divorce, you don't care about who gets the children, but discuss endlessly who can use the email address. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///