To: vim_dev@googlegroups.com Subject: Patch 8.2.2813 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.2.2813 Problem: Cannot grep using fuzzy matching. Solution: Add the "f" flag to :vimgrep. (Yegappan Lakshmanan, closes #8152) Files: runtime/doc/quickfix.txt, src/ex_cmds.c, src/proto/search.pro, src/quickfix.c, src/search.c, src/vim.h, src/testdir/test_quickfix.vim *** ../vim-8.2.2812/runtime/doc/quickfix.txt 2021-01-31 17:02:06.258490157 +0100 --- runtime/doc/quickfix.txt 2021-04-26 20:57:53.242860143 +0200 *************** *** 1008,1014 **** 5.1 using Vim's internal grep *:vim* *:vimgrep* *E682* *E683* ! :vim[grep][!] /{pattern}/[g][j] {file} ... Search for {pattern} in the files {file} ... and set the error list to the matches. Files matching 'wildignore' are ignored; files in 'suffixes' are --- 1008,1014 ---- 5.1 using Vim's internal grep *:vim* *:vimgrep* *E682* *E683* ! :vim[grep][!] /{pattern}/[g][j][f] {file} ... Search for {pattern} in the files {file} ... and set the error list to the matches. Files matching 'wildignore' are ignored; files in 'suffixes' are *************** *** 1025,1030 **** --- 1025,1042 ---- 'smartcase' is not used. If {pattern} is empty (e.g. // is specified), the last used search pattern is used. |last-pattern| + + When the 'f' flag is specified, fuzzy string matching + is used to find matching lines. In this case, + {pattern} is treated as a literal string instead of a + regular expression. See |matchfuzzy()| for more info + about fuzzy matching. + + |QuickFixCmdPre| and |QuickFixCmdPost| are triggered. + A file that is opened for matching may use a buffer + number, but it is reused if possible to avoid + consuming buffer numbers. + :{count}vim[grep] ... When a number is put before the command this is used as the maximum number of matches to find. Use *************** *** 1053,1072 **** :vimgrep Error *.c < *:lv* *:lvimgrep* ! :lv[imgrep][!] 
/{pattern}/[g][j] {file} ... :lv[imgrep][!] {pattern} {file} ... Same as ":vimgrep", except the location list for the current window is used instead of the quickfix list. *:vimgrepa* *:vimgrepadd* ! :vimgrepa[dd][!] /{pattern}/[g][j] {file} ... :vimgrepa[dd][!] {pattern} {file} ... Just like ":vimgrep", but instead of making a new list of errors the matches are appended to the current list. *:lvimgrepa* *:lvimgrepadd* ! :lvimgrepa[dd][!] /{pattern}/[g][j] {file} ... :lvimgrepa[dd][!] {pattern} {file} ... Same as ":vimgrepadd", except the location list for the current window is used instead of the quickfix --- 1065,1084 ---- :vimgrep Error *.c < *:lv* *:lvimgrep* ! :lv[imgrep][!] /{pattern}/[g][j][f] {file} ... :lv[imgrep][!] {pattern} {file} ... Same as ":vimgrep", except the location list for the current window is used instead of the quickfix list. *:vimgrepa* *:vimgrepadd* ! :vimgrepa[dd][!] /{pattern}/[g][j][f] {file} ... :vimgrepa[dd][!] {pattern} {file} ... Just like ":vimgrep", but instead of making a new list of errors the matches are appended to the current list. *:lvimgrepa* *:lvimgrepadd* ! :lvimgrepa[dd][!] /{pattern}/[g][j][f] {file} ... :lvimgrepa[dd][!] {pattern} {file} ... Same as ":vimgrepadd", except the location list for the current window is used instead of the quickfix *** ../vim-8.2.2812/src/ex_cmds.c 2021-04-19 16:48:44.435055499 +0200 --- src/ex_cmds.c 2021-04-26 20:49:47.376741284 +0200 *************** *** 5288,5301 **** ++p; // Find the flags ! while (*p == 'g' || *p == 'j') { if (flags != NULL) { if (*p == 'g') *flags |= VGR_GLOBAL; ! else *flags |= VGR_NOJUMP; } ++p; } --- 5288,5303 ---- ++p; // Find the flags ! while (*p == 'g' || *p == 'j' || *p == 'f') { if (flags != NULL) { if (*p == 'g') *flags |= VGR_GLOBAL; ! 
else if (*p == 'j') *flags |= VGR_NOJUMP; + else + *flags |= VGR_FUZZY; } ++p; } *** ../vim-8.2.2812/src/proto/search.pro 2020-09-22 20:33:30.437223175 +0200 --- src/proto/search.pro 2021-04-26 20:49:47.376741284 +0200 *************** *** 36,41 **** --- 36,42 ---- spat_T *get_spat(int idx); int get_spat_last_idx(void); void f_searchcount(typval_T *argvars, typval_T *rettv); + int fuzzy_match(char_u *str, char_u *pat_arg, int matchseq, int *outScore, int_u *matches, int maxMatches); void f_matchfuzzy(typval_T *argvars, typval_T *rettv); void f_matchfuzzypos(typval_T *argvars, typval_T *rettv); /* vim: set ft=c : */ *** ../vim-8.2.2812/src/quickfix.c 2021-02-06 19:19:38.921813939 +0100 --- src/quickfix.c 2021-04-26 20:49:47.376741284 +0200 *************** *** 5912,5917 **** --- 5912,5918 ---- qf_list_T *qfl, char_u *fname, buf_T *buf, + char_u *spat, regmmatch_T *regmatch, long *tomatch, int duplicate_name, *************** *** 5920,5964 **** int found_match = FALSE; long lnum; colnr_T col; for (lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; ++lnum) { col = 0; ! while (vim_regexec_multi(regmatch, curwin, buf, lnum, ! col, NULL, NULL) > 0) { ! // Pass the buffer number so that it gets used even for a ! // dummy buffer, unless duplicate_name is set, then the ! // buffer will be wiped out below. ! if (qf_add_entry(qfl, ! NULL, // dir ! fname, ! NULL, ! duplicate_name ? 0 : buf->b_fnum, ! ml_get_buf(buf, ! regmatch->startpos[0].lnum + lnum, FALSE), ! regmatch->startpos[0].lnum + lnum, ! regmatch->startpos[0].col + 1, ! FALSE, // vis_col ! NULL, // search pattern ! 0, // nr ! 0, // type ! TRUE // valid ! ) == QF_FAIL) { ! got_int = TRUE; ! 
break; } - found_match = TRUE; - if (--*tomatch == 0) - break; - if ((flags & VGR_GLOBAL) == 0 - || regmatch->endpos[0].lnum > 0) - break; - col = regmatch->endpos[0].col - + (col == regmatch->endpos[0].col); - if (col > (colnr_T)STRLEN(ml_get_buf(buf, lnum, FALSE))) - break; } line_breakcheck(); if (got_int) --- 5921,6011 ---- int found_match = FALSE; long lnum; colnr_T col; + int pat_len = STRLEN(spat); for (lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; ++lnum) { col = 0; ! if (!(flags & VGR_FUZZY)) { ! // Regular expression match ! while (vim_regexec_multi(regmatch, curwin, buf, lnum, ! col, NULL, NULL) > 0) { ! // Pass the buffer number so that it gets used even for a ! // dummy buffer, unless duplicate_name is set, then the ! // buffer will be wiped out below. ! if (qf_add_entry(qfl, ! NULL, // dir ! fname, ! NULL, ! duplicate_name ? 0 : buf->b_fnum, ! ml_get_buf(buf, ! regmatch->startpos[0].lnum + lnum, FALSE), ! regmatch->startpos[0].lnum + lnum, ! regmatch->startpos[0].col + 1, ! FALSE, // vis_col ! NULL, // search pattern ! 0, // nr ! 0, // type ! TRUE // valid ! ) == QF_FAIL) ! { ! got_int = TRUE; ! break; ! } ! found_match = TRUE; ! if (--*tomatch == 0) ! break; ! if ((flags & VGR_GLOBAL) == 0 ! || regmatch->endpos[0].lnum > 0) ! break; ! col = regmatch->endpos[0].col ! + (col == regmatch->endpos[0].col); ! if (col > (colnr_T)STRLEN(ml_get_buf(buf, lnum, FALSE))) ! break; ! } ! } ! else ! { ! char_u *str = ml_get_buf(buf, lnum, FALSE); ! int score; ! int_u matches[MAX_FUZZY_MATCHES]; ! int_u sz = sizeof(matches) / sizeof(matches[0]); ! ! // Fuzzy string match ! while (fuzzy_match(str + col, spat, FALSE, &score, matches, sz) > 0) ! { ! // Pass the buffer number so that it gets used even for a ! // dummy buffer, unless duplicate_name is set, then the ! // buffer will be wiped out below. ! if (qf_add_entry(qfl, ! NULL, // dir ! fname, ! NULL, ! duplicate_name ? 0 : buf->b_fnum, ! str, ! lnum, ! matches[0] + col + 1, ! FALSE, // vis_col ! 
NULL, // search pattern ! 0, // nr ! 0, // type ! TRUE // valid ! ) == QF_FAIL) ! { ! got_int = TRUE; ! break; ! } ! found_match = TRUE; ! if (--*tomatch == 0) ! break; ! if ((flags & VGR_GLOBAL) == 0) ! break; ! col = matches[pat_len - 1] + col + 1; ! if (col > (colnr_T)STRLEN(str)) ! break; } } line_breakcheck(); if (got_int) *************** *** 6163,6169 **** // Try for a match in all lines of the buffer. // For ":1vimgrep" look for first match only. found_match = vgr_match_buflines(qf_get_curlist(qi), ! fname, buf, &cmd_args->regmatch, &cmd_args->tomatch, duplicate_name, cmd_args->flags); if (using_dummy) --- 6210,6216 ---- // Try for a match in all lines of the buffer. // For ":1vimgrep" look for first match only. found_match = vgr_match_buflines(qf_get_curlist(qi), ! fname, buf, cmd_args->spat, &cmd_args->regmatch, &cmd_args->tomatch, duplicate_name, cmd_args->flags); if (using_dummy) *** ../vim-8.2.2812/src/search.c 2021-04-02 18:55:52.058322772 +0200 --- src/search.c 2021-04-26 20:49:47.376741284 +0200 *************** *** 4285,4294 **** #define SCORE_NONE -9999 #define FUZZY_MATCH_RECURSION_LIMIT 10 - // Maximum number of characters that can be fuzzy matched - #define MAXMATCHES 256 - - typedef int_u matchidx_T; /* * Compute a score for a fuzzy matched string. The matching character locations --- 4285,4290 ---- *************** *** 4298,4304 **** fuzzy_match_compute_score( char_u *str, int strSz, ! matchidx_T *matches, int numMatches) { int score; --- 4294,4300 ---- fuzzy_match_compute_score( char_u *str, int strSz, ! int_u *matches, int numMatches) { int score; *************** *** 4306,4312 **** int unmatched; int i; char_u *p = str; ! matchidx_T sidx = 0; // Initialize score score = 100; --- 4302,4308 ---- int unmatched; int i; char_u *p = str; ! int_u sidx = 0; // Initialize score score = 100; *************** *** 4324,4334 **** // Apply ordering bonuses for (i = 0; i < numMatches; ++i) { ! matchidx_T currIdx = matches[i]; if (i > 0) { ! 
matchidx_T prevIdx = matches[i - 1]; // Sequential if (currIdx == (prevIdx + 1)) --- 4320,4330 ---- // Apply ordering bonuses for (i = 0; i < numMatches; ++i) { ! int_u currIdx = matches[i]; if (i > 0) { ! int_u prevIdx = matches[i - 1]; // Sequential if (currIdx == (prevIdx + 1)) *************** *** 4386,4404 **** fuzzy_match_recursive( char_u *fuzpat, char_u *str, ! matchidx_T strIdx, int *outScore, char_u *strBegin, int strLen, ! matchidx_T *srcMatches, ! matchidx_T *matches, int maxMatches, int nextMatch, int *recursionCount) { // Recursion params int recursiveMatch = FALSE; ! matchidx_T bestRecursiveMatches[MAXMATCHES]; int bestRecursiveScore = 0; int first_match; int matched; --- 4382,4400 ---- fuzzy_match_recursive( char_u *fuzpat, char_u *str, ! int_u strIdx, int *outScore, char_u *strBegin, int strLen, ! int_u *srcMatches, ! int_u *matches, int maxMatches, int nextMatch, int *recursionCount) { // Recursion params int recursiveMatch = FALSE; ! int_u bestRecursiveMatches[MAX_FUZZY_MATCHES]; int bestRecursiveScore = 0; int first_match; int matched; *************** *** 4409,4415 **** return 0; // Detect end of strings ! if (*fuzpat == '\0' || *str == '\0') return 0; // Loop through fuzpat and str looking for a match --- 4405,4411 ---- return 0; // Detect end of strings ! if (*fuzpat == NUL || *str == NUL) return 0; // Loop through fuzpat and str looking for a match *************** *** 4425,4431 **** // Found match if (vim_tolower(c1) == vim_tolower(c2)) { ! matchidx_T recursiveMatches[MAXMATCHES]; int recursiveScore = 0; char_u *next_char; --- 4421,4427 ---- // Found match if (vim_tolower(c1) == vim_tolower(c2)) { ! int_u recursiveMatches[MAX_FUZZY_MATCHES]; int recursiveScore = 0; char_u *next_char; *************** *** 4455,4461 **** if (!recursiveMatch || recursiveScore > bestRecursiveScore) { memcpy(bestRecursiveMatches, recursiveMatches, ! 
MAXMATCHES * sizeof(recursiveMatches[0])); bestRecursiveScore = recursiveScore; } recursiveMatch = TRUE; --- 4451,4457 ---- if (!recursiveMatch || recursiveScore > bestRecursiveScore) { memcpy(bestRecursiveMatches, recursiveMatches, ! MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0])); bestRecursiveScore = recursiveScore; } recursiveMatch = TRUE; *************** *** 4506,4524 **** * normalized and varies with pattern. * Recursion is limited internally (default=10) to prevent degenerate cases * (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). ! * Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES ! * characters. * * Returns TRUE if 'pat_arg' matches 'str'. Also returns the match score in * 'outScore' and the matching character positions in 'matches'. */ ! static int fuzzy_match( char_u *str, char_u *pat_arg, int matchseq, int *outScore, ! matchidx_T *matches, int maxMatches) { int recursionCount = 0; --- 4502,4520 ---- * normalized and varies with pattern. * Recursion is limited internally (default=10) to prevent degenerate cases * (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). ! * Uses int_u for match indices. Therefore patterns are limited to ! * MAX_FUZZY_MATCHES characters. * * Returns TRUE if 'pat_arg' matches 'str'. Also returns the match score in * 'outScore' and the matching character positions in 'matches'. */ ! int fuzzy_match( char_u *str, char_u *pat_arg, int matchseq, int *outScore, ! int_u *matches, int maxMatches) { int recursionCount = 0; *************** *** 4630,4636 **** listitem_T *li; long i = 0; int found_match = FALSE; ! matchidx_T matches[MAXMATCHES]; len = list_len(items); if (len == 0) --- 4626,4632 ---- listitem_T *li; long i = 0; int found_match = FALSE; ! int_u matches[MAX_FUZZY_MATCHES]; len = list_len(items); if (len == 0) *************** *** 4847,4853 **** return; } } ! if ((di = dict_find(d, (char_u *)"matchseq", -1)) != NULL) matchseq = TRUE; } --- 4843,4849 ---- return; } } ! 
if (dict_find(d, (char_u *)"matchseq", -1) != NULL) matchseq = TRUE; } *** ../vim-8.2.2812/src/vim.h 2021-04-16 19:58:15.915249728 +0200 --- src/vim.h 2021-04-26 20:49:47.380741268 +0200 *************** *** 2457,2462 **** --- 2457,2463 ---- // flags for skip_vimgrep_pat() #define VGR_GLOBAL 1 #define VGR_NOJUMP 2 + #define VGR_FUZZY 4 // behavior for bad character, "++bad=" argument #define BAD_REPLACE '?' // replace it with '?' (default) *************** *** 2711,2714 **** --- 2712,2718 ---- #define EVAL_VAR_NOAUTOLOAD 2 // do not use script autoloading #define EVAL_VAR_IMPORT 4 // may return special variable for import + // Maximum number of characters that can be fuzzy matched + #define MAX_FUZZY_MATCHES 256 + #endif // VIM__H *** ../vim-8.2.2812/src/testdir/test_quickfix.vim 2021-03-05 20:58:19.240480141 +0100 --- src/testdir/test_quickfix.vim 2021-04-26 20:49:47.380741268 +0200 *************** *** 32,38 **** command! -count -nargs=* -bang Xnfile cnfile command! -nargs=* -bang Xpfile cpfile command! -nargs=* Xexpr cexpr ! command! -count -nargs=* Xvimgrep vimgrep command! -nargs=* Xvimgrepadd vimgrepadd command! -nargs=* Xgrep grep command! -nargs=* Xgrepadd grepadd --- 32,38 ---- command! -count -nargs=* -bang Xnfile cnfile command! -nargs=* -bang Xpfile cpfile command! -nargs=* Xexpr cexpr ! command! -count=999 -nargs=* Xvimgrep vimgrep command! -nargs=* Xvimgrepadd vimgrepadd command! -nargs=* Xgrep grep command! -nargs=* Xgrepadd grepadd *************** *** 69,75 **** command! -count -nargs=* -bang Xnfile lnfile command! -nargs=* -bang Xpfile lpfile command! -nargs=* Xexpr lexpr ! command! -count -nargs=* Xvimgrep lvimgrep command! -nargs=* Xvimgrepadd lvimgrepadd command! -nargs=* Xgrep lgrep command! -nargs=* Xgrepadd lgrepadd --- 69,75 ---- command! -count -nargs=* -bang Xnfile lnfile command! -nargs=* -bang Xpfile lpfile command! -nargs=* Xexpr lexpr ! command! -count=999 -nargs=* Xvimgrep lvimgrep command! -nargs=* Xvimgrepadd lvimgrepadd command! 
-nargs=* Xgrep lgrep command! -nargs=* Xgrepadd lgrepadd *************** *** 5372,5375 **** --- 5372,5421 ---- set swapfile endfunc + " Test for the :vimgrep 'f' flag (fuzzy match) + func Xvimgrep_fuzzy_match(cchar) + call s:setup_commands(a:cchar) + + Xvimgrep /three one/f Xfile* + let l = g:Xgetlist() + call assert_equal(2, len(l)) + call assert_equal(['Xfile1', 1, 9, 'one two three'], + \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text]) + call assert_equal(['Xfile2', 2, 1, 'three one two'], + \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text]) + + Xvimgrep /the/f Xfile* + let l = g:Xgetlist() + call assert_equal(3, len(l)) + call assert_equal(['Xfile1', 1, 9, 'one two three'], + \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text]) + call assert_equal(['Xfile2', 2, 1, 'three one two'], + \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text]) + call assert_equal(['Xfile2', 4, 4, 'aaathreeaaa'], + \ [bufname(l[2].bufnr), l[2].lnum, l[2].col, l[2].text]) + + Xvimgrep /aaa/fg Xfile* + let l = g:Xgetlist() + call assert_equal(4, len(l)) + call assert_equal(['Xfile1', 2, 1, 'aaaaaa'], + \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text]) + call assert_equal(['Xfile1', 2, 4, 'aaaaaa'], + \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text]) + call assert_equal(['Xfile2', 4, 1, 'aaathreeaaa'], + \ [bufname(l[2].bufnr), l[2].lnum, l[2].col, l[2].text]) + call assert_equal(['Xfile2', 4, 9, 'aaathreeaaa'], + \ [bufname(l[3].bufnr), l[3].lnum, l[3].col, l[3].text]) + + call assert_fails('Xvimgrep /xyz/fg Xfile*', 'E480:') + endfunc + + func Test_vimgrep_fuzzy_match() + call writefile(['one two three', 'aaaaaa'], 'Xfile1') + call writefile(['one', 'three one two', 'two', 'aaathreeaaa'], 'Xfile2') + call Xvimgrep_fuzzy_match('c') + call Xvimgrep_fuzzy_match('l') + call delete('Xfile1') + call delete('Xfile2') + endfunc + " vim: shiftwidth=2 sts=2 expandtab *** ../vim-8.2.2812/src/version.c 2021-04-26 20:32:54.918399234 +0200 --- src/version.c 
2021-04-26 21:12:50.033856714 +0200 *************** *** 752,753 **** --- 752,755 ---- { /* Add new patch number below this line */ + /**/ + 2813, /**/ -- If Microsoft would build a car... ... You'd have to press the "Start" button to turn the engine off. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// \\\ \\\ sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///