To: vim_dev@googlegroups.com Subject: Patch 8.2.0894 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.2.0894 Problem: :mkspell can take very long if the word count is high. Solution: Use long to avoid negative numbers. Increase the limits by 20% if the compression did not have effect. Files: src/spellfile.c *** ../vim-8.2.0893/src/spellfile.c 2020-04-12 19:37:13.526297236 +0200 --- src/spellfile.c 2020-06-03 20:45:57.235918358 +0200 *************** *** 1995,2001 **** static wordnode_T *get_wordnode(spellinfo_T *spin); static void free_wordnode(spellinfo_T *spin, wordnode_T *n); static void wordtree_compress(spellinfo_T *spin, wordnode_T *root); ! static int node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot); static int node_equal(wordnode_T *n1, wordnode_T *n2); static void clear_node(wordnode_T *node); static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); --- 1995,2001 ---- static wordnode_T *get_wordnode(spellinfo_T *spin); static void free_wordnode(spellinfo_T *spin, wordnode_T *n); static void wordtree_compress(spellinfo_T *spin, wordnode_T *root); ! static long node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, long *tot); static int node_equal(wordnode_T *n1, wordnode_T *n2); static void clear_node(wordnode_T *node); static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); *************** *** 2019,2030 **** #define CONDIT_AFF 8 // word already has an affix /* ! * Tunable parameters for when the tree is compressed. See 'mkspellmem'. */ static long compress_start = 30000; // memory / SBLOCKSIZE static long compress_inc = 100; // memory / SBLOCKSIZE static long compress_added = 500000; // word count /* * Check the 'mkspellmem' option. Return FAIL if it's wrong. * Sets "sps_flags". --- 2019,2036 ---- #define CONDIT_AFF 8 // word already has an affix /* ! * Tunable parameters for when the tree is compressed. Filled from the ! * 'mkspellmem' option. */ static long compress_start = 30000; // memory / SBLOCKSIZE static long compress_inc = 100; // memory / SBLOCKSIZE static long compress_added = 500000; // word count + // Actually used values. These can change if compression doesn't result in + // reducing the size. + static long used_compress_inc; + static long used_compress_added; + /* * Check the 'mkspellmem' option. Return FAIL if it's wrong. * Sets "sps_flags". *************** *** 4534,4540 **** { if (--spin->si_compress_cnt == 1) // Did enough words to lower the block count limit. ! spin->si_blocks_cnt += compress_inc; } /* --- 4540,4546 ---- { if (--spin->si_compress_cnt == 1) // Did enough words to lower the block count limit. ! spin->si_blocks_cnt += used_compress_inc; } /* *************** *** 4543,4551 **** * need that room, thus only compress in the following situations: * 1. When not compressed before (si_compress_cnt == 0): when using * "compress_start" blocks. ! * 2. When compressed before and used "compress_inc" blocks before ! * adding "compress_added" words (si_compress_cnt > 1). ! * 3. When compressed before, added "compress_added" words * (si_compress_cnt == 1) and the number of free nodes drops below the * maximum word length. */ --- 4549,4557 ---- * need that room, thus only compress in the following situations: * 1. When not compressed before (si_compress_cnt == 0): when using * "compress_start" blocks. ! * 2. When compressed before and used "used_compress_inc" blocks before ! * adding "used_compress_added" words (si_compress_cnt > 1). ! * 3. When compressed before, added "used_compress_added" words * (si_compress_cnt == 1) and the number of free nodes drops below the * maximum word length. */ *************** *** 4556,4566 **** #endif { // Decrement the block counter. The effect is that we compress again ! // when the freed up room has been used and another "compress_inc" ! // blocks have been allocated. Unless "compress_added" words have // been added, then the limit is put back again. ! spin->si_blocks_cnt -= compress_inc; ! spin->si_compress_cnt = compress_added; if (spin->si_verbose) { --- 4562,4572 ---- #endif { // Decrement the block counter. The effect is that we compress again ! // when the freed up room has been used and another "used_compress_inc" ! // blocks have been allocated. Unless "used_compress_added" words have // been added, then the limit is put back again. ! spin->si_blocks_cnt -= used_compress_inc; ! spin->si_compress_cnt = used_compress_added; if (spin->si_verbose) { *************** *** 4655,4663 **** wordtree_compress(spellinfo_T *spin, wordnode_T *root) { hashtab_T ht; ! int n; ! int tot = 0; ! int perc; // Skip the root itself, it's not actually used. The first sibling is the // start of the tree. --- 4661,4669 ---- wordtree_compress(spellinfo_T *spin, wordnode_T *root) { hashtab_T ht; ! long n; ! long tot = 0; ! long perc; // Skip the root itself, it's not actually used. The first sibling is the // start of the tree. *************** *** 4666,4671 **** --- 4672,4685 ---- hash_init(&ht); n = node_compress(spin, root->wn_sibling, &ht, &tot); + if (tot == 0) + { + // Compression did not have effect. Increase the limits by 20% to + // avoid wasting time on compression, memory will be used anyway. + used_compress_inc += used_compress_inc / 5; + used_compress_added += used_compress_added / 5; + } + #ifndef SPELL_PRINTTREE if (spin->si_verbose || p_verbose > 2) #endif *************** *** 4677,4683 **** else perc = (tot - n) * 100 / tot; vim_snprintf((char *)IObuff, IOSIZE, ! _("Compressed %d of %d nodes; %d (%d%%) remaining"), n, tot, tot - n, perc); spell_message(spin, IObuff); } --- 4691,4697 ---- else perc = (tot - n) * 100 / tot; vim_snprintf((char *)IObuff, IOSIZE, ! _("Compressed %ld of %ld nodes; %ld (%ld%%) remaining"), n, tot, tot - n, perc); spell_message(spin, IObuff); } *************** *** 4692,4703 **** * Compress a node, its siblings and its children, depth first. * Returns the number of compressed nodes. */ ! static int node_compress( spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, ! int *tot) // total count of nodes before compressing, // incremented while going through the tree { wordnode_T *np; --- 4706,4717 ---- * Compress a node, its siblings and its children, depth first. * Returns the number of compressed nodes. */ ! static long node_compress( spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, ! long *tot) // total count of nodes before compressing, // incremented while going through the tree { wordnode_T *np; *************** *** 4705,4713 **** wordnode_T *child; hash_T hash; hashitem_T *hi; ! int len = 0; unsigned nr, n; ! int compressed = 0; /* * Go through the list of siblings. Compress each child and then try --- 4719,4727 ---- wordnode_T *child; hash_T hash; hashitem_T *hi; ! long len = 0; unsigned nr, n; ! long compressed = 0; /* * Go through the list of siblings. Compress each child and then try *************** *** 5899,5904 **** --- 5913,5920 ---- ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); hash_init(&spin.si_commonwords); spin.si_newcompID = 127; // start compound ID at first maximum + used_compress_inc = compress_inc; + used_compress_added = compress_added; // default: fnames[0] is output file, following are input files innames = &fnames[1]; *** ../vim-8.2.0893/src/version.c 2020-06-03 19:55:31.115092047 +0200 --- src/version.c 2020-06-03 20:47:04.507391282 +0200 *************** *** 748,749 **** --- 748,751 ---- { /* Add new patch number below this line */ + /**/ + 894, /**/ -- hundred-and-one symptoms of being an internet addict: 270. You are subscribed to a mailing list for every piece of software you use. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///