鬼車の5.9.2で未修正のバグについて

鬼車の最新版である5.9.2には、未修正のバグがいくつか存在する。以下に述べる修正は、Ruby 1.9に対して行われたパッチを他の実装のために公開するものであり、Rubyのライセンスまたは鬼車のライセンスのもとで提供する。

r29102

commit b3545895d1bb5a72e1311022c69b882d6ce90033
Author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date:   Thu Aug 26 01:50:07 2010 +0000

    * regint.h (OnigStackIndex): the type should be intptr_t.
      Original Oniguruma assumes the size of long and that of void *
      are equal, but it's not true on LLP64 platform: mswin64.
      originally patched by shintaro kuwamoto [ruby-dev:42133]
    
    git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29102 b2dd03c8-39d4-4d8f-98ff-823fe69b080e

diff --git a/regint.h b/regint.h
index 633cd88..ca9f2b1 100644
--- a/regint.h
+++ b/regint.h
@@ -720,7 +720,7 @@ typedef struct {
   BBuf*  mbuf;   /* multi-byte info or NULL */
 } CClassNode;
 
-typedef long OnigStackIndex;
+typedef intptr_t OnigStackIndex;
 
 typedef struct _OnigStackType {
   unsigned int type;

このパッチの原案はkuwamotoさんによるものだが、Rubyのライセンスまたは鬼車のライセンスのもとで提供することについて、[ruby-dev:42141]にて了解を得ている。

r28648

commit b4608406f17097508d4666ceacf8764a746efa3e
Author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date:   Thu Jul 15 06:55:42 2010 +0000

    * regexec.c (onig_search): don't skip non-ANYCHARs when
      .* fails to match. This causes to fail matching
      ANYCHAR_STAR with LOOK_BEHIND. This fix is workaround
      and disable the optimization. [ruby-dev:41851]
    
    git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@28648 b2dd03c8-39d4-4d8f-98ff-823fe69b080e

diff --git a/regexec.c b/regexec.c
index 24422b1..c78d8ea 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3636,11 +3636,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
             MATCH_AND_RETURN_CHECK(orig_range);
             prev = s;
             s += enclen(reg->enc, s, end);
-
-            while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
-              prev = s;
-              s += enclen(reg->enc, s, end);
-            }
           } while (s < range);
           goto mismatch;
         }

このパッチを適用すると、報告されているバグは直るが、副作用として「.*」に対する最適化が行われなくなる。

r26796

commit 76a5c00be6ee10310571f6dac5147c212f5be5d9
Author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date:   Tue Mar 2 09:40:27 2010 +0000

    * regcomp.c (noname_disable_map): add NT_ANCHOR case.
      Without this change, captured groups in anchors (look-ahead,
      look-behind, and so on) are not removed and
      unintended invalid backref error occur. [ruby-core:28235]
    
    * regcomp.c (renumber_by_map): ditto.
    
    git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26796 b2dd03c8-39d4-4d8f-98ff-823fe69b080e

diff --git a/regcomp.c b/regcomp.c
index 57c0262..b681e66 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1794,6 +1794,20 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
     }
     break;
 
+  case NT_ANCHOR:
+    {
+      AnchorNode* an = NANCHOR(node);
+      switch (an->type) {
+      case ANCHOR_PREC_READ:
+      case ANCHOR_PREC_READ_NOT:
+      case ANCHOR_LOOK_BEHIND:
+      case ANCHOR_LOOK_BEHIND_NOT:
+       r = noname_disable_map(&(an->target), map, counter);
+       break;
+      }
+    }
+    break;
+
   default:
     break;
   }
@@ -1852,6 +1866,20 @@ renumber_by_map(Node* node, GroupNumRemap* map)
     r = renumber_node_backref(node, map);
     break;
 
+  case NT_ANCHOR:
+    {
+      AnchorNode* an = NANCHOR(node);
+      switch (an->type) {
+      case ANCHOR_PREC_READ:
+      case ANCHOR_PREC_READ_NOT:
+      case ANCHOR_LOOK_BEHIND:
+      case ANCHOR_LOOK_BEHIND_NOT:
+       r = renumber_by_map(an->target, map);
+       break;
+      }
+    }
+    break;
+
   default:
     break;
   }

r29939

diff --git a/regcomp.c b/regcomp.c
index 4508bcf..aaa5698 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3692,6 +3692,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
   int type;
   int r = 0;
 
+restart:
   type = NTYPE(node);
   switch (type) {
   case NT_LIST:
@@ -3906,6 +3907,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
 	  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
 	  r = setup_look_behind(node, reg, env);
 	  if (r != 0) return r;
+	  if (NTYPE(node) != NT_ANCHOR) goto restart;
 	  r = setup_tree(an->target, reg, state, env);
 	}
 	break;
@@ -3918,6 +3920,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
 	  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
 	  r = setup_look_behind(node, reg, env);
 	  if (r != 0) return r;
+	  if (NTYPE(node) != NT_ANCHOR) goto restart;
 	  r = setup_tree(an->target, reg, (state | IN_NOT), env);
 	}
 	break;
@@ -5351,6 +5354,15 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
   r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
   if (r != 0) goto err;
 
+#ifdef ONIG_DEBUG_PARSE_TREE
+# if 0
+  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
+  if (!onig_is_prelude()) {
+    print_tree(stderr, root);
+  }
+# endif
+#endif
+
 #ifdef USE_NAMED_GROUP
   /* mixed use named group and no-named group */
   if (scan_env.num_named > 0 &&