鬼車の5.9.2で未修正のバグについて
鬼車の最新版である5.9.2には、未修正のバグがいくつか存在する。以下に述べる修正は、Ruby 1.9で行われたパッチを他の実装のために公開するものであり、ライセンスは Ruby's License または鬼車のライセンスとする。
r29102
commit b3545895d1bb5a72e1311022c69b882d6ce90033 Author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> Date: Thu Aug 26 01:50:07 2010 +0000 * regint.h (OnigStackIndex): the type should be intptr_t. Original Oniguruma assumes the size of long and that of void * are equal, but it's not true on LLP64 platform: mswin64. originally patched by shintaro kuwamoto [ruby-dev:42133] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29102 b2dd03c8-39d4-4d8f-98ff-823fe69b080e diff --git a/regint.h b/regint.h index 633cd88..ca9f2b1 100644 --- a/regint.h +++ b/regint.h @@ -720,7 +720,7 @@ typedef struct { BBuf* mbuf; /* multi-byte info or NULL */ } CClassNode; -typedef long OnigStackIndex; +typedef intptr_t OnigStackIndex; typedef struct _OnigStackType { unsigned int type;
このパッチの原案はkuwamotoさんによるものだが、ライセンスを Ruby's License または鬼車のライセンスとすることについて、[ruby-dev:42141]にて了解を得ている。
r28648
commit b4608406f17097508d4666ceacf8764a746efa3e Author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> Date: Thu Jul 15 06:55:42 2010 +0000 * regexec.c (onig_search): don't skip non-ANYCHARs when .* fails to match. This causes to fail matching ANYCHAR_STAR with LOOK_BEHIND. This fix is workaround and disable the optimization. [ruby-dev:41851] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@28648 b2dd03c8-39d4-4d8f-98ff-823fe69b080e diff --git a/regexec.c b/regexec.c index 24422b1..c78d8ea 100644 --- a/regexec.c +++ b/regexec.c @@ -3636,11 +3636,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, MATCH_AND_RETURN_CHECK(orig_range); prev = s; s += enclen(reg->enc, s, end); - - while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { - prev = s; - s += enclen(reg->enc, s, end); - } } while (s < range); goto mismatch; }
このパッチを適用すると、報告されているバグは直るが、副作用として「.*」に対する最適化が効かなくなる。
r26796
commit 76a5c00be6ee10310571f6dac5147c212f5be5d9 Author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> Date: Tue Mar 2 09:40:27 2010 +0000 * regcomp.c (noname_disable_map): add NT_ANCHOR case. Without this change, captured groups in anchors (look-ahead, look-behind, and so on) are not removed and unintended invalid backref error occur. [ruby-core:28235] * regcomp.c (renumber_by_map): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26796 b2dd03c8-39d4-4d8f-98ff-823fe69b080e diff --git a/regcomp.c b/regcomp.c index 57c0262..b681e66 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1794,6 +1794,20 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) } break; + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = noname_disable_map(&(an->target), map, counter); + break; + } + } + break; + default: break; } @@ -1852,6 +1866,20 @@ renumber_by_map(Node* node, GroupNumRemap* map) r = renumber_node_backref(node, map); break; + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = renumber_by_map(an->target, map); + break; + } + } + break; + default: break; }
r29939
diff --git a/regcomp.c b/regcomp.c index 4508bcf..aaa5698 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3692,6 +3692,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) int type; int r = 0; +restart: type = NTYPE(node); switch (type) { case NT_LIST: @@ -3906,6 +3907,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; r = setup_look_behind(node, reg, env); if (r != 0) return r; + if (NTYPE(node) != NT_ANCHOR) goto restart; r = setup_tree(an->target, reg, state, env); } break; @@ -3918,6 +3920,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; r = setup_look_behind(node, reg, env); if (r != 0) return r; + if (NTYPE(node) != NT_ANCHOR) goto restart; r = setup_tree(an->target, reg, (state | IN_NOT), env); } break; @@ -5351,6 +5354,15 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); if (r != 0) goto err; +#ifdef ONIG_DEBUG_PARSE_TREE +# if 0 + fprintf(stderr, "ORIGINAL PARSE TREE:\n"); + if (!onig_is_prelude()) { + print_tree(stderr, root); + } +# endif +#endif + #ifdef USE_NAMED_GROUP /* mixed use named group and no-named group */ if (scan_env.num_named > 0 &&