[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[commits] r7504 - in /trunk/libc: ./ posix/
- To: commits@xxxxxxxxxx
- Subject: [commits] r7504 - in /trunk/libc: ./ posix/
- From: maxim@xxxxxxxxxx
- Date: Tue, 09 Dec 2008 15:32:39 -0000
Author: maxim
Date: Tue Dec 9 07:32:39 2008
New Revision: 7504
Log:
Support smaller regex implementation.
* option-groups.def (OPTION_POSIX_REGEXP_GLIBC): New option group.
* option-groups.defaults (OPTION_POSIX_REGEXP_GLIBC): Set default.
* posix/regexec.c: Move a piece of code to ...
* posix/regexec-compat.c: New file.
* posix/Makefile (tests): Same for runptests, bug-regex11,
bug-regex13, bug-regex16, tst-regex2, tst-rxspencer, tst-pcre,
tst-boost, tst-regex, bug-regex17, bug-regex18, bug-regex19,
bug-regex20, bug-regex22, bug-regex14-mem,
tst-rxspence-mem, tst-pcre-mem and tst-boost-mem.
* posix/xregex.c: New file ported from libiberty.
Added:
trunk/libc/posix/regexec-compat.c
trunk/libc/posix/xregex.c
Modified:
trunk/libc/ChangeLog.eglibc
trunk/libc/option-groups.def
trunk/libc/option-groups.defaults
trunk/libc/posix/Makefile
trunk/libc/posix/regex.h
trunk/libc/posix/regexec.c
Modified: trunk/libc/ChangeLog.eglibc
==============================================================================
--- trunk/libc/ChangeLog.eglibc (original)
+++ trunk/libc/ChangeLog.eglibc Tue Dec 9 07:32:39 2008
@@ -1,3 +1,18 @@
+2008-12-09 Maxim Kuvyrkov <maxim@xxxxxxxxxxxxxxxx>
+
+ Support smaller regex implementation.
+
+ * option-groups.def (OPTION_POSIX_REGEXP_GLIBC): New option group.
+ * option-groups.defaults (OPTION_POSIX_REGEXP_GLIBC): Set default.
+ * posix/regexec.c: Move a piece of code to ...
+ * posix/regexec-compat.c: New file.
+ * posix/Makefile (tests): Same for runptests, bug-regex11,
+ bug-regex13, bug-regex16, tst-regex2, tst-rxspencer, tst-pcre,
+ tst-boost, tst-regex, bug-regex17, bug-regex18, bug-regex19,
+ bug-regex20, bug-regex22, bug-regex14-mem,
+ tst-rxspence-mem, tst-pcre-mem and tst-boost-mem.
+ * posix/xregex.c: New file ported from libiberty.
+
2008-11-19 Nathan Sidwell <nathan@xxxxxxxxxxxxxxxx>
* sysdeps/unix/sysv/linux/sparc/bits/siginfo.h (struct sigevent):
Modified: trunk/libc/option-groups.def
==============================================================================
--- trunk/libc/option-groups.def (original)
+++ trunk/libc/option-groups.def Tue Dec 9 07:32:39 2008
@@ -696,6 +696,17 @@
<regexp.h> header file, 'compile', 'step', and 'advance', is
omitted.
+config OPTION_POSIX_REGEXP_GLIBC
+ bool "Regular expressions from GLIBC"
+ depends OPTION_POSIX_REGEXP
+ help
+ This option group specifies which regular expression
+ library to use. The choice is between regex
+ implementation from GLIBC and regex implementation from
+ libiberty. The GLIBC variant is fully POSIX conformant and
+ optimized for speed; regex from libiberty is more than twice
+ as small while still is enough for most practical purposes.
+
config OPTION_POSIX_WIDE_CHAR_DEVICE_IO
bool "Input and output functions for wide characters"
depends OPTION_POSIX_C_LANG_WIDE_CHAR
Modified: trunk/libc/option-groups.defaults
==============================================================================
--- trunk/libc/option-groups.defaults (original)
+++ trunk/libc/option-groups.defaults Tue Dec 9 07:32:39 2008
@@ -33,4 +33,5 @@
OPTION_EGLIBC_WORDEXP = y
OPTION_POSIX_C_LANG_WIDE_CHAR = y
OPTION_POSIX_REGEXP = y
+OPTION_POSIX_REGEXP_GLIBC = y
OPTION_POSIX_WIDE_CHAR_DEVICE_IO = y
Modified: trunk/libc/posix/Makefile
==============================================================================
--- trunk/libc/posix/Makefile (original)
+++ trunk/libc/posix/Makefile Tue Dec 9 07:32:39 2008
@@ -61,7 +61,13 @@
get_child_max sched_cpucount sched_cpualloc sched_cpufree
routines-$(OPTION_EGLIBC_INET) += getaddrinfo gai_strerror
+
+ifeq (y,$(OPTION_POSIX_REGEXP_GLIBC))
routines-$(OPTION_POSIX_REGEXP) += regex
+else
+routines-$(OPTION_POSIX_REGEXP) += xregex
+endif
+
routines-$(OPTION_EGLIBC_SPAWN) += \
spawn_faction_init spawn_faction_destroy spawn_faction_addclose \
spawn_faction_addopen spawn_faction_adddup2 \
@@ -78,18 +84,18 @@
include ../Makeconfig
aux := init-posix environ
-tests := tstgetopt testfnm runtests runptests \
+tests := tstgetopt testfnm runtests \
tst-preadwrite tst-preadwrite64 test-vfork regexbug1 \
tst-getlogin tst-mmap tst-truncate \
tst-truncate64 tst-fork tst-dir \
tst-chmod bug-regex2 bug-regex3 bug-regex4 \
tst-gnuglob bug-regex6 bug-regex7 \
- bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \
- bug-regex13 bug-regex14 bug-regex15 bug-regex16 \
+ bug-regex8 bug-regex9 bug-regex10 bug-regex12 \
+ bug-regex14 bug-regex15 \
bug-regex21 bug-regex24 \
bug-regex27 bug-regex28 \
- tst-nice tst-nanosleep tst-regex2 \
- transbug tst-rxspencer tst-pcre tst-boost \
+ tst-nice tst-nanosleep \
+ transbug \
tst-vfork1 tst-vfork2 tst-vfork3 tst-waitid \
bug-glob1 bug-glob2 tst-sysconf \
tst-execvp1 tst-execvp2 tst-execlp1 tst-execlp2 \
@@ -98,12 +104,18 @@
tst-execvp3 tst-execvp4 \
tst-fnmatch2 tst-cpucount tst-cpuset
tests-$(OPTION_EGLIBC_LOCALE_CODE) \
- += tst-fnmatch tst-regex tst-regexloc bug-regex1 bug-regex5 \
- bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
- bug-regex22 bug-regex23 bug-regex25 bug-regex26
+ += tst-fnmatch tst-regexloc bug-regex1 bug-regex5 \
+ bug-regex23 bug-regex25
tests-$(OPTION_EGLIBC_INET) \
+= tst-getaddrinfo bug-ga1 tst-getaddrinfo2 \
tst-rfc3484 tst-rfc3484-2 tst-rfc3484-3 tst-getaddrinfo3
+tests-$(OPTION_POSIX_REGEXP_GLIBC) \
+ += runptests bug-regex11 bug-regex13 bug-regex16 \
+ tst-regex2 tst-rxspencer tst-pcre tst-boost
+ifeq (yy,$(OPTION_EGLIBC_LOCALE_CODE)$(OPTION_POSIX_REGEXP_GLIBC))
+tests += tst-regex bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
+ bug-regex22 bug-regex26
+endif
xtests-$(OPTION_EGLIBC_INET) += bug-ga2
ifeq (yes,$(build-shared))
test-srcs := globtest
@@ -242,10 +254,14 @@
# XXX Please note that for now we ignore the result of this test.
tests: $(objpfx)annexc.out
# eglibc: ifeq (no,$(cross-compiling))
-tests: $(objpfx)bug-regex2-mem $(objpfx)bug-regex14-mem \
- $(objpfx)bug-regex21-mem $(objpfx)tst-rxspencer-mem \
- $(objpfx)tst-pcre-mem $(objpfx)tst-boost-mem $(objpfx)tst-getconf.out \
+tests: $(objpfx)bug-regex2-mem \
+ $(objpfx)bug-regex21-mem \
+ $(objpfx)tst-getconf.out \
$(objpfx)bug-glob2-mem $(objpfx)tst-vfork3-mem
+ifeq (y,($OPTION_POSIX_REGEXP_GLIBC))
+tests: $(objpfx)bug-regex14-mem $(objpfx)tst-rxspencer-mem \
+ $(objpfx)tst-pcre-mem $(objpfx)tst-boost-mem
+endif
xtests: $(objpfx)bug-ga2-mem
# eglibc: endif
Modified: trunk/libc/posix/regex.h
==============================================================================
--- trunk/libc/posix/regex.h (original)
+++ trunk/libc/posix/regex.h Tue Dec 9 07:32:39 2008
@@ -23,6 +23,7 @@
#define _REGEX_H 1
#include <sys/types.h>
+#include <gnu/option-groups.h>
/* Allow the use in C++ code. */
#ifdef __cplusplus
@@ -158,6 +159,8 @@
treated as 'a\{1'. */
# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+/* EGLIBC: Old regex implementation does not support these. */
+# ifdef __OPTION_POSIX_REGEXP_GLIBC
/* If this bit is set, then ignore case when matching.
If not set, then case is significant. */
# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
@@ -174,6 +177,7 @@
/* If this bit is set, then no_sub will be set to 1 during
re_compile_pattern. */
# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
+# endif /* __OPTION_POSIX_REGEXP_GLIBC */
#endif
/* This global variable defines the particular regexp syntax to use (for
@@ -230,8 +234,13 @@
(RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
| RE_INTERVALS | RE_NO_EMPTY_RANGES)
+#ifdef __OPTION_POSIX_REGEXP_GLIBC
#define RE_SYNTAX_POSIX_BASIC \
(_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
+#else
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+#endif
/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
@@ -297,9 +306,11 @@
/* Like REG_NOTBOL, except for the end-of-line. */
#define REG_NOTEOL (1 << 1)
+#ifdef __OPTION_POSIX_REGEXP_GLIBC
/* Use PMATCH[0] to delimit the start and end of the search in the
buffer. */
#define REG_STARTEND (1 << 2)
+#endif
/* If any error codes are removed, changed, or added, update the
Added: trunk/libc/posix/regexec-compat.c
==============================================================================
--- trunk/libc/posix/regexec-compat.c (added)
+++ trunk/libc/posix/regexec-compat.c Tue Dec 9 07:32:39 2008
@@ -1,0 +1,39 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@xxxxxxxxxxxxxx>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef _LIBC
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+ const char *__restrict string, size_t nmatch,
+ regmatch_t pmatch[], int eflags)
+{
+ return regexec (preg, string, nmatch, pmatch,
+ eflags & (REG_NOTBOL | REG_NOTEOL));
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
Modified: trunk/libc/posix/regexec.c
==============================================================================
--- trunk/libc/posix/regexec.c (original)
+++ trunk/libc/posix/regexec.c Tue Dec 9 07:32:39 2008
@@ -254,25 +254,9 @@
return err != REG_NOERROR;
}
-#ifdef _LIBC
-# include <shlib-compat.h>
-versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
-
-# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
-__typeof__ (__regexec) __compat_regexec;
-
-int
-attribute_compat_text_section
-__compat_regexec (const regex_t *__restrict preg,
- const char *__restrict string, size_t nmatch,
- regmatch_t pmatch[], int eflags)
-{
- return regexec (preg, string, nmatch, pmatch,
- eflags & (REG_NOTBOL | REG_NOTEOL));
-}
-compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
-# endif
-#endif
+/* EGLIBC: The code that used to be here was move to a separate file
+ so that it can be shared with xregex.c. */
+#include "regexec-compat.c"
/* Entry points for GNU code. */
Added: trunk/libc/posix/xregex.c
==============================================================================
--- trunk/libc/posix/xregex.c (added)
+++ trunk/libc/posix/xregex.c Tue Dec 9 07:32:39 2008
@@ -1,0 +1,8212 @@
+/* Extended regular expression matching and search library,
+ version 0.12.
+ (Implements POSIX draft P1003.2/D11.2, except for some of the
+ internationalization features.)
+
+ Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2005 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301 USA. */
+
+/* AIX requires this to be the first thing in the file. */
+#if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
+ #pragma alloca
+#endif
+
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+
+#ifndef INSIDE_RECURSION
+# ifdef HAVE_CONFIG_H
+# include <config.h>
+# endif
+#endif
+
+/*#include <ansidecl.h>*/
+
+#ifndef INSIDE_RECURSION
+
+# if defined STDC_HEADERS && !defined emacs
+# include <stddef.h>
+# else
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+# include <sys/types.h>
+# endif
+
+# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
+
+/* For platform which support the ISO C amendement 1 functionality we
+ support user defined character classes. */
+# if defined _LIBC || WIDE_CHAR_SUPPORT
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
+# include <wchar.h>
+# include <wctype.h>
+# endif
+
+# ifdef _LIBC
+/* We have to keep the namespace clean. */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+# define btowc __btowc
+
+/* We are also using some library internals. */
+# include <locale/localeinfo.h>
+# include <locale/elem-hash.h>
+# include <langinfo.h>
+# include <locale/coll-lookup.h>
+# endif
+
+/* This is for other GNU distributions with internationalized messages. */
+# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+# undef gettext
+# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
+# endif
+# else
+# define gettext(msgid) (msgid)
+# endif
+
+# ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+# define gettext_noop(String) String
+# endif
+
+/* The `emacs' switch turns on certain matching commands
+ that make sense only in Emacs. */
+# ifdef emacs
+
+# include "lisp.h"
+# include "buffer.h"
+# include "syntax.h"
+
+# else /* not emacs */
+
+/* If we are not linking with Emacs proper,
+ we can't use the relocating allocator
+ even if config.h says that we can. */
+# undef REL_ALLOC
+
+# if defined STDC_HEADERS || defined _LIBC
+# include <stdlib.h>
+# else
+char *malloc ();
+char *realloc ();
+# endif
+
+/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
+ If nothing else has been done, use the method below. */
+# ifdef INHIBIT_STRING_HEADER
+# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
+# if !defined bzero && !defined bcopy
+# undef INHIBIT_STRING_HEADER
+# endif
+# endif
+# endif
+
+/* This is the normal way of making sure we have a bcopy and a bzero.
+ This is used in most programs--a few other programs avoid this
+ by defining INHIBIT_STRING_HEADER. */
+# ifndef INHIBIT_STRING_HEADER
+# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
+# include <string.h>
+# ifndef bzero
+# ifndef _LIBC
+# define bzero(s, n) (memset (s, '\0', n), (s))
+# else
+# define bzero(s, n) __bzero (s, n)
+# endif
+# endif
+# else
+# include <strings.h>
+# ifndef memcmp
+# define memcmp(s1, s2, n) bcmp (s1, s2, n)
+# endif
+# ifndef memcpy
+# define memcpy(d, s, n) (bcopy (s, d, n), (d))
+# endif
+# endif
+# endif
+
+/* Define the syntax stuff for \<, \>, etc. */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+# ifndef Sword
+# define Sword 1
+# endif
+
+# ifdef SWITCH_ENUM_BUG
+# define SWITCH_ENUM_CAST(x) ((int)(x))
+# else
+# define SWITCH_ENUM_CAST(x) (x)
+# endif
+
+# endif /* not emacs */
+
+# if defined _LIBC || HAVE_LIMITS_H
+# include <limits.h>
+# endif
+
+# ifndef MB_LEN_MAX
+# define MB_LEN_MAX 1
+# endif
+
+/* Get the interface, including the syntax bits. */
+# include "regex.h"
+
+/* isalpha etc. are used for the character classes. */
+# include <ctype.h>
+
+/* Jim Meyering writes:
+
+ "... Some ctype macros are valid only for character codes that
+ isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
+ using /bin/cc or gcc but without giving an ansi option). So, all
+ ctype uses should be through macros like ISPRINT... If
+ STDC_HEADERS is defined, then autoconf has verified that the ctype
+ macros don't need to be guarded with references to isascii. ...
+ Defining isascii to 1 should let any compiler worth its salt
+ eliminate the && through constant folding."
+ Solaris defines some of these symbols so we must undefine them first. */
+
+# undef ISASCII
+# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
+# define ISASCII(c) 1
+# else
+# define ISASCII(c) isascii(c)
+# endif
+
+# ifdef isblank
+# define ISBLANK(c) (ISASCII (c) && isblank (c))
+# else
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+# endif
+# ifdef isgraph
+# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# else
+# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# endif
+
+# undef ISPRINT
+# define ISPRINT(c) (ISASCII (c) && isprint (c))
+# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+# define ISALNUM(c) (ISASCII (c) && isalnum (c))
+# define ISALPHA(c) (ISASCII (c) && isalpha (c))
+# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+# define ISLOWER(c) (ISASCII (c) && islower (c))
+# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+# define ISSPACE(c) (ISASCII (c) && isspace (c))
+# define ISUPPER(c) (ISASCII (c) && isupper (c))
+# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+# ifdef _tolower
+# define TOLOWER(c) _tolower(c)
+# else
+# define TOLOWER(c) tolower(c)
+# endif
+
+# ifndef NULL
+# define NULL (void *)0
+# endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.) */
+# undef SIGN_EXTEND_CHAR
+# if __STDC__
+# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+# else /* not __STDC__ */
+/* As in Harbison and Steele. */
+# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+# endif
+
+# ifndef emacs
+/* How many characters in the character set. */
+# define CHAR_SET_SIZE 256
+
+# ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+# else /* not SYNTAX_TABLE */
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+static void init_syntax_once (void);
+
+static void
+init_syntax_once (void)
+{
+ register int c;
+ static int done = 0;
+
+ if (done)
+ return;
+ bzero (re_syntax_table, sizeof re_syntax_table);
+
+ for (c = 0; c < CHAR_SET_SIZE; ++c)
+ if (ISALNUM (c))
+ re_syntax_table[c] = Sword;
+
+ re_syntax_table['_'] = Sword;
+
+ done = 1;
+}
+
+# endif /* not SYNTAX_TABLE */
+
+# define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
+
+# endif /* emacs */
+
+/* Integer type for pointers. */
+# if !defined _LIBC && !defined HAVE_UINTPTR_T
+typedef unsigned long int uintptr_t;
+# endif
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ use `alloca' instead of `malloc'. This is because using malloc in
+ re_search* or re_match* could cause memory leaks when C-g is used in
+ Emacs; also, malloc is slower and causes storage fragmentation. On
+ the other hand, malloc is more portable, and easier to debug.
+
+ Because we sometimes use alloca, some routines have to be macros,
+ not functions -- `alloca'-allocated space disappears at the end of the
+ function it is called in. */
+
+# ifdef REGEX_MALLOC
+
+# define REGEX_ALLOCATE malloc
+# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE free
+
+# else /* not REGEX_MALLOC */
+
+/* Emacs already defines alloca, sometimes. */
+# ifndef alloca
+
+/* Make alloca work the best possible way. */
+# ifdef __GNUC__
+# define alloca __builtin_alloca
+# else /* not __GNUC__ */
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# endif /* HAVE_ALLOCA_H */
+# endif /* not __GNUC__ */
+
+# endif /* not alloca */
+
+# define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable. */
+# define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ memcpy (destination, source, osize))
+
+/* No need to do anything to free, after alloca. */
+# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
+
+# endif /* not REGEX_MALLOC */
+
+/* Define how to allocate the failure stack. */
+
+# if defined REL_ALLOC && defined REGEX_MALLOC
+
+# define REGEX_ALLOCATE_STACK(size) \
+ r_alloc (&failure_stack_ptr, (size))
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ r_re_alloc (&failure_stack_ptr, (nsize))
+# define REGEX_FREE_STACK(ptr) \
+ r_alloc_free (&failure_stack_ptr)
+
+# else /* not using relocating allocator */
+
+# ifdef REGEX_MALLOC
+
+# define REGEX_ALLOCATE_STACK malloc
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE_STACK free
+
+# else /* not REGEX_MALLOC */
+
+# define REGEX_ALLOCATE_STACK alloca
+
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ REGEX_REALLOCATE (source, osize, nsize)
+/* No need to explicitly free anything. */
+# define REGEX_FREE_STACK(arg)
+
+# endif /* not REGEX_MALLOC */
+# endif /* not using relocating allocator */
+
+
+/* True if `size1' is non-NULL and PTR is pointing anywhere inside
+ `string1' or just past its end. This works if PTR is NULL, which is
+ a good thing. */
+# define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail. */
+# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+# define RETALLOC_IF(addr, n, t) \
+ if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
+# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+# define BYTEWIDTH 8 /* In bits. */
+
+# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+# undef MAX
+# undef MIN
+# define MAX(a, b) ((a) > (b) ? (a) : (b))
+# define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+# define false 0
+# define true 1
+
+static reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp);
+
+static int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int pos,
+ struct re_registers *regs,
+ int stop);
+static int byte_re_search_2 (struct re_pattern_buffer *bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int startpos, int range,
+ struct re_registers *regs, int stop);
+static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
+
+#ifdef MBS_SUPPORT
+static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp);
+
+
+static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
+ const char *cstring1, int csize1,
+ const char *cstring2, int csize2,
+ int pos,
+ struct re_registers *regs,
+ int stop,
+ wchar_t *string1, int size1,
+ wchar_t *string2, int size2,
+ int *mbs_offset1, int *mbs_offset2);
+static int wcs_re_search_2 (struct re_pattern_buffer *bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int startpos, int range,
+ struct re_registers *regs, int stop);
+static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
+#endif
+
+/* These are the command codes that appear in compiled regular
+ expressions. Some opcodes are followed by argument bytes. A
+ command code can specify any interpretation whatsoever for its
+ arguments. Zero bytes may appear in the compiled regular expression. */
+
+typedef enum
+{
+ no_op = 0,
+
+ /* Succeed right away--no more backtracking. */
+ succeed,
+
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn,
+
+# ifdef MBS_SUPPORT
+ /* Same as exactn, but contains binary data. */
+ exactn_bin,
+# endif
+
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ /* ifdef MBS_SUPPORT, following element is length of character
+ classes, length of collating symbols, length of equivalence
+ classes, length of character ranges, and length of characters.
+ Next, character class element, collating symbols elements,
+ equivalence class elements, range elements, and character
+ elements follow.
+ See regex_compile function. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by two byte relative address to which to jump. */
+ jump,
+
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ pop_failure_jump,
+
+ /* Change to pop_failure_jump if know won't have to backtrack to
+ match; otherwise change to jump. This is used to jump
+ back to the beginning of a repeat. If what follows this jump
+ clearly won't match what the repeat does, such that we can be
+ sure that there is no use backtracking out of repetitions
+ already matched, then we change it to a pop_failure_jump.
+ Followed by two-byte address. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ dummy_failure_jump,
+
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ succeed_n,
+
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ jump_n,
+
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ set_number_at,
+
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+
+# ifdef emacs
+ ,before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+
+ /* Matches any character whose syntax is specified. Followed by
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
+
+ /* Matches any character whose syntax is not that specified. */
+ notsyntaxspec
+# endif /* emacs */
+} re_opcode_t;
+#endif /* not INSIDE_RECURSION */
+
+
+#ifdef BYTE
+# define CHAR_T char
+# define UCHAR_T unsigned char
+# define COMPILED_BUFFER_VAR bufp->buffer
+# define OFFSET_ADDRESS_SIZE 2
+# define PREFIX(name) byte_##name
+# define ARG_PREFIX(name) name
+# define PUT_CHAR(c) putchar (c)
+#else
+# ifdef WCHAR
+# define CHAR_T wchar_t
+# define UCHAR_T wchar_t
+# define COMPILED_BUFFER_VAR wc_buffer
+# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
+# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
+# define PREFIX(name) wcs_##name
+# define ARG_PREFIX(name) c##name
+/* Should we use wide stream?? */
+# define PUT_CHAR(c) printf ("%C", c);
+# define TRUE 1
+# define FALSE 0
+# else
+# ifdef MBS_SUPPORT
+# define WCHAR
+# define INSIDE_RECURSION
+# include "xregex.c"
+# undef INSIDE_RECURSION
+# endif
+# define BYTE
+# define INSIDE_RECURSION
+# include "xregex.c"
+# undef INSIDE_RECURSION
+# endif
+#endif
+
+#ifdef INSIDE_RECURSION
+/* Common operations on the compiled pattern. */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
+
+# ifdef WCHAR
+# define STORE_NUMBER(destination, number) \
+ do { \
+ *(destination) = (UCHAR_T)(number); \
+ } while (0)
+# else /* BYTE */
+# define STORE_NUMBER(destination, number) \
+ do { \
+ (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; \
+ } while (0)
+# endif /* WCHAR */
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+ the byte after where the number is stored. Therefore, DESTINATION
+ must be an lvalue. */
+/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
+
+# define STORE_NUMBER_AND_INCR(destination, number) \
+ do { \
+ STORE_NUMBER (destination, number); \
+ (destination) += OFFSET_ADDRESS_SIZE; \
+ } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+ at SOURCE. */
+/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
+
+# ifdef WCHAR
+# define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source); \
+ } while (0)
+# else /* BYTE */
+# define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
+ } while (0)
+# endif
+
+# ifdef DEBUG
+static void PREFIX(extract_number) (int *dest, UCHAR_T *source);
+static void
+PREFIX(extract_number) (int *dest, UCHAR_T *source)
+{
+# ifdef WCHAR
+ *dest = *source;
+# else /* BYTE */
+ int temp = SIGN_EXTEND_CHAR (*(source + 1));
+ *dest = *source & 0377;
+ *dest += temp << 8;
+# endif
+}
+
+# ifndef EXTRACT_MACROS /* To debug the macros. */
+# undef EXTRACT_NUMBER
+# define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
+# endif /* not EXTRACT_MACROS */
+
+# endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
+ SOURCE must be an lvalue. */
+
+# define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ do { \
+ EXTRACT_NUMBER (destination, source); \
+ (source) += OFFSET_ADDRESS_SIZE; \
+ } while (0)
+
+# ifdef DEBUG
+static void PREFIX(extract_number_and_incr) (int *destination,
+ UCHAR_T **source);
+static void
+PREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source)
+{
+ PREFIX(extract_number) (destination, *source);
+ *source += OFFSET_ADDRESS_SIZE;
+}
+
+# ifndef EXTRACT_MACROS
+# undef EXTRACT_NUMBER_AND_INCR
+# define EXTRACT_NUMBER_AND_INCR(dest, src) \
+ PREFIX(extract_number_and_incr) (&dest, &src)
+# endif /* not EXTRACT_MACROS */
+
+# endif /* DEBUG */
+
+
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
+# ifdef DEBUG
+
+# ifndef DEFINED_ONCE
+
+/* We use standard I/O for debugging. */
+# include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+# include <assert.h>
+
+static int debug;
+
+# define DEBUG_STATEMENT(e) e
+# define DEBUG_PRINT1(x) if (debug) printf (x)
+# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+# endif /* not DEFINED_ONCE */
+
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) PREFIX(print_partial_compiled_pattern) (s, e)
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2)
+
+
+/* Print the fastmap in human-readable form. */
+
+# ifndef DEFINED_ONCE
+void
+print_fastmap (char *fastmap)
+{
+ unsigned was_a_range = 0;
+ unsigned i = 0;
+
+ while (i < (1 << BYTEWIDTH))
+ {
+ if (fastmap[i++])
+ {
+ was_a_range = 0;
+ putchar (i - 1);
+ while (i < (1 << BYTEWIDTH) && fastmap[i])
+ {
+ was_a_range = 1;
+ i++;
+ }
+ if (was_a_range)
+ {
+ printf ("-");
+ putchar (i - 1);
+ }
+ }
+ }
+ putchar ('\n');
+}
+# endif /* not DEFINED_ONCE */
+
+
+/* Print a compiled pattern string in human-readable form, starting at
+ the START pointer into it and ending just before the pointer END. */
+
+void
+PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end)
+{
+ int mcnt, mcnt2;
+ UCHAR_T *p1;
+ UCHAR_T *p = start;
+ UCHAR_T *pend = end;
+
+ if (start == NULL)
+ {
+ printf ("(null)\n");
+ return;
+ }
+
+ /* Loop over pattern commands. */
+ while (p < pend)
+ {
+# ifdef _LIBC
+ printf ("%td:\t", p - start);
+# else
+ printf ("%ld:\t", (long int) (p - start));
+# endif
+
+ switch ((re_opcode_t) *p++)
+ {
+ case no_op:
+ printf ("/no_op");
+ break;
+
+ case exactn:
+ mcnt = *p++;
+ printf ("/exactn/%d", mcnt);
+ do
+ {
+ putchar ('/');
+ PUT_CHAR (*p++);
+ }
+ while (--mcnt);
+ break;
+
+# ifdef MBS_SUPPORT
+ case exactn_bin:
+ mcnt = *p++;
+ printf ("/exactn_bin/%d", mcnt);
+ do
+ {
+ printf("/%lx", (long int) *p++);
+ }
+ while (--mcnt);
+ break;
+# endif /* MBS_SUPPORT */
+
+ case start_memory:
+ mcnt = *p++;
+ printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
+ break;
+
+ case stop_memory:
+ mcnt = *p++;
+ printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
+ break;
+
+ case duplicate:
+ printf ("/duplicate/%ld", (long int) *p++);
+ break;
+
+ case anychar:
+ printf ("/anychar");
+ break;
+
+ case charset:
+ case charset_not:
+ {
+# ifdef WCHAR
+ int i, length;
+ wchar_t *workp = p;
+ printf ("/charset [%s",
+ (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
+ p += 5;
+ length = *workp++; /* the length of char_classes */
+ for (i=0 ; i<length ; i++)
+ printf("[:%lx:]", (long int) *p++);
+ length = *workp++; /* the length of collating_symbol */
+ for (i=0 ; i<length ;)
+ {
+ printf("[.");
+ while(*p != 0)
+ PUT_CHAR((i++,*p++));
+ i++,p++;
+ printf(".]");
+ }
+ length = *workp++; /* the length of equivalence_class */
+ for (i=0 ; i<length ;)
+ {
+ printf("[=");
+ while(*p != 0)
+ PUT_CHAR((i++,*p++));
+ i++,p++;
+ printf("=]");
+ }
+ length = *workp++; /* the length of char_range */
+ for (i=0 ; i<length ; i++)
+ {
+ wchar_t range_start = *p++;
+ wchar_t range_end = *p++;
+ printf("%C-%C", range_start, range_end);
+ }
+ length = *workp++; /* the length of char */
+ for (i=0 ; i<length ; i++)
+ printf("%C", *p++);
+ putchar (']');
+# else
+ register int c, last = -100;
+ register int in_range = 0;
+
+ printf ("/charset [%s",
+ (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
+
+ assert (p + *p < pend);
+
+ for (c = 0; c < 256; c++)
+ if (c / 8 < *p
+ && (p[1 + (c/8)] & (1 << (c % 8))))
+ {
+ /* Are we starting a range? */
+ if (last + 1 == c && ! in_range)
+ {
+ putchar ('-');
+ in_range = 1;
+ }
+ /* Have we broken a range? */
+ else if (last + 1 != c && in_range)
+ {
+ putchar (last);
+ in_range = 0;
+ }
+
+ if (! in_range)
+ putchar (c);
+
+ last = c;
+ }
+
+ if (in_range)
+ putchar (last);
+
+ putchar (']');
+
+ p += 1 + *p;
+# endif /* WCHAR */
+ }
+ break;
+
+ case begline:
+ printf ("/begline");
+ break;
+
+ case endline:
+ printf ("/endline");
+ break;
+
+ case on_failure_jump:
+ PREFIX(extract_number_and_incr) (&mcnt, &p);
+# ifdef _LIBC
+ printf ("/on_failure_jump to %td", p + mcnt - start);
+# else
+ printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
+# endif
+ break;
+
+ case on_failure_keep_string_jump:
+ PREFIX(extract_number_and_incr) (&mcnt, &p);
+# ifdef _LIBC
+ printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
+# else
+ printf ("/on_failure_keep_string_jump to %ld",
+ (long int) (p + mcnt - start));
+# endif
+ break;
+
+ case dummy_failure_jump:
+ PREFIX(extract_number_and_incr) (&mcnt, &p);
+# ifdef _LIBC
+ printf ("/dummy_failure_jump to %td", p + mcnt - start);
+# else
+ printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
+# endif
+ break;
+
+ case push_dummy_failure:
+ printf ("/push_dummy_failure");
+ break;
+
+ case maybe_pop_jump:
+ PREFIX(extract_number_and_incr) (&mcnt, &p);
+# ifdef _LIBC
+ printf ("/maybe_pop_jump to %td", p + mcnt - start);
+# else
+ printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
+# endif
+ break;
+
+ case pop_failure_jump:
+ PREFIX(extract_number_and_incr) (&mcnt, &p);
+# ifdef _LIBC
+ printf ("/pop_failure_jump to %td", p + mcnt - start);
+# else
+ printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
+# endif
+ break;
+
+ case jump_past_alt:
+ PREFIX(extract_number_and_incr) (&mcnt, &p);
+# ifdef _LIBC
+ printf ("/jump_past_alt to %td", p + mcnt - start);
+# else
+ printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
+# endif
+ break;
+
+ case jump:
+ PREFIX(extract_number_and_incr) (&mcnt, &p);
+# ifdef _LIBC
+ printf ("/jump to %td", p + mcnt - start);
+# else
+ printf ("/jump to %ld", (long int) (p + mcnt - start));
+# endif
+ break;
+
+ case succeed_n:
+ PREFIX(extract_number_and_incr) (&mcnt, &p);
+ p1 = p + mcnt;
+ PREFIX(extract_number_and_incr) (&mcnt2, &p);
+# ifdef _LIBC
+ printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
+# else
+ printf ("/succeed_n to %ld, %d times",
+ (long int) (p1 - start), mcnt2);
+# endif
+ break;
+
+ case jump_n:
+ PREFIX(extract_number_and_incr) (&mcnt, &p);
+ p1 = p + mcnt;
+ PREFIX(extract_number_and_incr) (&mcnt2, &p);
+ printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
+ break;
+
+ case set_number_at:
+ PREFIX(extract_number_and_incr) (&mcnt, &p);
+ p1 = p + mcnt;
+ PREFIX(extract_number_and_incr) (&mcnt2, &p);
+# ifdef _LIBC
+ printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
+# else
+ printf ("/set_number_at location %ld to %d",
+ (long int) (p1 - start), mcnt2);
+# endif
+ break;
+
+ case wordbound:
+ printf ("/wordbound");
+ break;
+
+ case notwordbound:
+ printf ("/notwordbound");
+ break;
+
+ case wordbeg:
+ printf ("/wordbeg");
+ break;
+
+ case wordend:
+ printf ("/wordend");
+ break;
+
+# ifdef emacs
+ case before_dot:
+ printf ("/before_dot");
+ break;
+
+ case at_dot:
+ printf ("/at_dot");
+ break;
+
+ case after_dot:
+ printf ("/after_dot");
+ break;
+
+ case syntaxspec:
+ printf ("/syntaxspec");
+ mcnt = *p++;
+ printf ("/%d", mcnt);
+ break;
+
+ case notsyntaxspec:
+ printf ("/notsyntaxspec");
+ mcnt = *p++;
+ printf ("/%d", mcnt);
+ break;
+# endif /* emacs */
+
+ case wordchar:
+ printf ("/wordchar");
+ break;
+
+ case notwordchar:
+ printf ("/notwordchar");
+ break;
+
+ case begbuf:
+ printf ("/begbuf");
+ break;
+
+ case endbuf:
+ printf ("/endbuf");
+ break;
+
+ default:
+ printf ("?%ld", (long int) *(p-1));
+ }
+
+ putchar ('\n');
+ }
+
+# ifdef _LIBC
+ printf ("%td:\tend of pattern.\n", p - start);
+# else
+ printf ("%ld:\tend of pattern.\n", (long int) (p - start));
+# endif
+}
+
+
+void
+PREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
+{
+ UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
+
+ PREFIX(print_partial_compiled_pattern) (buffer, buffer
+ + bufp->used / sizeof(UCHAR_T));
+ printf ("%ld bytes used/%ld bytes allocated.\n",
+ bufp->used, bufp->allocated);
+
+ if (bufp->fastmap_accurate && bufp->fastmap)
+ {
+ printf ("fastmap: ");
+ print_fastmap (bufp->fastmap);
+ }
+
+# ifdef _LIBC
+ printf ("re_nsub: %Zd\t", bufp->re_nsub);
+# else
+ printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
+# endif
+ printf ("regs_alloc: %d\t", bufp->regs_allocated);
+ printf ("can_be_null: %d\t", bufp->can_be_null);
+ printf ("newline_anchor: %d\n", bufp->newline_anchor);
+ printf ("no_sub: %d\t", bufp->no_sub);
+ printf ("not_bol: %d\t", bufp->not_bol);
+ printf ("not_eol: %d\t", bufp->not_eol);
+ printf ("syntax: %lx\n", bufp->syntax);
+ /* Perhaps we should print the translate table? */
+}
+
+
+void
+PREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1,
+ int size1, const CHAR_T *string2, int size2)
+{
+ int this_char;
+
+ if (where == NULL)
+ printf ("(null)");
+ else
+ {
+ int cnt;
+
+ if (FIRST_STRING_P (where))
+ {
+ for (this_char = where - string1; this_char < size1; this_char++)
+ PUT_CHAR (string1[this_char]);
+
+ where = string2;
+ }
+
+ cnt = 0;
+ for (this_char = where - string2; this_char < size2; this_char++)
+ {
+ PUT_CHAR (string2[this_char]);
+ if (++cnt > 100)
+ {
+ fputs ("...", stdout);
+ break;
+ }
+ }
+ }
+}
+
+# ifndef DEFINED_ONCE
+void
+printchar (int c)
+{
+ putc (c, stderr);
+}
+# endif
+
+# else /* not DEBUG */
+
+# ifndef DEFINED_ONCE
+# undef assert
+# define assert(e)
+
+# define DEBUG_STATEMENT(e)
+# define DEBUG_PRINT1(x)
+# define DEBUG_PRINT2(x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4)
+# endif /* not DEFINED_ONCE */
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+# endif /* not DEBUG */
+
+
+
+# ifdef WCHAR
+/* This convert a multibyte string to a wide character string.
+ And write their correspondances to offset_buffer(see below)
+ and write whether each wchar_t is binary data to is_binary.
+ This assume invalid multibyte sequences as binary data.
+ We assume offset_buffer and is_binary is already allocated
+ enough space. */
+
+static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
+ size_t len, int *offset_buffer,
+ char *is_binary);
+static size_t
+convert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len,
+ int *offset_buffer, char *is_binary)
+ /* It hold correspondances between src(char string) and
+ dest(wchar_t string) for optimization.
+ e.g. src = "xxxyzz"
+ dest = {'X', 'Y', 'Z'}
+ (each "xxx", "y" and "zz" represent one multibyte character
+ corresponding to 'X', 'Y' and 'Z'.)
+ offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
+ = {0, 3, 4, 6}
+ */
+{
+ wchar_t *pdest = dest;
+ const unsigned char *psrc = src;
+ size_t wc_count = 0;
+
+ mbstate_t mbs;
+ int i, consumed;
+ size_t mb_remain = len;
+ size_t mb_count = 0;
+
+ /* Initialize the conversion state. */
+ memset (&mbs, 0, sizeof (mbstate_t));
+
+ offset_buffer[0] = 0;
+ for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
+ psrc += consumed)
+ {
+#ifdef _LIBC
+ consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
+#else
+ consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
+#endif
+
+ if (consumed <= 0)
+ /* failed to convert. maybe src contains binary data.
+ So we consume 1 byte manualy. */
+ {
+ *pdest = *psrc;
+ consumed = 1;
+ is_binary[wc_count] = TRUE;
+ }
+ else
+ is_binary[wc_count] = FALSE;
+ /* In sjis encoding, we use yen sign as escape character in
+ place of reverse solidus. So we convert 0x5c(yen sign in
+ sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
+ solidus in UCS2). */
+ if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
+ *pdest = (wchar_t) *psrc;
+
+ offset_buffer[wc_count + 1] = mb_count += consumed;
+ }
+
+ /* Fill remain of the buffer with sentinel. */
+ for (i = wc_count + 1 ; i <= len ; i++)
+ offset_buffer[i] = mb_count + 1;
+
+ return wc_count;
+}
+
+# endif /* WCHAR */
+
+#else /* not INSIDE_RECURSION */
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+/* This has no initializer because initialized variables in Emacs
+ become read-only after dumping. */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit mask comprised of the various bits
+ defined in regex.h. We return the old syntax. */
+
+reg_syntax_t
+re_set_syntax (reg_syntax_t syntax)
+{
+ reg_syntax_t ret = re_syntax_options;
+
+ re_syntax_options = syntax;
+# ifdef DEBUG
+ if (syntax & RE_DEBUG)
+ debug = 1;
+ else if (debug) /* was on but now is not */
+ debug = 0;
+# endif /* DEBUG */
+ return ret;
+}
+# ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+# endif
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be same as there.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice? */
+
+static const char *re_error_msgid[] =
+ {
+ gettext_noop ("Success"), /* REG_NOERROR */
+ gettext_noop ("No match"), /* REG_NOMATCH */
+ gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
+ gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
+ gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
+ gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
+ gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
+ gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
+ gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
+ gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
+ gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
+ gettext_noop ("Invalid range end"), /* REG_ERANGE */
+ gettext_noop ("Memory exhausted"), /* REG_ESPACE */
+ gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
+ gettext_noop ("Premature end of regular expression"), /* REG_EEND */
+ gettext_noop ("Regular expression too big"), /* REG_ESIZE */
+ gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+ };
+
+#endif /* INSIDE_RECURSION */
+
+#ifndef DEFINED_ONCE
+/* Avoiding alloca during matching, to placate r_alloc. */
+
+/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
+ searching and matching functions should not call alloca. On some
+ systems, alloca is implemented in terms of malloc, and if we're
+ using the relocating allocator routines, then malloc could cause a
+ relocation, which might (if the strings being searched are in the
+ ralloc heap) shift the data out from underneath the regexp
+ routines.
+
+ Here's another reason to avoid allocation: Emacs
+ processes input from X in a signal handler; processing X input may
+ call malloc; if input arrives while a matching routine is calling
+ malloc, then we're scrod. But Emacs can't just block input while
+ calling matching routines; then we don't notice interrupts when
+ they come in. So, Emacs blocks input around all regexp calls
+ except the matching calls, which it leaves unprotected, in the
+ faith that they will not malloc. */
+
+/* Normally, this is fine. */
+# define MATCH_MAY_ALLOCATE
+
+/* When using GNU C, we are not REALLY using the C alloca, no matter
+ what config.h may say. So don't take precautions for it. */
+# ifdef __GNUC__
+# undef C_ALLOCA
+# endif
+
+/* The match routines may not allocate if (1) they would do it with malloc
+ and (2) it's not safe for them to use malloc.
+ Note that if REL_ALLOC is defined, matching would not use malloc for the
+ failure stack, but we would still use it for the register vectors;
+ so REL_ALLOC should not affect this. */
+# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
+# undef MATCH_MAY_ALLOCATE
+# endif
+#endif /* not DEFINED_ONCE */
+
+#ifdef INSIDE_RECURSION
+/* Failure stack declarations and macros; both re_compile_fastmap and
+ re_match_2 use a failure stack. These have to be macros because of
+ REGEX_ALLOCATE_STACK. */
+
+
+/* Number of failure points for which to initially allocate space
+ when matching. If this number is exceeded, we allocate more
+ space, so it is not a hard limit. */
+# ifndef INIT_FAILURE_ALLOC
+# define INIT_FAILURE_ALLOC 5
+# endif
+
+/* Roughly the maximum number of failure points on the stack. Would be
+ exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
+ This is a variable only so users of regex can assign to it; we never
+ change it ourselves. */
+
+
+# ifndef DEFINED_ONCE
+
+# ifdef INT_IS_16BIT
+# define RE_M_F_TYPE long int
+# else
+# define RE_M_F_TYPE int
+# endif /* INT_IS_16BIT */
+
+# ifdef MATCH_MAY_ALLOCATE
+/* 4400 was enough to cause a crash on Alpha OSF/1,
+ whose default stack limit is 2mb. */
+# define RE_M_F_DEFAULT 4000
+# else
+# define RE_M_F_DEFAULT 2000
+# endif /* MATCH_MAY_ALLOCATE */
+
+# include <shlib-compat.h>
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
+link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
+RE_M_F_TYPE re_max_failures = RE_M_F_DEFAULT;
+# else
+RE_M_F_TYPE re_max_failures attribute_hidden = RE_M_F_DEFAULT;
+# endif /* SHLIB_COMPAT */
+
+# undef RE_M_F_TYPE
+# undef RE_M_F_DEFAULT
+
+# endif /* DEFINED_ONCE */
+
+# ifdef INT_IS_16BIT
+
+union PREFIX(fail_stack_elt)
+{
+ UCHAR_T *pointer;
+ long int integer;
+};
+
+typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
+
+typedef struct
+{
+ PREFIX(fail_stack_elt_t) *stack;
+ unsigned long int size;
+ unsigned long int avail; /* Offset of next open position. */
+} PREFIX(fail_stack_type);
+
+# else /* not INT_IS_16BIT */
+
+union PREFIX(fail_stack_elt)
+{
+ UCHAR_T *pointer;
+ int integer;
+};
+
+typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
+
+typedef struct
+{
+ PREFIX(fail_stack_elt_t) *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} PREFIX(fail_stack_type);
+
+# endif /* INT_IS_16BIT */
+
+# ifndef DEFINED_ONCE
+# define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
+# define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+# define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
+# endif
+
+
+/* Define macros to initialize and free the failure stack.
+ Do `return -2' if the alloc fails. */
+
+# ifdef MATCH_MAY_ALLOCATE
+# define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \
+ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
+ \
+ if (fail_stack.stack == NULL) \
+ return -2; \
+ \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ fail_stack.avail = 0; \
+ } while (0)
+
+# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
+# else
+# define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.avail = 0; \
+ } while (0)
+
+# define RESET_FAIL_STACK()
+# endif
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+ Return 1 if succeeds, and 0 if either ran out of memory
+ allocating space for it or it was already too large.
+
+ REGEX_REALLOCATE_STACK requires `destination' be declared. */
+
+# define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
+ ? 0 \
+ : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \
+ REGEX_REALLOCATE_STACK ((fail_stack).stack, \
+ (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \
+ ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
+ \
+ (fail_stack).stack == NULL \
+ ? 0 \
+ : ((fail_stack).size <<= 1, \
+ 1)))
+
+
+/* Push pointer POINTER on FAIL_STACK.
+ Return 1 if was able to do so and 0 if ran out of memory allocating
+ space to do so. */
+# define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
+ ((FAIL_STACK_FULL () \
+ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
+ ? 0 \
+ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
+ 1))
+
+/* Push a pointer value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+# define PUSH_FAILURE_POINTER(item) \
+ fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)
+
+/* This pushes an integer-valued item onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+# define PUSH_FAILURE_INT(item) \
+ fail_stack.stack[fail_stack.avail++].integer = (item)
+
+/* Push a fail_stack_elt_t value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+# define PUSH_FAILURE_ELT(item) \
+ fail_stack.stack[fail_stack.avail++] = (item)
+
+/* These three POP... operations complement the three PUSH... operations.
+ All assume that `fail_stack' is nonempty. */
+# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
+# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
+# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
+
+/* Used to omit pushing failure point id's when we're not debugging. */
+# ifdef DEBUG
+# define DEBUG_PUSH PUSH_FAILURE_INT
+# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
+# else
+# define DEBUG_PUSH(item)
+# define DEBUG_POP(item_addr)
+# endif
+
+
+/* Push the information about the state we will need
+ if we ever fail back to it.
+
+ Requires variables fail_stack, regstart, regend, reg_info, and
+ num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination'
+ be declared.
+
+ Does `return FAILURE_CODE' if runs out of memory. */
+
+# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
+ do { \
+ char *destination; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ /* Can't be int, since there is not a shred of a guarantee that int \
+ is wide enough to hold a value of something to which pointer can \
+ be assigned */ \
+ active_reg_t this_reg; \
+ \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+ \
+ DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
+ \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
+ (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+ } \
+ \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
+ \
+ if (1) \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
+ \
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
+ PUSH_FAILURE_POINTER (regstart[this_reg]); \
+ \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
+ PUSH_FAILURE_POINTER (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: %p\n ", \
+ reg_info[this_reg].word.pointer); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ELT (reg_info[this_reg].word); \
+ } \
+ \
+ DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
+ PUSH_FAILURE_INT (lowest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
+ PUSH_FAILURE_INT (highest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_POINTER (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_POINTER (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+ } while (0)
+
+# ifndef DEFINED_ONCE
+/* This is the number of items that are pushed and popped on the stack
+ for each register. */
+# define NUM_REG_ITEMS 3
+
+/* Individual items aside from the registers. */
+# ifdef DEBUG
+# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+# else
+# define NUM_NONREG_ITEMS 4
+# endif
+
+/* We push at most this many items on the stack. */
+/* We used to use (num_regs - 1), which is the number of registers
+ this regexp will save; but that was changed to 5
+ to avoid stack overflow for a regexp with lots of parens. */
+# define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* We actually push this many items. */
+# define NUM_FAILURE_ITEMS \
+ (((0 \
+ ? 0 : highest_active_reg - lowest_active_reg + 1) \
+ * NUM_REG_ITEMS) \
+ + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it. */
+# define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+# endif /* not DEFINED_ONCE */
+
+
+/* Pops what PUSH_FAIL_STACK pushes.
+
+ We restore into the parameters, all of which should be lvalues:
+ STR -- the saved data position.
+ PAT -- the saved pattern position.
+ LOW_REG, HIGH_REG -- the highest and lowest active registers.
+ REGSTART, REGEND -- arrays of string positions.
+ REG_INFO -- array of information about each subexpression.
+
+ Also assumes the variables `fail_stack' and (if debugging), `bufp',
+ `pend', `string1', `size1', `string2', and `size2'. */
+# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{ \
+ DEBUG_STATEMENT (unsigned failure_id;) \
+ active_reg_t this_reg; \
+ const UCHAR_T *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_POINTER (); \
+ if (string_temp != NULL) \
+ str = (const CHAR_T *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string %p: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (UCHAR_T *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
+ \
+ low_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
+ \
+ if (1) \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ELT (); \
+ DEBUG_PRINT2 (" info: %p\n", \
+ reg_info[this_reg].word.pointer); \
+ \
+ regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
+ } \
+ else \
+ { \
+ for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
+ { \
+ reg_info[this_reg].word.integer = 0; \
+ regend[this_reg] = 0; \
+ regstart[this_reg] = 0; \
+ } \
+ highest_active_reg = high_reg; \
+ } \
+ \
+ set_regs_matched_done = 0; \
+ DEBUG_STATEMENT (nfailure_points_popped++); \
+} /* POP_FAILURE_POINT */
+
+/* Structure for per-register (a.k.a. per-group) information.
+ Other register information, such as the
+ starting and ending positions (which are addresses), and the list of
+ inner groups (which is a bits list) are maintained in separate
+ variables.
+
+ We are making a (strictly speaking) nonportable assumption here: that
+ the compiler will pack our bit fields into something that fits into
+ the type of `word', i.e., is something that fits into one item on the
+ failure stack. */
+
+
+/* Declarations and macros for re_match_2. */
+
+typedef union
+{
+ PREFIX(fail_stack_elt_t) word;
+ struct
+ {
+ /* This field is one if this group can match the empty string,
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+# define MATCH_NULL_UNSET_VALUE 3
+ unsigned match_null_string_p : 2;
+ unsigned is_active : 1;
+ unsigned matched_something : 1;
+ unsigned ever_matched_something : 1;
+ } bits;
+} PREFIX(register_info_type);
+
+# ifndef DEFINED_ONCE
+# define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
+# define IS_ACTIVE(R) ((R).bits.is_active)
+# define MATCHED_SOMETHING(R) ((R).bits.matched_something)
+# define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
+
+
+/* Call this when have matched a real character; it sets `matched' flags
+ for the subexpressions which we are currently inside. Also records
+ that those subexprs have matched. */
+# define SET_REGS_MATCHED() \
+ do \
+ { \
+ if (!set_regs_matched_done) \
+ { \
+ active_reg_t r; \
+ set_regs_matched_done = 1; \
+ for (r = lowest_active_reg; r <= highest_active_reg; r++) \
+ { \
+ MATCHED_SOMETHING (reg_info[r]) \
+ = EVER_MATCHED_SOMETHING (reg_info[r]) \
+ = 1; \
+ } \
+ } \
+ } \
+ while (0)
+# endif /* not DEFINED_ONCE */
+
+/* Registers are set to a sentinel when they haven't yet matched. */
+static CHAR_T PREFIX(reg_unset_dummy);
+# define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
+# define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+/* Subroutine declarations and macros for regex_compile. */
+static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
+static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
+ int arg1, int arg2);
+static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
+ int arg, UCHAR_T *end);
+static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
+ int arg1, int arg2, UCHAR_T *end);
+static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
+ const CHAR_T *p,
+ reg_syntax_t syntax);
+static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
+ const CHAR_T *pend,
+ reg_syntax_t syntax);
+# ifdef WCHAR
+static reg_errcode_t wcs_compile_range (CHAR_T range_start,
+ const CHAR_T **p_ptr,
+ const CHAR_T *pend,
+ char *translate,
+ reg_syntax_t syntax,
+ UCHAR_T *b,
+ CHAR_T *char_set);
+static void insert_space (int num, CHAR_T *loc, CHAR_T *end);
+# else /* BYTE */
+static reg_errcode_t byte_compile_range (unsigned int range_start,
+ const char **p_ptr,
+ const char *pend,
+ RE_TRANSLATE_TYPE translate,
+ reg_syntax_t syntax,
+ unsigned char *b);
+# endif /* WCHAR */
+
+/* Fetch the next character in the uncompiled pattern---translating it
+ if necessary. Also cast from a signed character in the constant
+ string passed to us by the user to an unsigned char that we can use
+ as an array index (in, e.g., `translate'). */
+/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
+ because it is impossible to allocate 4GB array for some encodings
+ which have 4 byte character_set like UCS4. */
+# ifndef PATFETCH
+# ifdef WCHAR
+# define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (UCHAR_T) *p++; \
+ if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \
+ } while (0)
+# else /* BYTE */
+# define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ if (translate) c = (unsigned char) translate[c]; \
+ } while (0)
+# endif /* WCHAR */
+# endif
+
+/* Fetch the next character in the uncompiled pattern, with no
+ translation. */
+# define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (UCHAR_T) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern. */
+# define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D. We
+ cast the subscript to translate because some data is declared as
+ `char *', to avoid warnings when a string constant is passed. But
+ when we use a character as a subscript we must make it unsigned. */
+/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
+ because it is impossible to allocate 4GB array for some encodings
+ which have 4 byte character_set like UCS4. */
+
+# ifndef TRANSLATE
+# ifdef WCHAR
+# define TRANSLATE(d) \
+ ((translate && ((UCHAR_T) (d)) <= 0xff) \
+ ? (char) translate[(unsigned char) (d)] : (d))
+# else /* BYTE */
+# define TRANSLATE(d) \
+ (translate ? (char) translate[(unsigned char) (d)] : (char) (d))
+# endif /* WCHAR */
+# endif
+
+
+/* Macros for outputting the compiled pattern into `buffer'. */
+
+/* If the buffer isn't allocated when it comes in, use this. */
+# define INIT_BUF_SIZE (32 * sizeof(UCHAR_T))
+
+/* Make sure we have at least N more bytes of space in buffer. */
+# ifdef WCHAR
+# define GET_BUFFER_SPACE(n) \
+ while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \
+ + (n)*sizeof(CHAR_T)) > bufp->allocated) \
+ EXTEND_BUFFER ()
+# else /* BYTE */
+# define GET_BUFFER_SPACE(n) \
+ while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
+ EXTEND_BUFFER ()
+# endif /* WCHAR */
+
+/* Make sure we have one more byte of buffer space and then add C to it. */
+# define BUF_PUSH(c) \
+ do { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (UCHAR_T) (c); \
+ } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
+# define BUF_PUSH_2(c1, c2) \
+ do { \
+ GET_BUFFER_SPACE (2); \
+ *b++ = (UCHAR_T) (c1); \
+ *b++ = (UCHAR_T) (c2); \
+ } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes. */
+# define BUF_PUSH_3(c1, c2, c3) \
+ do { \
+ GET_BUFFER_SPACE (3); \
+ *b++ = (UCHAR_T) (c1); \
+ *b++ = (UCHAR_T) (c2); \
+ *b++ = (UCHAR_T) (c3); \
+ } while (0)
+
+/* Store a jump with opcode OP at LOC to location TO. We store a
+ relative address offset by the three bytes the jump itself occupies. */
+# define STORE_JUMP(op, loc, to) \
+ PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
+
+/* Likewise, for a two-argument jump. */
+# define STORE_JUMP2(op, loc, to, arg) \
+ PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
+
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+# define INSERT_JUMP(op, loc, to) \
+ PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
+
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+# define INSERT_JUMP2(op, loc, to, arg) \
+ PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
+ arg, b)
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+ into the pattern are two bytes long. So if 2^16 bytes turns out to
+ be too small, many things would have to change. */
+/* Any other compiler which, like MSC, has allocation limit below 2^16
+ bytes will have to use approach similar to what was done below for
+ MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
+ reallocating to 0 bytes. Such thing is not going to work too well.
+ You have been warned!! */
+# ifndef DEFINED_ONCE
+# if defined _MSC_VER && !defined WIN32
+/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
+ The REALLOC define eliminates a flurry of conversion warnings,
+ but is not required. */
+# define MAX_BUF_SIZE 65500L
+# define REALLOC(p,s) realloc ((p), (size_t) (s))
+# else
+# define MAX_BUF_SIZE (1L << 16)
+# define REALLOC(p,s) realloc ((p), (s))
+# endif
+
+/* Extend the buffer by twice its current size via realloc and
+ reset the pointers that pointed into the old block to point to the
+ correct places in the new one. If extending the buffer results in it
+ being larger than MAX_BUF_SIZE, then flag memory exhausted. */
+# if __BOUNDED_POINTERS__
+# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
+# define MOVE_BUFFER_POINTER(P) \
+ (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
+# define ELSE_EXTEND_BUFFER_HIGH_BOUND \
+ else \
+ { \
+ SET_HIGH_BOUND (b); \
+ SET_HIGH_BOUND (begalt); \
+ if (fixup_alt_jump) \
+ SET_HIGH_BOUND (fixup_alt_jump); \
+ if (laststart) \
+ SET_HIGH_BOUND (laststart); \
+ if (pending_exact) \
+ SET_HIGH_BOUND (pending_exact); \
+ }
+# else
+# define MOVE_BUFFER_POINTER(P) (P) += incr
+# define ELSE_EXTEND_BUFFER_HIGH_BOUND
+# endif
+# endif /* not DEFINED_ONCE */
+
+# ifdef WCHAR
+# define EXTEND_BUFFER() \
+ do { \
+ UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
+ int wchar_count; \
+ if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE) \
+ return REG_ESIZE; \
+ bufp->allocated <<= 1; \
+ if (bufp->allocated > MAX_BUF_SIZE) \
+ bufp->allocated = MAX_BUF_SIZE; \
+ /* How many characters the new buffer can have? */ \
+ wchar_count = bufp->allocated / sizeof(UCHAR_T); \
+ if (wchar_count == 0) wchar_count = 1; \
+ /* Truncate the buffer to CHAR_T align. */ \
+ bufp->allocated = wchar_count * sizeof(UCHAR_T); \
+ RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T); \
+ bufp->buffer = (char*)COMPILED_BUFFER_VAR; \
+ if (COMPILED_BUFFER_VAR == NULL) \
+ return REG_ESPACE; \
+ /* If the buffer moved, move all the pointers into it. */ \
+ if (old_buffer != COMPILED_BUFFER_VAR) \
+ { \
+ int incr = COMPILED_BUFFER_VAR - old_buffer; \
+ MOVE_BUFFER_POINTER (b); \
+ MOVE_BUFFER_POINTER (begalt); \
+ if (fixup_alt_jump) \
+ MOVE_BUFFER_POINTER (fixup_alt_jump); \
+ if (laststart) \
+ MOVE_BUFFER_POINTER (laststart); \
+ if (pending_exact) \
+ MOVE_BUFFER_POINTER (pending_exact); \
+ } \
+ ELSE_EXTEND_BUFFER_HIGH_BOUND \
+ } while (0)
+# else /* BYTE */
+# define EXTEND_BUFFER() \
+ do { \
+ UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
+ if (bufp->allocated == MAX_BUF_SIZE) \
+ return REG_ESIZE; \
+ bufp->allocated <<= 1; \
+ if (bufp->allocated > MAX_BUF_SIZE) \
+ bufp->allocated = MAX_BUF_SIZE; \
+ bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR, \
+ bufp->allocated); \
+ if (COMPILED_BUFFER_VAR == NULL) \
+ return REG_ESPACE; \
+ /* If the buffer moved, move all the pointers into it. */ \
+ if (old_buffer != COMPILED_BUFFER_VAR) \
+ { \
+ int incr = COMPILED_BUFFER_VAR - old_buffer; \
+ MOVE_BUFFER_POINTER (b); \
+ MOVE_BUFFER_POINTER (begalt); \
+ if (fixup_alt_jump) \
+ MOVE_BUFFER_POINTER (fixup_alt_jump); \
+ if (laststart) \
+ MOVE_BUFFER_POINTER (laststart); \
+ if (pending_exact) \
+ MOVE_BUFFER_POINTER (pending_exact); \
+ } \
+ ELSE_EXTEND_BUFFER_HIGH_BOUND \
+ } while (0)
+# endif /* WCHAR */
+
+# ifndef DEFINED_ONCE
+/* Since we have one byte reserved for the register number argument to
+ {start,stop}_memory, the maximum number of groups we can report
+ things about is what fits in that byte. */
+# define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+ ignore the excess. */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
+/* int may be not enough when sizeof(int) == 2. */
+typedef long pattern_offset_t;
+
+typedef struct
+{
+ pattern_offset_t begalt_offset;
+ pattern_offset_t fixup_alt_jump;
+ pattern_offset_t inner_group_offset;
+ pattern_offset_t laststart_offset;
+ regnum_t regnum;
+} compile_stack_elt_t;
+
+
+typedef struct
+{
+ compile_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+
+# define INIT_COMPILE_STACK_SIZE 32
+
+# define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+# define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element. */
+# define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+# endif /* not DEFINED_ONCE */
+
+/* Set the bit for character C in a list. */
+# ifndef DEFINED_ONCE
+# define SET_LIST_BIT(c) \
+ (b[((unsigned char) (c)) / BYTEWIDTH] \
+ |= 1 << (((unsigned char) c) % BYTEWIDTH))
+# endif /* DEFINED_ONCE */
+
+/* Get the next unsigned number in the uncompiled pattern. */
+# define GET_UNSIGNED_NUMBER(num) \
+ { \
+ while (p != pend) \
+ { \
+ PATFETCH (c); \
+ if (c < '0' || c > '9') \
+ break; \
+ if (num <= RE_DUP_MAX) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ } \
+ } \
+ }
+
+# ifndef DEFINED_ONCE
+# if defined _LIBC || WIDE_CHAR_SUPPORT
+/* The GNU C library provides support for user-defined character classes
+ and the functions from ISO C amendement 1. */
+# ifdef CHARCLASS_NAME_MAX
+# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+# else
+/* This shouldn't happen but some implementation might still have this
+ problem. Use a reasonable default value. */
+# define CHAR_CLASS_MAX_LENGTH 256
+# endif
+
+# ifdef _LIBC
+# define IS_CHAR_CLASS(string) __wctype (string)
+# else
+# define IS_CHAR_CLASS(string) wctype (string)
+# endif
+# else
+# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+# define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+# endif
+# endif /* DEFINED_ONCE */
+
+# ifndef MATCH_MAY_ALLOCATE
+
+/* If we cannot allocate large objects within re_match_2_internal,
+ we make the fail stack and register vectors global.
+ The fail stack, we grow to the maximum size when a regexp
+ is compiled.
+ The register vectors, we adjust in size each time we
+ compile a regexp, according to the number of registers it needs. */
+
+static PREFIX(fail_stack_type) fail_stack;
+
+/* Size with which the following vectors are currently allocated.
+ That is so we can make them bigger as needed,
+ but never make them smaller. */
+# ifdef DEFINED_ONCE
+static int regs_allocated_size;
+
+static const char ** regstart, ** regend;
+static const char ** old_regstart, ** old_regend;
+static const char **best_regstart, **best_regend;
+static const char **reg_dummy;
+# endif /* DEFINED_ONCE */
+
+static PREFIX(register_info_type) *PREFIX(reg_info);
+static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
+
+/* Make the register vectors big enough for NUM_REGS registers,
+ but don't make them smaller. */
+
+static void
+PREFIX(regex_grow_registers) (int num_regs)
+{
+ if (num_regs > regs_allocated_size)
+ {
+ RETALLOC_IF (regstart, num_regs, const char *);
+ RETALLOC_IF (regend, num_regs, const char *);
+ RETALLOC_IF (old_regstart, num_regs, const char *);
+ RETALLOC_IF (old_regend, num_regs, const char *);
+ RETALLOC_IF (best_regstart, num_regs, const char *);
+ RETALLOC_IF (best_regend, num_regs, const char *);
+ RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
+ RETALLOC_IF (reg_dummy, num_regs, const char *);
+ RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
+
+ regs_allocated_size = num_regs;
+ }
+}
+
+# endif /* not MATCH_MAY_ALLOCATE */
+
+# ifndef DEFINED_ONCE
+static boolean group_in_compile_stack (compile_stack_type compile_stack,
+ regnum_t regnum);
+# endif /* not DEFINED_ONCE */
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is zero;
+ `re_nsub' is the number of subexpressions in PATTERN;
+ `not_bol' and `not_eol' are zero;
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+/* Return, freeing storage we allocated. */
+# ifdef WCHAR
+# define FREE_STACK_RETURN(value) \
+ return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
+# else
+# define FREE_STACK_RETURN(value) \
+ return (free (compile_stack.stack), value)
+# endif /* WCHAR */
+
+static reg_errcode_t
+PREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
+ size_t ARG_PREFIX(size), reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp)
+{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register UCHAR_T c, c1;
+
+#ifdef WCHAR
+ /* A temporary space to keep wchar_t pattern and compiled pattern. */
+ CHAR_T *pattern, *COMPILED_BUFFER_VAR;
+ size_t size;
+ /* offset buffer for optimization. See convert_mbs_to_wc. */
+ int *mbs_offset = NULL;
+ /* It hold whether each wchar_t is binary data or not. */
+ char *is_binary = NULL;
+ /* A flag whether exactn is handling binary data or not. */
+ char is_exactn_bin = FALSE;
+#endif /* WCHAR */
+
+ /* A random temporary spot in PATTERN. */
+ const CHAR_T *p1;
+
+ /* Points to the end of the buffer, where we should append. */
+ register UCHAR_T *b;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+#ifdef WCHAR
+ const CHAR_T *p;
+ const CHAR_T *pend;
+#else /* BYTE */
+ const CHAR_T *p = pattern;
+ const CHAR_T *pend = pattern + size;
+#endif /* WCHAR */
+
+ /* How to translate the characters in the pattern. */
+ RE_TRANSLATE_TYPE translate = bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell if a new exact-match
+ character can be added to that command or if the character requires
+ a new `exactn' command. */
+ UCHAR_T *pending_exact = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells, e.g., postfix * where to find the start of its
+ operand. Reset at the beginning of groups and alternatives. */
+ UCHAR_T *laststart = 0;
+
+ /* Address of beginning of regexp, or inside of last group. */
+ UCHAR_T *begalt;
+
+ /* Address of the place where a forward jump should go to the end of
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
+ UCHAR_T *fixup_alt_jump = 0;
+
+ /* Counts open-groups as they are encountered. Remembered for the
+ matching close-group on the compile stack, so the same register
+ number is put in the stop_memory as the start_memory. */
+ regnum_t regnum = 0;
+
+#ifdef WCHAR
+ /* Initialize the wchar_t PATTERN and offset_buffer. */
+ p = pend = pattern = TALLOC(csize + 1, CHAR_T);
+ mbs_offset = TALLOC(csize + 1, int);
+ is_binary = TALLOC(csize + 1, char);
+ if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
+ {
+ free(pattern);
+ free(mbs_offset);
+ free(is_binary);
+ return REG_ESPACE;
+ }
+ pattern[csize] = L'\0'; /* sentinel */
+ size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
+ pend = p + size;
+ if (size < 0)
+ {
+ free(pattern);
+ free(mbs_offset);
+ free(is_binary);
+ return REG_BADPAT;
+ }
+#endif
+
+#ifdef DEBUG
+ DEBUG_PRINT1 ("\nCompiling pattern: ");
+ if (debug)
+ {
+ unsigned debug_count;
+
+ for (debug_count = 0; debug_count < size; debug_count++)
+ PUT_CHAR (pattern[debug_count]);
+ putchar ('\n');
+ }
+#endif /* DEBUG */
+
+ /* Initialize the compile stack. */
+ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ {
+#ifdef WCHAR
+ free(pattern);
+ free(mbs_offset);
+ free(is_binary);
+#endif
+ return REG_ESPACE;
+ }
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ bufp->syntax = syntax;
+ bufp->fastmap_accurate = 0;
+ bufp->not_bol = bufp->not_eol = 0;
+
+ /* Set `used' to zero, so that if we return an error, the pattern
+ printer (for debugging) will think there's no pattern. We reset it
+ at the end. */
+ bufp->used = 0;
+
+ /* Always count groups, whether or not bufp->no_sub is set. */
+ bufp->re_nsub = 0;
+
+#if !defined emacs && !defined SYNTAX_TABLE
+ /* Initialize the syntax table. */
+ init_syntax_once ();
+#endif
+
+ if (bufp->allocated == 0)
+ {
+ if (bufp->buffer)
+ { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
+#ifdef WCHAR
+ /* Free bufp->buffer and allocate an array for wchar_t pattern
+ buffer. */
+ free(bufp->buffer);
+ COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
+ UCHAR_T);
+#else
+ RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
+#endif /* WCHAR */
+ }
+ else
+ { /* Caller did not allocate a buffer. Do it for them. */
+ COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
+ UCHAR_T);
+ }
+
+ if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
+#ifdef WCHAR
+ bufp->buffer = (char*)COMPILED_BUFFER_VAR;
+#endif /* WCHAR */
+ bufp->allocated = INIT_BUF_SIZE;
+ }
+#ifdef WCHAR
+ else
+ COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
+#endif
+
+ begalt = b = COMPILED_BUFFER_VAR;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || PREFIX(at_begline_loc_p) (pattern, p, syntax))
+ BUF_PUSH (begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || PREFIX(at_endline_loc_p) (p, pend, syntax))
+ BUF_PUSH (endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN (REG_BADRPT);
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* Are we optimizing this jump? */
+ boolean keep_string_p = false;
+
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ and also whether or not two or more matches is allowed. */
+ if (many_times_ok)
+ { /* More than one repetition is allowed, so put in at the
+ end a backward relative jump from `b' to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump).
+
+ But if we are at the `*' in the exact sequence `.*\n',
+ insert an unconditional jump backwards to the .,
+ instead of the beginning of the loop. This way we only
+ push a failure point once, instead of every time
+ through the loop. */
+ assert (p - 1 > pattern);
+
+ /* Allocate the space for the jump. */
+ GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
+
+ /* We know we are not at the first character of the pattern,
+ because laststart was nonzero. And we've already
+ incremented `p', by the way, to be the character after
+ the `*'. Do we have to do something analogous here
+ for null bytes, because of RE_DOT_NOT_NULL? */
+ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ && zero_times_ok
+ && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+ && !(syntax & RE_DOT_NEWLINE))
+ { /* We have .*\n. */
+ STORE_JUMP (jump, b, laststart);
+ keep_string_p = true;
+ }
+ else
+ /* Anything else. */
+ STORE_JUMP (maybe_pop_jump, b, laststart -
+ (1 + OFFSET_ADDRESS_SIZE));
+
+ /* We've added more stuff to the buffer. */
+ b += 1 + OFFSET_ADDRESS_SIZE;
+ }
+
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
+ 'b + 3'. */
+ GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+ : on_failure_jump,
+ laststart, b + 1 + OFFSET_ADDRESS_SIZE);
+ pending_exact = 0;
+ b += 1 + OFFSET_ADDRESS_SIZE;
+
+ if (!zero_times_ok)
+ {
+ /* At least one repetition is required, so insert a
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
+ GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart +
+ 2 + 2 * OFFSET_ADDRESS_SIZE);
+ b += 1 + OFFSET_ADDRESS_SIZE;
+ }
+ }
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH (anychar);
+ break;
+
+
+ case '[':
+ {
+ boolean had_char_class = false;
+#ifdef WCHAR
+ CHAR_T range_start = 0xffffffff;
+#else
+ unsigned int range_start = 0xffffffff;
+#endif
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+#ifdef WCHAR
+ /* We assume a charset(_not) structure as a wchar_t array.
+ charset[0] = (re_opcode_t) charset(_not)
+ charset[1] = l (= length of char_classes)
+ charset[2] = m (= length of collating_symbols)
+ charset[3] = n (= length of equivalence_classes)
+ charset[4] = o (= length of char_ranges)
+ charset[5] = p (= length of chars)
+
+ charset[6] = char_class (wctype_t)
+ charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
+ ...
+ charset[l+5] = char_class (wctype_t)
+
[... 5551 lines stripped ...]