From 14aaf5e8c66b8c6811af669fb3d3fcb724473d69 Mon Sep 17 00:00:00 2001 From: ph10 Date: Wed, 14 Nov 2018 16:59:19 +0000 Subject: [PATCH] Unconditionally use inttypes.h instead of trying for stdint.h (simplification) and remove the now unnecessary inclusion in pcre2_internal.h. --- CMakeLists.txt | 15 +---- ChangeLog | 9 +++ Makefile.am | 2 +- NON-AUTOTOOLS-BUILD | 15 +++-- configure.ac | 5 -- doc/html/NON-AUTOTOOLS-BUILD.txt | 15 +++-- src/config.h.generic | 6 +- src/pcre2.h.generic | 104 ++++++++++++++++--------------- src/pcre2.h.in | 19 ++---- src/pcre2_internal.h | 11 +--- 10 files changed, 94 insertions(+), 107 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eb61221..e653e9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,8 +80,9 @@ # 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30 # 2017-04-08 PH added HEAP_LIMIT # 2017-06-15 ZH added SUPPORT_JIT_SEALLOC support -# 2018-06-19 PH added checks for stdint.h and inttypes.h +# 2018-06-19 PH added checks for stdint.h and inttypes.h (later removed) # 2018-06-27 PH added Daniel's patch to increase the stack for MSVC +# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h PROJECT(PCRE2 C) @@ -115,18 +116,6 @@ CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H) CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H) CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H) -IF(HAVE_INTTYPES_H) - SET(PCRE2_HAVE_INTTYPES_H 1) -ELSE(HAVE_INTTYPES_H) - SET(PCRE2_HAVE_INTTYPES_H 0) -ENDIF(HAVE_INTTYPES_H) - -IF(HAVE_STDINT_H) - SET(PCRE2_HAVE_STDINT_H 1) -ELSE(HAVE_STDINT_H) - SET(PCRE2_HAVE_STDINT_H 0) -ENDIF(HAVE_STDINT_H) - CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY) CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE) CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR) diff --git a/ChangeLog b/ChangeLog index bfa6715..843e108 100644 --- a/ChangeLog +++ b/ChangeLog @@ -59,6 +59,15 @@ units clear. 15. Updated the VMS-specific code in pcre2test on the advice of a VMS user. +16. 
Removed the unnecessary inclusion of stdint.h (or inttypes.h) from +pcre2_internal.h as it is now included by pcre2.h. Also, change 17 for 10.32 +below was unnecessarily complicated, as inttypes.h is a Standard C header, +which is defined to be a superset of stdint.h. Instead of conditionally +including stdint.h or inttypes.h, pcre2.h now unconditionally includes +inttypes.h. This supports environments that do not have stdint.h but do have +inttypes.h, which are known to exist. A note in the autotools documentation +says (November 2018) that there are none known that are the other way round. + Version 10.32 10-September-2018 ------------------------------- diff --git a/Makefile.am b/Makefile.am index a591c52..17facba 100644 --- a/Makefile.am +++ b/Makefile.am @@ -233,7 +233,7 @@ noinst_PROGRAMS = # and 'make maintainer-clean'. CLEANFILES = -DISTCLEANFILES = src/config.h.in~ config.h pcre2.h.generic +DISTCLEANFILES = src/config.h.in~ MAINTAINERCLEANFILES = # Additional files to bundle with the distribution, over and above what diff --git a/NON-AUTOTOOLS-BUILD b/NON-AUTOTOOLS-BUILD index c6f674f..118bc2b 100644 --- a/NON-AUTOTOOLS-BUILD +++ b/NON-AUTOTOOLS-BUILD @@ -47,8 +47,8 @@ can skip ahead to the CMake section. environment. In particular, you can alter the definition of the NEWLINE macro to specify what character(s) you want to be interpreted as line terminators by default. - - When you compile any of the PCRE2 modules, you must specify + + When you subsequently compile any of the PCRE2 modules, you must specify -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the sources. @@ -61,6 +61,11 @@ can skip ahead to the CMake section. configure/make world, this is handled automatically.) When upgrading to a new release, you are strongly advised to review src/config.h.generic before re-using what you had previously. 
+ + Note also that the src/config.h.generic file is created from a config.h + that was generated by Autotools, which automatically includes settings of + a number of macros that are not actually used by PCRE2 (for example, + HAVE_MEMORY_H). (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h. @@ -396,6 +401,6 @@ Everything in that location, source and executable, is in EBCDIC and native z/OS file formats. The port provides an API for LE languages such as COBOL and for the z/OS and z/VM versions of the Rexx languages. -=========================== -Last Updated: 19 April 2018 -=========================== +============================== +Last Updated: 14 November 2018 +============================== diff --git a/configure.ac b/configure.ac index 361b1a1..f912661 100644 --- a/configure.ac +++ b/configure.ac @@ -451,11 +451,6 @@ AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h) AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1]) AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1]) -AC_CHECK_HEADERS([stdint.h], [PCRE2_HAVE_STDINT_H=1], [PCRE2_HAVE_STDINT_H=0]) -AC_CHECK_HEADERS([inttypes.h], [PCRE2_HAVE_INTTYPES_H=1], [PCRE2_HAVE_INTTYPES_H=0]) -AC_SUBST([PCRE2_HAVE_STDINT_H]) -AC_SUBST([PCRE2_HAVE_INTTYPES_H]) - # Conditional compilation AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes") AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes") diff --git a/doc/html/NON-AUTOTOOLS-BUILD.txt b/doc/html/NON-AUTOTOOLS-BUILD.txt index c6f674f..118bc2b 100644 --- a/doc/html/NON-AUTOTOOLS-BUILD.txt +++ b/doc/html/NON-AUTOTOOLS-BUILD.txt @@ -47,8 +47,8 @@ can skip ahead to the CMake section. environment. In particular, you can alter the definition of the NEWLINE macro to specify what character(s) you want to be interpreted as line terminators by default. 
- - When you compile any of the PCRE2 modules, you must specify + + When you subsequently compile any of the PCRE2 modules, you must specify -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the sources. @@ -61,6 +61,11 @@ can skip ahead to the CMake section. configure/make world, this is handled automatically.) When upgrading to a new release, you are strongly advised to review src/config.h.generic before re-using what you had previously. + + Note also that the src/config.h.generic file is created from a config.h + that was generated by Autotools, which automatically includes settings of + a number of macros that are not actually used by PCRE2 (for example, + HAVE_MEMORY_H). (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h. @@ -396,6 +401,6 @@ Everything in that location, source and executable, is in EBCDIC and native z/OS file formats. The port provides an API for LE languages such as COBOL and for the z/OS and z/VM versions of the Rexx languages. -=========================== -Last Updated: 19 April 2018 -=========================== +============================== +Last Updated: 14 November 2018 +============================== diff --git a/src/config.h.generic b/src/config.h.generic index 89a52ef..5ef53d3 100644 --- a/src/config.h.generic +++ b/src/config.h.generic @@ -214,7 +214,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_NAME "PCRE2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE2 10.32" +#define PACKAGE_STRING "PCRE2 10.33-RC1" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pcre2" @@ -223,7 +223,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_URL "" /* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "10.32" +#define PACKAGE_VERSION "10.33-RC1" /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system @@ -343,7 +343,7 @@ sure both macros are undefined; an emulation function will then be used. */ #endif /* Version number of package */ -#define VERSION "10.32" +#define VERSION "10.33-RC1" /* Define to 1 if on MINIX. */ /* #undef _MINIX */ diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic index f6ed686..7e01fbd 100644 --- a/src/pcre2.h.generic +++ b/src/pcre2.h.generic @@ -42,15 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE2_MAJOR 10 -#define PCRE2_MINOR 32 -#define PCRE2_PRERELEASE -#define PCRE2_DATE 2018-09-10 - -/* For the benefit of systems without stdint.h, an alternative is to use -inttypes.h. The existence of these headers is checked by configure or CMake. */ - -#define PCRE2_HAVE_STDINT_H 1 -#define PCRE2_HAVE_INTTYPES_H 1 +#define PCRE2_MINOR 33 +#define PCRE2_PRERELEASE -RC1 +#define PCRE2_DATE 2018-09-14 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate @@ -87,18 +81,15 @@ set, we ensure here that it has no effect. */ #define PCRE2_CALL_CONVENTION #endif -/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure -that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither -header, the relevant values must be provided by some other means. */ +/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and +uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do +not have stdint.h, which is why we use inttypes.h, which according to the C +standard is a superset of stdint.h. If none of these headers are available, +the relevant values must be provided by some other means. 
*/ #include <limits.h> #include <stdlib.h> - -#if PCRE2_HAVE_STDINT_H -#include <stdint.h> -#elif PCRE2_HAVE_INTTYPES_H #include <inttypes.h> -#endif /* Allow for C++ users compiling this directly. */ @@ -158,6 +149,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */ #define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */ #define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */ +#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */ /* These are for pcre2_jit_compile(). */ @@ -166,36 +158,27 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_JIT_PARTIAL_HARD 0x00000004u #define PCRE2_JIT_INVALID_UTF 0x00000100u -/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note -that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these -functions (though pcre2_jit_match() ignores the latter since it bypasses all -sanity checks). */ +/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and +pcre2_substitute(). Some are allowed only for one of the functions, and in +these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and +PCRE2_NO_UTF_CHECK can also be passed to these functions (though +pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */ -#define PCRE2_NOTBOL 0x00000001u -#define PCRE2_NOTEOL 0x00000002u -#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */ -#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */ -#define PCRE2_PARTIAL_SOFT 0x00000010u -#define PCRE2_PARTIAL_HARD 0x00000020u - -/* These are additional options for pcre2_dfa_match(). */ - -#define PCRE2_DFA_RESTART 0x00000040u -#define PCRE2_DFA_SHORTEST 0x00000080u - -/* These are additional options for pcre2_substitute(), which passes any others -through to pcre2_match(). 
*/ - -#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u -#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u -#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u -#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u -#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u - -/* A further option for pcre2_match(), not allowed for pcre2_dfa_match(), -ignored for pcre2_jit_match(). */ - -#define PCRE2_NO_JIT 0x00002000u +#define PCRE2_NOTBOL 0x00000001u +#define PCRE2_NOTEOL 0x00000002u +#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */ +#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */ +#define PCRE2_PARTIAL_SOFT 0x00000010u +#define PCRE2_PARTIAL_HARD 0x00000020u +#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */ +#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */ +#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */ +#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */ +#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u /* Options for pcre2_pattern_convert(). */ @@ -319,6 +302,8 @@ pcre2_pattern_convert(). */ #define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192 #define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193 #define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194 +#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195 +#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196 /* "Expected" matching error codes: no match and partial match. */ @@ -505,10 +490,10 @@ typedef struct pcre2_real_jit_stack pcre2_jit_stack; \ typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *); -/* The structure for passing out data via the pcre_callout_function. 
We use a -structure so that new fields can be added on the end in future versions, -without changing the API of the function, thereby allowing old clients to work -without modification. Define the generic version in a macro; the width-specific +/* The structures for passing out data via callout functions. We use structures +so that new fields can be added on the end in future versions, without changing +the API of the function, thereby allowing old clients to work without +modification. Define the generic versions in a macro; the width-specific versions are generated from this macro below. */ /* Flags for the callout_flags field. These are cleared after a callout. */ @@ -550,7 +535,19 @@ typedef struct pcre2_callout_enumerate_block { \ PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \ PCRE2_SPTR callout_string; /* String compiled into pattern */ \ /* ------------------------------------------------------------------ */ \ -} pcre2_callout_enumerate_block; +} pcre2_callout_enumerate_block; \ +\ +typedef struct pcre2_substitute_callout_block { \ + uint32_t version; /* Identifies version of block */ \ + /* ------------------------ Version 0 ------------------------------- */ \ + PCRE2_SPTR input; /* Pointer to input subject string */ \ + PCRE2_SPTR output; /* Pointer to output buffer */ \ + PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \ + PCRE2_SIZE *ovector; /* Pointer to current ovector */ \ + uint32_t oveccount; /* Count of pairs set in ovector */ \ + uint32_t subscount; /* Substitution number */ \ + /* ------------------------------------------------------------------ */ \ +} pcre2_substitute_callout_block; /* List the generic forms of all other functions in macros, which will be @@ -605,6 +602,9 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_callout(pcre2_match_context *, \ int (*)(pcre2_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int 
PCRE2_CALL_CONVENTION \ + pcre2_set_substitute_callout(pcre2_match_context *, \ + int (*)(pcre2_substitute_callout_block *, void *), void *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ @@ -808,6 +808,7 @@ pcre2_compile are called by application code. */ #define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_) #define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_) +#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_) #define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_) #define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_) #define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_) @@ -873,6 +874,7 @@ pcre2_compile are called by application code. */ #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) +#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_) #define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) #define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) #define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 7af28d4..bd1b04f 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -46,12 +46,6 @@ POSSIBILITY OF SUCH DAMAGE. #define PCRE2_PRERELEASE @PCRE2_PRERELEASE@ #define PCRE2_DATE @PCRE2_DATE@ -/* For the benefit of systems without stdint.h, an alternative is to use -inttypes.h. The existence of these headers is checked by configure or CMake. */ - -#define PCRE2_HAVE_STDINT_H @PCRE2_HAVE_STDINT_H@ -#define PCRE2_HAVE_INTTYPES_H @PCRE2_HAVE_INTTYPES_H@ - /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. 
When building PCRE2, the appropriate export setting is defined in pcre2_internal.h, which includes this file. So we @@ -87,18 +81,15 @@ set, we ensure here that it has no effect. */ #define PCRE2_CALL_CONVENTION #endif -/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure -that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither -header, the relevant values must be provided by some other means. */ +/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and +uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do +not have stdint.h, which is why we use inttypes.h, which according to the C +standard is a superset of stdint.h. If none of these headers are available, +the relevant values must be provided by some other means. */ #include <limits.h> #include <stdlib.h> - -#if PCRE2_HAVE_STDINT_H -#include <stdint.h> -#elif PCRE2_HAVE_INTTYPES_H #include <inttypes.h> -#endif /* Allow for C++ users compiling this directly. */ diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 4f50eef..b4efcf0 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -148,16 +148,7 @@ pcre2_match() because of the way it backtracks. */ /* When checking for integer overflow in pcre2_compile(), we need to handle large integers. If a 64-bit integer type is available, we can use that. Otherwise we have to cast to double, which of course requires floating point -arithmetic. Handle this by defining a macro for the appropriate type. If -stdint.h is available, include it; it may define INT64_MAX. Systems that do not -have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set -by "configure". */ - -#if defined HAVE_STDINT_H -#include <stdint.h> -#elif defined HAVE_INTTYPES_H -#include <inttypes.h> -#endif +arithmetic. Handle this by defining a macro for the appropriate type. */ #if defined INT64_MAX || defined int64_t #define INT64_OR_DOUBLE int64_t