Merge pull request #22197 from oskarirauta/zsh
zsh: use libpcre2 instead of libpcre
This commit is contained in:
commit
b158d62460
6 changed files with 1069 additions and 2 deletions
|
@ -9,7 +9,7 @@ include $(TOPDIR)/rules.mk
|
||||||
|
|
||||||
PKG_NAME:=zsh
|
PKG_NAME:=zsh
|
||||||
PKG_VERSION:=5.9
|
PKG_VERSION:=5.9
|
||||||
PKG_RELEASE:=1
|
PKG_RELEASE:=2
|
||||||
|
|
||||||
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.xz
|
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.xz
|
||||||
PKG_SOURCE_URL:=@SF/zsh
|
PKG_SOURCE_URL:=@SF/zsh
|
||||||
|
@ -20,10 +20,19 @@ PKG_LICENSE:=ZSH
|
||||||
PKG_LICENSE_FILES:=LICENCE
|
PKG_LICENSE_FILES:=LICENCE
|
||||||
PKG_CPE_ID:=cpe:/a:zsh_project:zsh
|
PKG_CPE_ID:=cpe:/a:zsh_project:zsh
|
||||||
|
|
||||||
|
PKG_FIXUP:=autoreconf
|
||||||
PKG_BUILD_PARALLEL:=1
|
PKG_BUILD_PARALLEL:=1
|
||||||
PKG_INSTALL:=1
|
PKG_INSTALL:=1
|
||||||
PKG_BUILD_FLAGS:=gc-sections lto
|
PKG_BUILD_FLAGS:=gc-sections lto
|
||||||
|
|
||||||
|
# zsh include custom macro in the default aclocal.m4
|
||||||
|
# When autoreconf PKG_FIXUP is used, if PKG_REMOVE_FILES
|
||||||
|
# is not defined, it's set to remove the file aclocak.m4
|
||||||
|
# by default resulting in problem with the custom macro
|
||||||
|
# AC_PROG_LN
|
||||||
|
# To prevent this, declare empty PKG_REMOVE_FILES
|
||||||
|
PKG_REMOVE_FILES:=
|
||||||
|
|
||||||
include $(INCLUDE_DIR)/package.mk
|
include $(INCLUDE_DIR)/package.mk
|
||||||
|
|
||||||
define Package/zsh
|
define Package/zsh
|
||||||
|
@ -31,7 +40,7 @@ define Package/zsh
|
||||||
CATEGORY:=Utilities
|
CATEGORY:=Utilities
|
||||||
SUBMENU:=Shells
|
SUBMENU:=Shells
|
||||||
TITLE:=The Z shell
|
TITLE:=The Z shell
|
||||||
DEPENDS:=+libcap +libncurses +libncursesw +libpcre +librt
|
DEPENDS:=+libcap +libncurses +libncursesw +libpcre2 +librt
|
||||||
URL:=https://www.zsh.org/
|
URL:=https://www.zsh.org/
|
||||||
endef
|
endef
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,71 @@
|
||||||
|
From 1b421e4978440234fb73117c8505dad1ccc68d46 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
|
||||||
|
Date: Mon, 26 Sep 2022 10:52:50 +0900
|
||||||
|
Subject: [PATCH] 50658 + test: Enable to switch between C/UTF-8 locales in
|
||||||
|
PCRE
|
||||||
|
|
||||||
|
---
|
||||||
|
ChangeLog | 5 +++++
|
||||||
|
Src/Modules/pcre.c | 10 ++--------
|
||||||
|
Test/V07pcre.ztst | 11 +++++++++++
|
||||||
|
3 files changed, 18 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
# diff --git a/ChangeLog b/ChangeLog
|
||||||
|
# index 48c65d01b..77345c050 100644
|
||||||
|
# --- a/ChangeLog
|
||||||
|
# +++ b/ChangeLog
|
||||||
|
# @@ -1,3 +1,8 @@
|
||||||
|
# +2022-09-26 Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
|
||||||
|
# +
|
||||||
|
# + * 50658 + test: Src/Modules/pcre.c, Test/V07pcre.ztst: Enable to
|
||||||
|
# + switch between C/UTF-8 locales in PCRE
|
||||||
|
# +
|
||||||
|
# 2022-09-25 Peter Stephenson <p.w.stephenson@ntlworld.com>
|
||||||
|
|
||||||
|
# * 50648: Functions/Misc/zcalc: Julian Prein: Use ZCALC_HISTFILE
|
||||||
|
--- a/Src/Modules/pcre.c
|
||||||
|
+++ b/Src/Modules/pcre.c
|
||||||
|
@@ -47,8 +47,6 @@ zpcre_utf8_enabled(void)
|
||||||
|
#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
|
||||||
|
static int have_utf8_pcre = -1;
|
||||||
|
|
||||||
|
- /* value can toggle based on MULTIBYTE, so don't
|
||||||
|
- * be too eager with caching */
|
||||||
|
if (have_utf8_pcre < -1)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
@@ -56,15 +54,11 @@ zpcre_utf8_enabled(void)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if ((have_utf8_pcre == -1) &&
|
||||||
|
- (!strcmp(nl_langinfo(CODESET), "UTF-8"))) {
|
||||||
|
-
|
||||||
|
- if (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))
|
||||||
|
+ (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))) {
|
||||||
|
have_utf8_pcre = -2; /* erk, failed to ask */
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (have_utf8_pcre < 0)
|
||||||
|
- return 0;
|
||||||
|
- return have_utf8_pcre;
|
||||||
|
+ return (have_utf8_pcre == 1) && (!strcmp(nl_langinfo(CODESET), "UTF-8"));
|
||||||
|
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
--- a/Test/V07pcre.ztst
|
||||||
|
+++ b/Test/V07pcre.ztst
|
||||||
|
@@ -174,3 +174,14 @@
|
||||||
|
echo $match[2] )
|
||||||
|
0:regression for segmentation fault, workers/38307
|
||||||
|
>test
|
||||||
|
+
|
||||||
|
+ LANG_SAVE=$LANG
|
||||||
|
+ [[ é =~ '^.\z' ]]; echo $?
|
||||||
|
+ LANG=C
|
||||||
|
+ [[ é =~ '^..\z' ]]; echo $?
|
||||||
|
+ LANG=$LANG_SAVE
|
||||||
|
+ [[ é =~ '^.\z' ]]; echo $?
|
||||||
|
+0:swich between C/UTF-8 locales
|
||||||
|
+>0
|
||||||
|
+>0
|
||||||
|
+>0
|
540
utils/zsh/patches/002-51723-migrate-pcre-module-to-pcre2.patch
Normal file
540
utils/zsh/patches/002-51723-migrate-pcre-module-to-pcre2.patch
Normal file
|
@ -0,0 +1,540 @@
|
||||||
|
From b62e911341c8ec7446378b477c47da4256053dc0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Oliver Kiddle <opk@zsh.org>
|
||||||
|
Date: Sat, 13 May 2023 00:53:32 +0200
|
||||||
|
Subject: [PATCH] 51723: migrate pcre module to pcre2
|
||||||
|
|
||||||
|
---
|
||||||
|
ChangeLog | 3 +
|
||||||
|
Src/Modules/pcre.c | 223 ++++++++++++++++++---------------------------
|
||||||
|
Test/V07pcre.ztst | 13 ++-
|
||||||
|
configure.ac | 20 ++--
|
||||||
|
4 files changed, 110 insertions(+), 149 deletions(-)
|
||||||
|
|
||||||
|
# diff --git a/ChangeLog b/ChangeLog
|
||||||
|
# index f5c77f801..285b73b2c 100644
|
||||||
|
# --- a/ChangeLog
|
||||||
|
# +++ b/ChangeLog
|
||||||
|
# @@ -1,5 +1,8 @@
|
||||||
|
# 2023-05-13 Oliver Kiddle <opk@zsh.org>
|
||||||
|
|
||||||
|
# + * 51723: Src/Modules/pcre.c, Test/V07pcre.ztst, configure.ac:
|
||||||
|
# + migrate pcre module to pcre2
|
||||||
|
# +
|
||||||
|
# * Felipe Contreras: 50612: Misc/vcs_info-examples: fix typo
|
||||||
|
|
||||||
|
# * github #98: Vidhan Bhatt: Completion/Darwin/Command/_shortcuts:
|
||||||
|
--- a/Src/Modules/pcre.c
|
||||||
|
+++ b/Src/Modules/pcre.c
|
||||||
|
@@ -34,11 +34,11 @@
|
||||||
|
#define CPCRE_PLAIN 0
|
||||||
|
|
||||||
|
/**/
|
||||||
|
-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
|
||||||
|
-#include <pcre.h>
|
||||||
|
+#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H)
|
||||||
|
+#define PCRE2_CODE_UNIT_WIDTH 8
|
||||||
|
+#include <pcre2.h>
|
||||||
|
|
||||||
|
-static pcre *pcre_pattern;
|
||||||
|
-static pcre_extra *pcre_hints;
|
||||||
|
+static pcre2_code *pcre_pattern;
|
||||||
|
|
||||||
|
/**/
|
||||||
|
static int
|
||||||
|
@@ -54,8 +54,8 @@ zpcre_utf8_enabled(void)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if ((have_utf8_pcre == -1) &&
|
||||||
|
- (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))) {
|
||||||
|
- have_utf8_pcre = -2; /* erk, failed to ask */
|
||||||
|
+ (pcre2_config(PCRE2_CONFIG_UNICODE, &have_utf8_pcre))) {
|
||||||
|
+ have_utf8_pcre = -2; /* erk, failed to ask */
|
||||||
|
}
|
||||||
|
|
||||||
|
return (have_utf8_pcre == 1) && (!strcmp(nl_langinfo(CODESET), "UTF-8"));
|
||||||
|
@@ -69,47 +69,38 @@ zpcre_utf8_enabled(void)
|
||||||
|
static int
|
||||||
|
bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
|
||||||
|
{
|
||||||
|
- int pcre_opts = 0, pcre_errptr, target_len;
|
||||||
|
- const char *pcre_error;
|
||||||
|
+ uint32_t pcre_opts = 0;
|
||||||
|
+ int target_len;
|
||||||
|
+ int pcre_error;
|
||||||
|
+ PCRE2_SIZE pcre_offset;
|
||||||
|
char *target;
|
||||||
|
|
||||||
|
- if(OPT_ISSET(ops,'a')) pcre_opts |= PCRE_ANCHORED;
|
||||||
|
- if(OPT_ISSET(ops,'i')) pcre_opts |= PCRE_CASELESS;
|
||||||
|
- if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE;
|
||||||
|
- if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED;
|
||||||
|
- if(OPT_ISSET(ops,'s')) pcre_opts |= PCRE_DOTALL;
|
||||||
|
+ if (OPT_ISSET(ops, 'a')) pcre_opts |= PCRE2_ANCHORED;
|
||||||
|
+ if (OPT_ISSET(ops, 'i')) pcre_opts |= PCRE2_CASELESS;
|
||||||
|
+ if (OPT_ISSET(ops, 'm')) pcre_opts |= PCRE2_MULTILINE;
|
||||||
|
+ if (OPT_ISSET(ops, 'x')) pcre_opts |= PCRE2_EXTENDED;
|
||||||
|
+ if (OPT_ISSET(ops, 's')) pcre_opts |= PCRE2_DOTALL;
|
||||||
|
|
||||||
|
if (zpcre_utf8_enabled())
|
||||||
|
- pcre_opts |= PCRE_UTF8;
|
||||||
|
-
|
||||||
|
-#ifdef HAVE_PCRE_STUDY
|
||||||
|
- if (pcre_hints)
|
||||||
|
-#ifdef PCRE_CONFIG_JIT
|
||||||
|
- pcre_free_study(pcre_hints);
|
||||||
|
-#else
|
||||||
|
- pcre_free(pcre_hints);
|
||||||
|
-#endif
|
||||||
|
- pcre_hints = NULL;
|
||||||
|
-#endif
|
||||||
|
+ pcre_opts |= PCRE2_UTF;
|
||||||
|
|
||||||
|
if (pcre_pattern)
|
||||||
|
- pcre_free(pcre_pattern);
|
||||||
|
+ pcre2_code_free(pcre_pattern);
|
||||||
|
pcre_pattern = NULL;
|
||||||
|
|
||||||
|
target = ztrdup(*args);
|
||||||
|
unmetafy(target, &target_len);
|
||||||
|
|
||||||
|
- if ((int)strlen(target) != target_len) {
|
||||||
|
- zwarnnam(nam, "embedded NULs in PCRE pattern terminate pattern");
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- pcre_pattern = pcre_compile(target, pcre_opts, &pcre_error, &pcre_errptr, NULL);
|
||||||
|
+ pcre_pattern = pcre2_compile((PCRE2_SPTR) target, (PCRE2_SIZE) target_len,
|
||||||
|
+ pcre_opts, &pcre_error, &pcre_offset, NULL);
|
||||||
|
|
||||||
|
free(target);
|
||||||
|
|
||||||
|
if (pcre_pattern == NULL)
|
||||||
|
{
|
||||||
|
- zwarnnam(nam, "error in regex: %s", pcre_error);
|
||||||
|
+ PCRE2_UCHAR buffer[256];
|
||||||
|
+ pcre2_get_error_message(pcre_error, buffer, sizeof(buffer));
|
||||||
|
+ zwarnnam(nam, "error in regex: %s", buffer);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -117,67 +108,48 @@ bin_pcre_compile(char *nam, char **args,
|
||||||
|
}
|
||||||
|
|
||||||
|
/**/
|
||||||
|
-#ifdef HAVE_PCRE_STUDY
|
||||||
|
-
|
||||||
|
-/**/
|
||||||
|
static int
|
||||||
|
bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int func))
|
||||||
|
{
|
||||||
|
- const char *pcre_error;
|
||||||
|
-
|
||||||
|
if (pcre_pattern == NULL)
|
||||||
|
{
|
||||||
|
zwarnnam(nam, "no pattern has been compiled for study");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
-
|
||||||
|
- if (pcre_hints)
|
||||||
|
-#ifdef PCRE_CONFIG_JIT
|
||||||
|
- pcre_free_study(pcre_hints);
|
||||||
|
-#else
|
||||||
|
- pcre_free(pcre_hints);
|
||||||
|
-#endif
|
||||||
|
- pcre_hints = NULL;
|
||||||
|
|
||||||
|
- pcre_hints = pcre_study(pcre_pattern, 0, &pcre_error);
|
||||||
|
- if (pcre_error != NULL)
|
||||||
|
- {
|
||||||
|
- zwarnnam(nam, "error while studying regex: %s", pcre_error);
|
||||||
|
- return 1;
|
||||||
|
+ int jit = 0;
|
||||||
|
+ if (!pcre2_config(PCRE2_CONFIG_JIT, &jit) && jit) {
|
||||||
|
+ if (pcre2_jit_compile(pcre_pattern, PCRE2_JIT_COMPLETE) < 0) {
|
||||||
|
+ zwarnnam(nam, "error while studying regex");
|
||||||
|
+ return 1;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/**/
|
||||||
|
-#else /* !HAVE_PCRE_STUDY */
|
||||||
|
-
|
||||||
|
-# define bin_pcre_study bin_notavail
|
||||||
|
-
|
||||||
|
-/**/
|
||||||
|
-#endif /* !HAVE_PCRE_STUDY */
|
||||||
|
-
|
||||||
|
-/**/
|
||||||
|
static int
|
||||||
|
-zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar,
|
||||||
|
- char *substravar, int want_offset_pair, int matchedinarr,
|
||||||
|
- int want_begin_end)
|
||||||
|
+zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count,
|
||||||
|
+ char *matchvar, char *substravar, int want_offset_pair,
|
||||||
|
+ int matchedinarr, int want_begin_end)
|
||||||
|
{
|
||||||
|
- char **captures, *match_all, **matches;
|
||||||
|
+ PCRE2_SIZE *ovec;
|
||||||
|
+ char *match_all, **matches;
|
||||||
|
char offset_all[50];
|
||||||
|
int capture_start = 1;
|
||||||
|
|
||||||
|
if (matchedinarr) {
|
||||||
|
- /* bash-style captures[0] entire-matched string in the array */
|
||||||
|
+ /* bash-style ovec[0] entire-matched string in the array */
|
||||||
|
capture_start = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
- /* captures[0] will be entire matched string, [1] first substring */
|
||||||
|
- if (!pcre_get_substring_list(arg, ovec, captured_count, (const char ***)&captures)) {
|
||||||
|
- int nelem = arrlen(captures)-1;
|
||||||
|
+ /* ovec[0] will be entire matched string, [1] first substring */
|
||||||
|
+ ovec = pcre2_get_ovector_pointer(mdata);
|
||||||
|
+ if (ovec) {
|
||||||
|
+ int nelem = captured_count - 1;
|
||||||
|
/* Set to the offsets of the complete match */
|
||||||
|
if (want_offset_pair) {
|
||||||
|
- sprintf(offset_all, "%d %d", ovec[0], ovec[1]);
|
||||||
|
+ sprintf(offset_all, "%ld %ld", ovec[0], ovec[1]);
|
||||||
|
setsparam("ZPCRE_OP", ztrdup(offset_all));
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
@@ -186,7 +158,7 @@ zpcre_get_substrings(char *arg, int *ove
|
||||||
|
* ovec is length 2*(1+capture_list_length)
|
||||||
|
*/
|
||||||
|
if (matchvar) {
|
||||||
|
- match_all = metafy(captures[0], ovec[1] - ovec[0], META_DUP);
|
||||||
|
+ match_all = metafy(arg + ovec[0], ovec[1] - ovec[0], META_DUP);
|
||||||
|
setsparam(matchvar, match_all);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
@@ -201,16 +173,12 @@ zpcre_get_substrings(char *arg, int *ove
|
||||||
|
*/
|
||||||
|
if (substravar &&
|
||||||
|
(!want_begin_end || nelem)) {
|
||||||
|
- char **x, **y;
|
||||||
|
+ char **x;
|
||||||
|
int vec_off, i;
|
||||||
|
- y = &captures[capture_start];
|
||||||
|
matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start));
|
||||||
|
- for (i = capture_start; i < captured_count; i++, y++) {
|
||||||
|
+ for (i = capture_start; i < captured_count; i++) {
|
||||||
|
vec_off = 2*i;
|
||||||
|
- if (*y)
|
||||||
|
- *x++ = metafy(*y, ovec[vec_off+1]-ovec[vec_off], META_DUP);
|
||||||
|
- else
|
||||||
|
- *x++ = NULL;
|
||||||
|
+ *x++ = metafy(arg + ovec[vec_off], ovec[vec_off+1]-ovec[vec_off], META_DUP);
|
||||||
|
}
|
||||||
|
*x = NULL;
|
||||||
|
setaparam(substravar, matches);
|
||||||
|
@@ -247,7 +215,8 @@ zpcre_get_substrings(char *arg, int *ove
|
||||||
|
setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
|
||||||
|
if (nelem) {
|
||||||
|
char **mbegin, **mend, **bptr, **eptr;
|
||||||
|
- int i, *ipair;
|
||||||
|
+ int i;
|
||||||
|
+ size_t *ipair;
|
||||||
|
|
||||||
|
bptr = mbegin = zalloc(sizeof(char*)*(nelem+1));
|
||||||
|
eptr = mend = zalloc(sizeof(char*)*(nelem+1));
|
||||||
|
@@ -287,8 +256,6 @@ zpcre_get_substrings(char *arg, int *ove
|
||||||
|
setaparam("mend", mend);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
-
|
||||||
|
- pcre_free_substring_list((const char **)captures);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
@@ -314,7 +281,8 @@ getposint(char *instr, char *nam)
|
||||||
|
static int
|
||||||
|
bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
|
||||||
|
{
|
||||||
|
- int ret, capcount, *ovec, ovecsize, c;
|
||||||
|
+ int ret, c;
|
||||||
|
+ pcre2_match_data *pcre_mdata = NULL;
|
||||||
|
char *matched_portion = NULL;
|
||||||
|
char *plaintext = NULL;
|
||||||
|
char *receptacle = NULL;
|
||||||
|
@@ -344,36 +312,30 @@ bin_pcre_match(char *nam, char **args, O
|
||||||
|
/* For the entire match, 'Return' the offset byte positions instead of the matched string */
|
||||||
|
if(OPT_ISSET(ops,'b')) want_offset_pair = 1;
|
||||||
|
|
||||||
|
- if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount)))
|
||||||
|
- {
|
||||||
|
- zwarnnam(nam, "error %d in fullinfo", ret);
|
||||||
|
- return 1;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- ovecsize = (capcount+1)*3;
|
||||||
|
- ovec = zalloc(ovecsize*sizeof(int));
|
||||||
|
-
|
||||||
|
plaintext = ztrdup(*args);
|
||||||
|
unmetafy(plaintext, &subject_len);
|
||||||
|
|
||||||
|
if (offset_start > 0 && offset_start >= subject_len)
|
||||||
|
- ret = PCRE_ERROR_NOMATCH;
|
||||||
|
- else
|
||||||
|
- ret = pcre_exec(pcre_pattern, pcre_hints, plaintext, subject_len, offset_start, 0, ovec, ovecsize);
|
||||||
|
+ ret = PCRE2_ERROR_NOMATCH;
|
||||||
|
+ else {
|
||||||
|
+ pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL);
|
||||||
|
+ ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len,
|
||||||
|
+ offset_start, 0, pcre_mdata, NULL);
|
||||||
|
+ }
|
||||||
|
|
||||||
|
if (ret==0) return_value = 0;
|
||||||
|
- else if (ret==PCRE_ERROR_NOMATCH) /* no match */;
|
||||||
|
+ else if (ret == PCRE2_ERROR_NOMATCH) /* no match */;
|
||||||
|
else if (ret>0) {
|
||||||
|
- zpcre_get_substrings(plaintext, ovec, ret, matched_portion, receptacle,
|
||||||
|
+ zpcre_get_substrings(plaintext, pcre_mdata, ret, matched_portion, receptacle,
|
||||||
|
want_offset_pair, 0, 0);
|
||||||
|
return_value = 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
- zwarnnam(nam, "error in pcre_exec [%d]", ret);
|
||||||
|
+ zwarnnam(nam, "error in pcre2_match [%d]", ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (ovec)
|
||||||
|
- zfree(ovec, ovecsize*sizeof(int));
|
||||||
|
+ if (pcre_mdata)
|
||||||
|
+ pcre2_match_data_free(pcre_mdata);
|
||||||
|
zsfree(plaintext);
|
||||||
|
|
||||||
|
return return_value;
|
||||||
|
@@ -383,17 +345,19 @@ bin_pcre_match(char *nam, char **args, O
|
||||||
|
static int
|
||||||
|
cond_pcre_match(char **a, int id)
|
||||||
|
{
|
||||||
|
- pcre *pcre_pat;
|
||||||
|
- const char *pcre_err;
|
||||||
|
+ pcre2_code *pcre_pat = NULL;
|
||||||
|
+ int pcre_err;
|
||||||
|
+ PCRE2_SIZE pcre_erroff;
|
||||||
|
char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar, *svar;
|
||||||
|
- int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
|
||||||
|
+ int r = 0, pcre_opts = 0;
|
||||||
|
+ pcre2_match_data *pcre_mdata = NULL;
|
||||||
|
int lhstr_plain_len, rhre_plain_len;
|
||||||
|
int return_value = 0;
|
||||||
|
|
||||||
|
if (zpcre_utf8_enabled())
|
||||||
|
- pcre_opts |= PCRE_UTF8;
|
||||||
|
+ pcre_opts |= PCRE2_UTF;
|
||||||
|
if (isset(REMATCHPCRE) && !isset(CASEMATCH))
|
||||||
|
- pcre_opts |= PCRE_CASELESS;
|
||||||
|
+ pcre_opts |= PCRE2_CASELESS;
|
||||||
|
|
||||||
|
lhstr = cond_str(a,0,0);
|
||||||
|
rhre = cond_str(a,1,0);
|
||||||
|
@@ -401,9 +365,6 @@ cond_pcre_match(char **a, int id)
|
||||||
|
rhre_plain = ztrdup(rhre);
|
||||||
|
unmetafy(lhstr_plain, &lhstr_plain_len);
|
||||||
|
unmetafy(rhre_plain, &rhre_plain_len);
|
||||||
|
- pcre_pat = NULL;
|
||||||
|
- ov = NULL;
|
||||||
|
- ovsize = 0;
|
||||||
|
|
||||||
|
if (isset(BASHREMATCH)) {
|
||||||
|
svar = NULL;
|
||||||
|
@@ -415,27 +376,27 @@ cond_pcre_match(char **a, int id)
|
||||||
|
|
||||||
|
switch(id) {
|
||||||
|
case CPCRE_PLAIN:
|
||||||
|
- if ((int)strlen(rhre_plain) != rhre_plain_len) {
|
||||||
|
- zwarn("embedded NULs in PCRE pattern terminate pattern");
|
||||||
|
- }
|
||||||
|
- pcre_pat = pcre_compile(rhre_plain, pcre_opts, &pcre_err, &pcre_errptr, NULL);
|
||||||
|
- if (pcre_pat == NULL) {
|
||||||
|
- zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err);
|
||||||
|
+ if (!(pcre_pat = pcre2_compile((PCRE2_SPTR) rhre_plain,
|
||||||
|
+ (PCRE2_SIZE) rhre_plain_len, pcre_opts,
|
||||||
|
+ &pcre_err, &pcre_erroff, NULL)))
|
||||||
|
+ {
|
||||||
|
+ PCRE2_UCHAR buffer[256];
|
||||||
|
+ pcre2_get_error_message(pcre_err, buffer, sizeof(buffer));
|
||||||
|
+ zwarn("failed to compile regexp /%s/: %s", rhre, buffer);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
- pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
|
||||||
|
- ovsize = (capcnt+1)*3;
|
||||||
|
- ov = zalloc(ovsize*sizeof(int));
|
||||||
|
- r = pcre_exec(pcre_pat, NULL, lhstr_plain, lhstr_plain_len, 0, 0, ov, ovsize);
|
||||||
|
- /* r < 0 => error; r==0 match but not enough size in ov
|
||||||
|
+ pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pat, NULL);
|
||||||
|
+ r = pcre2_match(pcre_pat, (PCRE2_SPTR8) lhstr_plain, lhstr_plain_len,
|
||||||
|
+ 0, 0, pcre_mdata, NULL);
|
||||||
|
+ /* r < 0 => error; r==0 match but not enough size in match data
|
||||||
|
* r > 0 => (r-1) substrings found; r==1 => no substrings
|
||||||
|
*/
|
||||||
|
if (r==0) {
|
||||||
|
- zwarn("reportable zsh problem: pcre_exec() returned 0");
|
||||||
|
+ zwarn("reportable zsh problem: pcre2_match() returned 0");
|
||||||
|
return_value = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
- else if (r==PCRE_ERROR_NOMATCH) {
|
||||||
|
+ else if (r == PCRE2_ERROR_NOMATCH) {
|
||||||
|
return_value = 0; /* no match */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
@@ -444,7 +405,7 @@ cond_pcre_match(char **a, int id)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (r>0) {
|
||||||
|
- zpcre_get_substrings(lhstr_plain, ov, r, svar, avar, 0,
|
||||||
|
+ zpcre_get_substrings(lhstr_plain, pcre_mdata, r, svar, avar, 0,
|
||||||
|
isset(BASHREMATCH),
|
||||||
|
!isset(BASHREMATCH));
|
||||||
|
return_value = 1;
|
||||||
|
@@ -457,10 +418,10 @@ cond_pcre_match(char **a, int id)
|
||||||
|
free(lhstr_plain);
|
||||||
|
if(rhre_plain)
|
||||||
|
free(rhre_plain);
|
||||||
|
+ if (pcre_mdata)
|
||||||
|
+ pcre2_match_data_free(pcre_mdata);
|
||||||
|
if (pcre_pat)
|
||||||
|
- pcre_free(pcre_pat);
|
||||||
|
- if (ov)
|
||||||
|
- zfree(ov, ovsize*sizeof(int));
|
||||||
|
+ pcre2_code_free(pcre_pat);
|
||||||
|
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
@@ -489,11 +450,11 @@ static struct builtin bintab[] = {
|
||||||
|
|
||||||
|
static struct features module_features = {
|
||||||
|
bintab, sizeof(bintab)/sizeof(*bintab),
|
||||||
|
-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
|
||||||
|
+#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H)
|
||||||
|
cotab, sizeof(cotab)/sizeof(*cotab),
|
||||||
|
-#else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
|
||||||
|
+#else /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */
|
||||||
|
NULL, 0,
|
||||||
|
-#endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
|
||||||
|
+#endif /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */
|
||||||
|
NULL, 0,
|
||||||
|
NULL, 0,
|
||||||
|
0
|
||||||
|
@@ -540,19 +501,9 @@ cleanup_(Module m)
|
||||||
|
int
|
||||||
|
finish_(UNUSED(Module m))
|
||||||
|
{
|
||||||
|
-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
|
||||||
|
-#ifdef HAVE_PCRE_STUDY
|
||||||
|
- if (pcre_hints)
|
||||||
|
-#ifdef PCRE_CONFIG_JIT
|
||||||
|
- pcre_free_study(pcre_hints);
|
||||||
|
-#else
|
||||||
|
- pcre_free(pcre_hints);
|
||||||
|
-#endif
|
||||||
|
- pcre_hints = NULL;
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
+#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H)
|
||||||
|
if (pcre_pattern)
|
||||||
|
- pcre_free(pcre_pattern);
|
||||||
|
+ pcre2_code_free(pcre_pattern);
|
||||||
|
pcre_pattern = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
--- a/Test/V07pcre.ztst
|
||||||
|
+++ b/Test/V07pcre.ztst
|
||||||
|
@@ -129,12 +129,17 @@
|
||||||
|
>78884; ZPCRE_OP: 25 30
|
||||||
|
>90210; ZPCRE_OP: 31 36
|
||||||
|
|
||||||
|
-# Embedded NULs allowed in plaintext, but not in RE (although \0 as two-chars allowed)
|
||||||
|
+# Embedded NULs allowed in plaintext, in RE, pcre supports \0 as two-chars
|
||||||
|
[[ $'a\0bc\0d' =~ '^(a\0.)(.+)$' ]]
|
||||||
|
print "${#MATCH}; ${#match[1]}; ${#match[2]}"
|
||||||
|
0:ensure ASCII NUL passes in and out of matched plaintext
|
||||||
|
>6; 3; 3
|
||||||
|
|
||||||
|
+# PCRE2 supports NULs also in the RE
|
||||||
|
+ [[ $'a\0b\0c' =~ $'^(.\0)+' ]] && print "${#MATCH}; ${#match[1]}"
|
||||||
|
+0:ensure ASCII NUL works also in the regex
|
||||||
|
+>4; 2
|
||||||
|
+
|
||||||
|
# Ensure the long-form infix operator works
|
||||||
|
[[ foo -pcre-match ^f..$ ]]
|
||||||
|
print $?
|
||||||
|
@@ -181,7 +186,11 @@
|
||||||
|
[[ é =~ '^..\z' ]]; echo $?
|
||||||
|
LANG=$LANG_SAVE
|
||||||
|
[[ é =~ '^.\z' ]]; echo $?
|
||||||
|
-0:swich between C/UTF-8 locales
|
||||||
|
+0:switch between C/UTF-8 locales
|
||||||
|
>0
|
||||||
|
>0
|
||||||
|
>0
|
||||||
|
+
|
||||||
|
+ [[ abc =~ 'a(d*)bc' ]] && print "$#MATCH; $#match; ${#match[1]}"
|
||||||
|
+0:empty capture
|
||||||
|
+>3; 1; 0
|
||||||
|
--- a/configure.ac
|
||||||
|
+++ b/configure.ac
|
||||||
|
@@ -438,7 +438,7 @@ fi],
|
||||||
|
|
||||||
|
dnl Do you want to look for pcre support?
|
||||||
|
AC_ARG_ENABLE(pcre,
|
||||||
|
-AS_HELP_STRING([--enable-pcre],[enable the search for the pcre library (may create run-time library dependencies)]))
|
||||||
|
+AS_HELP_STRING([--enable-pcre],[enable the search for the pcre2 library (may create run-time library dependencies)]))
|
||||||
|
|
||||||
|
dnl Do you want to look for capability support?
|
||||||
|
AC_ARG_ENABLE(cap,
|
||||||
|
@@ -662,13 +662,12 @@ AC_HEADER_SYS_WAIT
|
||||||
|
|
||||||
|
oldcflags="$CFLAGS"
|
||||||
|
if test x$enable_pcre = xyes; then
|
||||||
|
-AC_CHECK_PROG([PCRECONF], pcre-config, pcre-config)
|
||||||
|
-dnl Typically (meaning on this single RedHat 9 box in front of me)
|
||||||
|
-dnl pcre-config --cflags produces a -I output which needs to go into
|
||||||
|
+AC_CHECK_PROG([PCRECONF], pcre2-config, pcre2-config)
|
||||||
|
+dnl pcre2-config --cflags may produce a -I output which needs to go into
|
||||||
|
dnl CPPFLAGS else configure's preprocessor tests don't pick it up,
|
||||||
|
dnl producing a warning.
|
||||||
|
-if test "x$ac_cv_prog_PCRECONF" = xpcre-config; then
|
||||||
|
- CPPFLAGS="$CPPFLAGS `pcre-config --cflags`"
|
||||||
|
+if test "x$ac_cv_prog_PCRECONF" = xpcre2-config; then
|
||||||
|
+ CPPFLAGS="$CPPFLAGS `pcre2-config --cflags`"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
@@ -678,9 +677,10 @@ AC_CHECK_HEADERS(sys/time.h sys/times.h
|
||||||
|
locale.h errno.h stdio.h stdarg.h varargs.h stdlib.h \
|
||||||
|
unistd.h sys/capability.h \
|
||||||
|
utmp.h utmpx.h sys/types.h pwd.h grp.h poll.h sys/mman.h \
|
||||||
|
- netinet/in_systm.h pcre.h langinfo.h wchar.h stddef.h \
|
||||||
|
+ netinet/in_systm.h langinfo.h wchar.h stddef.h \
|
||||||
|
sys/stropts.h iconv.h ncurses.h ncursesw/ncurses.h \
|
||||||
|
ncurses/ncurses.h)
|
||||||
|
+AC_CHECK_HEADERS([pcre2.h],,,[#define PCRE2_CODE_UNIT_WIDTH 8])
|
||||||
|
if test x$dynamic = xyes; then
|
||||||
|
AC_CHECK_HEADERS(dlfcn.h)
|
||||||
|
AC_CHECK_HEADERS(dl.h)
|
||||||
|
@@ -958,9 +958,7 @@ if test "x$ac_found_iconv" = "xyes"; the
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test x$enable_pcre = xyes; then
|
||||||
|
-dnl pcre-config should probably be employed here
|
||||||
|
-dnl AC_SEARCH_LIBS(pcre_compile, pcre)
|
||||||
|
- LIBS="`$ac_cv_prog_PCRECONF --libs` $LIBS"
|
||||||
|
+ LIBS="`$ac_cv_prog_PCRECONF --libs8` $LIBS"
|
||||||
|
fi
|
||||||
|
|
||||||
|
dnl ---------------------
|
||||||
|
@@ -1323,7 +1321,7 @@ AC_CHECK_FUNCS(strftime strptime mktime
|
||||||
|
pathconf sysconf \
|
||||||
|
tgetent tigetflag tigetnum tigetstr setupterm initscr resize_term \
|
||||||
|
getcchar setcchar waddwstr wget_wch win_wch use_default_colors \
|
||||||
|
- pcre_compile pcre_study pcre_exec \
|
||||||
|
+ pcre2_compile_8 \
|
||||||
|
nl_langinfo \
|
||||||
|
erand48 open_memstream \
|
||||||
|
posix_openpt \
|
|
@ -0,0 +1,180 @@
|
||||||
|
From f3f371deb376478176866fd770fbcf9bc0d0609f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Oliver Kiddle <opk@zsh.org>
|
||||||
|
Date: Sat, 13 May 2023 00:56:48 +0200
|
||||||
|
Subject: [PATCH] 51728: assign pcre named capture groups to a hash
|
||||||
|
|
||||||
|
---
|
||||||
|
ChangeLog | 3 +++
|
||||||
|
Doc/Zsh/mod_pcre.yo | 10 ++++++----
|
||||||
|
Src/Modules/pcre.c | 43 +++++++++++++++++++++++++++++++++----------
|
||||||
|
Test/V07pcre.ztst | 14 ++++++++++++++
|
||||||
|
4 files changed, 56 insertions(+), 14 deletions(-)
|
||||||
|
|
||||||
|
# diff --git a/ChangeLog b/ChangeLog
|
||||||
|
# index 285b73b2c..2835a9405 100644
|
||||||
|
# --- a/ChangeLog
|
||||||
|
# +++ b/ChangeLog
|
||||||
|
# @@ -1,5 +1,8 @@
|
||||||
|
# 2023-05-13 Oliver Kiddle <opk@zsh.org>
|
||||||
|
|
||||||
|
# + * 51728: Doc/Zsh/mod_pcre.yo, Src/Modules/pcre.c,
|
||||||
|
# + Test/V07pcre.ztst: assign pcre named capture groups to a hash
|
||||||
|
# +
|
||||||
|
# * 51723: Src/Modules/pcre.c, Test/V07pcre.ztst, configure.ac:
|
||||||
|
# migrate pcre module to pcre2
|
||||||
|
|
||||||
|
--- a/Doc/Zsh/mod_pcre.yo
|
||||||
|
+++ b/Doc/Zsh/mod_pcre.yo
|
||||||
|
@@ -20,12 +20,12 @@ including those that indicate newline.
|
||||||
|
)
|
||||||
|
findex(pcre_study)
|
||||||
|
item(tt(pcre_study))(
|
||||||
|
-Studies the previously-compiled PCRE which may result in faster
|
||||||
|
-matching.
|
||||||
|
+Requests JIT compilation for the previously-compiled PCRE which
|
||||||
|
+may result in faster matching.
|
||||||
|
)
|
||||||
|
findex(pcre_match)
|
||||||
|
item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] \
|
||||||
|
-[ tt(-n) var(offset) ] [ tt(-b) ] var(string))(
|
||||||
|
+[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-b) ] var(string))(
|
||||||
|
Returns successfully if tt(string) matches the previously-compiled
|
||||||
|
PCRE.
|
||||||
|
|
||||||
|
@@ -36,7 +36,9 @@ substrings, unless the tt(-a) option is
|
||||||
|
case it will set the array var(arr). Similarly, the variable
|
||||||
|
tt(MATCH) will be set to the entire matched portion of the
|
||||||
|
string, unless the tt(-v) option is given, in which case the variable
|
||||||
|
-var(var) will be set.
|
||||||
|
+var(var) will be set. Furthermore, any named captures will
|
||||||
|
+be stored in the associative array tt(.pcre.match) unless an
|
||||||
|
+alternative is given with tt(-A).
|
||||||
|
No variables are altered if there is no successful match.
|
||||||
|
A tt(-n) option starts searching for a match from the
|
||||||
|
byte var(offset) position in var(string). If the tt(-b) option is given,
|
||||||
|
--- a/Src/Modules/pcre.c
|
||||||
|
+++ b/Src/Modules/pcre.c
|
||||||
|
@@ -129,14 +129,17 @@ bin_pcre_study(char *nam, UNUSED(char **
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
-zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count,
|
||||||
|
- char *matchvar, char *substravar, int want_offset_pair,
|
||||||
|
- int matchedinarr, int want_begin_end)
|
||||||
|
+zpcre_get_substrings(pcre2_code *pat, char *arg, pcre2_match_data *mdata,
|
||||||
|
+ int captured_count, char *matchvar, char *substravar, char *namedassoc,
|
||||||
|
+ int want_offset_pair, int matchedinarr, int want_begin_end)
|
||||||
|
{
|
||||||
|
PCRE2_SIZE *ovec;
|
||||||
|
char *match_all, **matches;
|
||||||
|
char offset_all[50];
|
||||||
|
int capture_start = 1;
|
||||||
|
+ int vec_off;
|
||||||
|
+ PCRE2_SPTR ntable; /* table of named captures */
|
||||||
|
+ uint32_t ncount, nsize;
|
||||||
|
|
||||||
|
if (matchedinarr) {
|
||||||
|
/* bash-style ovec[0] entire-matched string in the array */
|
||||||
|
@@ -174,7 +177,7 @@ zpcre_get_substrings(char *arg, pcre2_ma
|
||||||
|
if (substravar &&
|
||||||
|
(!want_begin_end || nelem)) {
|
||||||
|
char **x;
|
||||||
|
- int vec_off, i;
|
||||||
|
+ int i;
|
||||||
|
matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start));
|
||||||
|
for (i = capture_start; i < captured_count; i++) {
|
||||||
|
vec_off = 2*i;
|
||||||
|
@@ -184,6 +187,23 @@ zpcre_get_substrings(char *arg, pcre2_ma
|
||||||
|
setaparam(substravar, matches);
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (!pcre2_pattern_info(pat, PCRE2_INFO_NAMECOUNT, &ncount) && ncount
|
||||||
|
+ && !pcre2_pattern_info(pat, PCRE2_INFO_NAMEENTRYSIZE, &nsize)
|
||||||
|
+ && !pcre2_pattern_info(pat, PCRE2_INFO_NAMETABLE, &ntable))
|
||||||
|
+ {
|
||||||
|
+ char **hash, **hashptr;
|
||||||
|
+ uint32_t nidx;
|
||||||
|
+ hashptr = hash = (char **)zshcalloc((ncount+1)*2*sizeof(char *));
|
||||||
|
+ for (nidx = 0; nidx < ncount; nidx++) {
|
||||||
|
+ vec_off = (ntable[nsize * nidx] << 9) + 2 * ntable[nsize * nidx + 1];
|
||||||
|
+ /* would metafy the key but pcre limits characters in the name */
|
||||||
|
+ *hashptr++ = ztrdup((char *) ntable + nsize * nidx + 2);
|
||||||
|
+ *hashptr++ = metafy(arg + ovec[vec_off],
|
||||||
|
+ ovec[vec_off+1]-ovec[vec_off], META_DUP);
|
||||||
|
+ }
|
||||||
|
+ sethparam(namedassoc, hash);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (want_begin_end) {
|
||||||
|
/*
|
||||||
|
* cond-infix rather than builtin; also not bash; so we set a bunch
|
||||||
|
@@ -286,6 +306,7 @@ bin_pcre_match(char *nam, char **args, O
|
||||||
|
char *matched_portion = NULL;
|
||||||
|
char *plaintext = NULL;
|
||||||
|
char *receptacle = NULL;
|
||||||
|
+ char *named = ".pcre.match";
|
||||||
|
int return_value = 1;
|
||||||
|
/* The subject length and offset start are both int values in pcre_exec */
|
||||||
|
int subject_len;
|
||||||
|
@@ -305,6 +326,9 @@ bin_pcre_match(char *nam, char **args, O
|
||||||
|
if(OPT_HASARG(ops,c='v')) {
|
||||||
|
matched_portion = OPT_ARG(ops,c);
|
||||||
|
}
|
||||||
|
+ if (OPT_HASARG(ops, c='A')) {
|
||||||
|
+ named = OPT_ARG(ops, c);
|
||||||
|
+ }
|
||||||
|
if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */
|
||||||
|
if ((offset_start = getposint(OPT_ARG(ops,c), nam)) < 0)
|
||||||
|
return 1;
|
||||||
|
@@ -326,8 +350,8 @@ bin_pcre_match(char *nam, char **args, O
|
||||||
|
if (ret==0) return_value = 0;
|
||||||
|
else if (ret == PCRE2_ERROR_NOMATCH) /* no match */;
|
||||||
|
else if (ret>0) {
|
||||||
|
- zpcre_get_substrings(plaintext, pcre_mdata, ret, matched_portion, receptacle,
|
||||||
|
- want_offset_pair, 0, 0);
|
||||||
|
+ zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret, matched_portion,
|
||||||
|
+ receptacle, named, want_offset_pair, 0, 0);
|
||||||
|
return_value = 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
@@ -405,9 +429,8 @@ cond_pcre_match(char **a, int id)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (r>0) {
|
||||||
|
- zpcre_get_substrings(lhstr_plain, pcre_mdata, r, svar, avar, 0,
|
||||||
|
- isset(BASHREMATCH),
|
||||||
|
- !isset(BASHREMATCH));
|
||||||
|
+ zpcre_get_substrings(pcre_pat, lhstr_plain, pcre_mdata, r, svar, avar,
|
||||||
|
+ ".pcre.match", 0, isset(BASHREMATCH), !isset(BASHREMATCH));
|
||||||
|
return_value = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
@@ -443,7 +466,7 @@ static struct conddef cotab[] = {
|
||||||
|
|
||||||
|
static struct builtin bintab[] = {
|
||||||
|
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL),
|
||||||
|
- BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:n:b", NULL),
|
||||||
|
+ BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:b", NULL),
|
||||||
|
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL)
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/Test/V07pcre.ztst
|
||||||
|
+++ b/Test/V07pcre.ztst
|
||||||
|
@@ -194,3 +194,17 @@
|
||||||
|
[[ abc =~ 'a(d*)bc' ]] && print "$#MATCH; $#match; ${#match[1]}"
|
||||||
|
0:empty capture
|
||||||
|
>3; 1; 0
|
||||||
|
+
|
||||||
|
+ [[ category/name-12345 =~ '(?x)^
|
||||||
|
+ (?<category> [^/]* ) /
|
||||||
|
+ (?<package>
|
||||||
|
+ (?<name> \w+ ) -
|
||||||
|
+ (?<version> \d+ ))$' ]]
|
||||||
|
+ typeset -p1 .pcre.match
|
||||||
|
+0:named captures
|
||||||
|
+>typeset -g -A .pcre.match=(
|
||||||
|
+> [category]=category
|
||||||
|
+> [name]=name
|
||||||
|
+> [package]=name-12345
|
||||||
|
+> [version]=12345
|
||||||
|
+>)
|
|
@ -0,0 +1,156 @@
|
||||||
|
From b4d1c756f50909b4a13e5c8fe5f26f71e9d54f63 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Oliver Kiddle <opk@zsh.org>
|
||||||
|
Date: Sat, 13 May 2023 00:59:00 +0200
|
||||||
|
Subject: [PATCH] 51738: support pcre's alternative DFA matching algorithm
|
||||||
|
|
||||||
|
---
|
||||||
|
ChangeLog | 3 +++
|
||||||
|
Doc/Zsh/mod_pcre.yo | 6 ++++-
|
||||||
|
Src/Modules/pcre.c | 53 ++++++++++++++++++++++++++++++---------------
|
||||||
|
Test/V07pcre.ztst | 5 +++++
|
||||||
|
4 files changed, 49 insertions(+), 18 deletions(-)
|
||||||
|
|
||||||
|
# diff --git a/ChangeLog b/ChangeLog
|
||||||
|
# index 2835a9405..18bc4a698 100644
|
||||||
|
# --- a/ChangeLog
|
||||||
|
# +++ b/ChangeLog
|
||||||
|
# @@ -1,5 +1,8 @@
|
||||||
|
# 2023-05-13 Oliver Kiddle <opk@zsh.org>
|
||||||
|
|
||||||
|
# + * 51738: Doc/Zsh/mod_pcre.yo, Src/Modules/pcre.c,
|
||||||
|
# + Test/V07pcre.ztst: support pcre's DFA matching algorithm
|
||||||
|
# +
|
||||||
|
# * 51728: Doc/Zsh/mod_pcre.yo, Src/Modules/pcre.c,
|
||||||
|
# Test/V07pcre.ztst: assign pcre named capture groups to a hash
|
||||||
|
|
||||||
|
--- a/Doc/Zsh/mod_pcre.yo
|
||||||
|
+++ b/Doc/Zsh/mod_pcre.yo
|
||||||
|
@@ -25,7 +25,7 @@ may result in faster matching.
|
||||||
|
)
|
||||||
|
findex(pcre_match)
|
||||||
|
item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] \
|
||||||
|
-[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-b) ] var(string))(
|
||||||
|
+[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-bd) ] var(string))(
|
||||||
|
Returns successfully if tt(string) matches the previously-compiled
|
||||||
|
PCRE.
|
||||||
|
|
||||||
|
@@ -69,6 +69,10 @@ print -l $accum)
|
||||||
|
)
|
||||||
|
enditem()
|
||||||
|
|
||||||
|
+The option tt(-d) uses the alternative breadth-first DFA search algorithm of
|
||||||
|
+pcre. This sets tt(match), or the array given with tt(-a), to all the matches
|
||||||
|
+found from the same start point in the subject.
|
||||||
|
+
|
||||||
|
The tt(zsh/pcre) module makes available the following test condition:
|
||||||
|
|
||||||
|
startitem()
|
||||||
|
--- a/Src/Modules/pcre.c
|
||||||
|
+++ b/Src/Modules/pcre.c
|
||||||
|
@@ -305,30 +305,29 @@ bin_pcre_match(char *nam, char **args, O
|
||||||
|
pcre2_match_data *pcre_mdata = NULL;
|
||||||
|
char *matched_portion = NULL;
|
||||||
|
char *plaintext = NULL;
|
||||||
|
- char *receptacle = NULL;
|
||||||
|
- char *named = ".pcre.match";
|
||||||
|
+ char *receptacle;
|
||||||
|
+ char *named = NULL;
|
||||||
|
int return_value = 1;
|
||||||
|
/* The subject length and offset start are both int values in pcre_exec */
|
||||||
|
int subject_len;
|
||||||
|
int offset_start = 0;
|
||||||
|
int want_offset_pair = 0;
|
||||||
|
+ int use_dfa = 0;
|
||||||
|
|
||||||
|
if (pcre_pattern == NULL) {
|
||||||
|
zwarnnam(nam, "no pattern has been compiled");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- matched_portion = "MATCH";
|
||||||
|
- receptacle = "match";
|
||||||
|
- if(OPT_HASARG(ops,c='a')) {
|
||||||
|
- receptacle = OPT_ARG(ops,c);
|
||||||
|
- }
|
||||||
|
- if(OPT_HASARG(ops,c='v')) {
|
||||||
|
- matched_portion = OPT_ARG(ops,c);
|
||||||
|
- }
|
||||||
|
- if (OPT_HASARG(ops, c='A')) {
|
||||||
|
- named = OPT_ARG(ops, c);
|
||||||
|
+ if (!(use_dfa = OPT_ISSET(ops, 'd'))) {
|
||||||
|
+ matched_portion = OPT_HASARG(ops, c='v') ? OPT_ARG(ops, c) : "MATCH";
|
||||||
|
+ named = OPT_HASARG(ops, c='A') ? OPT_ARG(ops, c) : ".pcre.match";
|
||||||
|
+ } else if (OPT_HASARG(ops, c='v') || OPT_HASARG(ops, c='A')) {
|
||||||
|
+ zwarnnam(nam, "-d cannot be combined with -%c", c);
|
||||||
|
+ return 1;
|
||||||
|
}
|
||||||
|
+ receptacle = OPT_HASARG(ops, 'a') ? OPT_ARG(ops, 'a') : "match";
|
||||||
|
+
|
||||||
|
if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */
|
||||||
|
if ((offset_start = getposint(OPT_ARG(ops,c), nam)) < 0)
|
||||||
|
return 1;
|
||||||
|
@@ -341,7 +340,25 @@ bin_pcre_match(char *nam, char **args, O
|
||||||
|
|
||||||
|
if (offset_start > 0 && offset_start >= subject_len)
|
||||||
|
ret = PCRE2_ERROR_NOMATCH;
|
||||||
|
- else {
|
||||||
|
+ else if (use_dfa) {
|
||||||
|
+ PCRE2_SIZE old, wscount = 128, capcount = 128;
|
||||||
|
+ void *workspace = zhalloc(sizeof(int) * wscount);
|
||||||
|
+ pcre_mdata = pcre2_match_data_create(capcount, NULL);
|
||||||
|
+ do {
|
||||||
|
+ ret = pcre2_dfa_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len,
|
||||||
|
+ offset_start, 0, pcre_mdata, NULL, (int *) workspace, wscount);
|
||||||
|
+ if (ret == PCRE2_ERROR_DFA_WSSIZE) {
|
||||||
|
+ old = wscount;
|
||||||
|
+ wscount += wscount / 2;
|
||||||
|
+ workspace = hrealloc(workspace, sizeof(int) * old, sizeof(int) * wscount);
|
||||||
|
+ } else if (ret == 0) {
|
||||||
|
+ capcount += capcount / 2;
|
||||||
|
+ pcre2_match_data_free(pcre_mdata);
|
||||||
|
+ pcre_mdata = pcre2_match_data_create(capcount, NULL);
|
||||||
|
+ } else
|
||||||
|
+ break;
|
||||||
|
+ } while(1);
|
||||||
|
+ } else {
|
||||||
|
pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL);
|
||||||
|
ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len,
|
||||||
|
offset_start, 0, pcre_mdata, NULL);
|
||||||
|
@@ -350,12 +367,14 @@ bin_pcre_match(char *nam, char **args, O
|
||||||
|
if (ret==0) return_value = 0;
|
||||||
|
else if (ret == PCRE2_ERROR_NOMATCH) /* no match */;
|
||||||
|
else if (ret>0) {
|
||||||
|
- zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret, matched_portion,
|
||||||
|
- receptacle, named, want_offset_pair, 0, 0);
|
||||||
|
+ zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret,
|
||||||
|
+ matched_portion, receptacle, named, want_offset_pair, use_dfa, 0);
|
||||||
|
return_value = 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
- zwarnnam(nam, "error in pcre2_match [%d]", ret);
|
||||||
|
+ PCRE2_UCHAR buffer[256];
|
||||||
|
+ pcre2_get_error_message(ret, buffer, sizeof(buffer));
|
||||||
|
+ zwarnnam(nam, "error in pcre matching for /%s/: %s", plaintext, buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pcre_mdata)
|
||||||
|
@@ -466,7 +485,7 @@ static struct conddef cotab[] = {
|
||||||
|
|
||||||
|
static struct builtin bintab[] = {
|
||||||
|
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL),
|
||||||
|
- BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:b", NULL),
|
||||||
|
+ BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:bd", NULL),
|
||||||
|
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL)
|
||||||
|
};
|
||||||
|
|
||||||
|
--- a/Test/V07pcre.ztst
|
||||||
|
+++ b/Test/V07pcre.ztst
|
||||||
|
@@ -208,3 +208,8 @@
|
||||||
|
> [package]=name-12345
|
||||||
|
> [version]=12345
|
||||||
|
>)
|
||||||
|
+
|
||||||
|
+ pcre_compile 'cat(er(pillar)?)?'
|
||||||
|
+ pcre_match -d 'the caterpillar catchment' && print $match
|
||||||
|
+0:pcre_match -d
|
||||||
|
+>caterpillar cater cat
|
|
@ -0,0 +1,111 @@
|
||||||
|
From 10bdbd8b5b0b43445aff23dcd412f25cf6aa328a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
|
||||||
|
Date: Tue, 20 Jun 2023 18:14:27 +0900
|
||||||
|
Subject: [PATCH] 51877: do not build pcre module if pcre2-config is not found
|
||||||
|
|
||||||
|
---
|
||||||
|
ChangeLog | 5 +++++
|
||||||
|
Src/Modules/pcre.mdd | 2 +-
|
||||||
|
configure.ac | 31 +++++++++++++++++++------------
|
||||||
|
3 files changed, 25 insertions(+), 13 deletions(-)
|
||||||
|
|
||||||
|
# diff --git a/ChangeLog b/ChangeLog
|
||||||
|
# index 14349dcf2..e89ffee1b 100644
|
||||||
|
# --- a/ChangeLog
|
||||||
|
# +++ b/ChangeLog
|
||||||
|
# @@ -1,3 +1,8 @@
|
||||||
|
# +2023-06-20 Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
|
||||||
|
# +
|
||||||
|
# + * 51877: Src/Modules/pcre.mdd, configure.ac: do not build pcre
|
||||||
|
# + module if pcre2-config is not available.
|
||||||
|
# +
|
||||||
|
# 2023-06-19 Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
|
||||||
|
|
||||||
|
# * 51862: Doc/Makefile.in, configure.ac: support texinfo-7.0
|
||||||
|
--- a/Src/Modules/pcre.mdd
|
||||||
|
+++ b/Src/Modules/pcre.mdd
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
name=zsh/pcre
|
||||||
|
-link=`if test x$enable_pcre = xyes && (pcre-config --version >/dev/null 2>/dev/null); then echo dynamic; else echo no; fi`
|
||||||
|
+link=`if test x$enable_pcre = xyes; then echo dynamic; else echo no; fi`
|
||||||
|
load=no
|
||||||
|
|
||||||
|
autofeatures="b:pcre_compile b:pcre_study b:pcre_match"
|
||||||
|
--- a/configure.ac
|
||||||
|
+++ b/configure.ac
|
||||||
|
@@ -440,6 +440,17 @@ dnl Do you want to look for pcre support
|
||||||
|
AC_ARG_ENABLE(pcre,
|
||||||
|
AS_HELP_STRING([--enable-pcre],[enable the search for the pcre2 library (may create run-time library dependencies)]))
|
||||||
|
|
||||||
|
+AC_ARG_VAR(PCRE_CONFIG, [pathname of pcre2-config if it is not in PATH])
|
||||||
|
+if test "x$enable_pcre" = xyes; then
|
||||||
|
+ AC_CHECK_PROG([PCRE_CONFIG], pcre2-config, pcre2-config)
|
||||||
|
+ if test "x$PCRE_CONFIG" = x; then
|
||||||
|
+ enable_pcre=no
|
||||||
|
+ AC_MSG_WARN([pcre2-config not found: pcre module is disabled.])
|
||||||
|
+ AC_MSG_NOTICE(
|
||||||
|
+ [Set PCRE_CONFIG to pathname of pcre2-config if it is not in PATH.])
|
||||||
|
+ fi
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
dnl Do you want to look for capability support?
|
||||||
|
AC_ARG_ENABLE(cap,
|
||||||
|
AS_HELP_STRING([--enable-cap],[enable the search for POSIX capabilities (may require additional headers to be added by hand)]))
|
||||||
|
@@ -660,15 +671,12 @@ AC_HEADER_DIRENT
|
||||||
|
AC_HEADER_STAT
|
||||||
|
AC_HEADER_SYS_WAIT
|
||||||
|
|
||||||
|
-oldcflags="$CFLAGS"
|
||||||
|
-if test x$enable_pcre = xyes; then
|
||||||
|
-AC_CHECK_PROG([PCRECONF], pcre2-config, pcre2-config)
|
||||||
|
dnl pcre2-config --cflags may produce a -I output which needs to go into
|
||||||
|
dnl CPPFLAGS else configure's preprocessor tests don't pick it up,
|
||||||
|
dnl producing a warning.
|
||||||
|
-if test "x$ac_cv_prog_PCRECONF" = xpcre2-config; then
|
||||||
|
- CPPFLAGS="$CPPFLAGS `pcre2-config --cflags`"
|
||||||
|
-fi
|
||||||
|
+if test "x$enable_pcre" = xyes; then
|
||||||
|
+ CPPFLAGS="`$PCRE_CONFIG --cflags` $CPPFLAGS"
|
||||||
|
+ AC_CHECK_HEADERS([pcre2.h],,,[#define PCRE2_CODE_UNIT_WIDTH 8])
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_CHECK_HEADERS(sys/time.h sys/times.h sys/select.h termcap.h termio.h \
|
||||||
|
@@ -680,7 +688,6 @@ AC_CHECK_HEADERS(sys/time.h sys/times.h
|
||||||
|
netinet/in_systm.h langinfo.h wchar.h stddef.h \
|
||||||
|
sys/stropts.h iconv.h ncurses.h ncursesw/ncurses.h \
|
||||||
|
ncurses/ncurses.h)
|
||||||
|
-AC_CHECK_HEADERS([pcre2.h],,,[#define PCRE2_CODE_UNIT_WIDTH 8])
|
||||||
|
if test x$dynamic = xyes; then
|
||||||
|
AC_CHECK_HEADERS(dlfcn.h)
|
||||||
|
AC_CHECK_HEADERS(dl.h)
|
||||||
|
@@ -957,10 +964,6 @@ if test "x$ac_found_iconv" = "xyes"; the
|
||||||
|
[Define as const if the declaration of iconv() needs const.])
|
||||||
|
fi
|
||||||
|
|
||||||
|
-if test x$enable_pcre = xyes; then
|
||||||
|
- LIBS="`$ac_cv_prog_PCRECONF --libs8` $LIBS"
|
||||||
|
-fi
|
||||||
|
-
|
||||||
|
dnl ---------------------
|
||||||
|
dnl CHECK TERMCAP LIBRARY
|
||||||
|
dnl ---------------------
|
||||||
|
@@ -1321,7 +1324,6 @@ AC_CHECK_FUNCS(strftime strptime mktime
|
||||||
|
pathconf sysconf \
|
||||||
|
tgetent tigetflag tigetnum tigetstr setupterm initscr resize_term \
|
||||||
|
getcchar setcchar waddwstr wget_wch win_wch use_default_colors \
|
||||||
|
- pcre2_compile_8 \
|
||||||
|
nl_langinfo \
|
||||||
|
erand48 open_memstream \
|
||||||
|
posix_openpt \
|
||||||
|
@@ -1376,6 +1378,11 @@ if test x$zsh_cv_func_realpath_accepts_n
|
||||||
|
AC_DEFINE(REALPATH_ACCEPTS_NULL)
|
||||||
|
fi
|
||||||
|
|
||||||
|
+if test x$enable_pcre = xyes; then
|
||||||
|
+ LIBS="`$PCRE_CONFIG --libs8` $LIBS"
|
||||||
|
+ AC_CHECK_FUNCS(pcre2_compile_8)
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
if test x$enable_cap = xyes; then
|
||||||
|
AC_CHECK_FUNCS(cap_get_proc)
|
||||||
|
fi
|
Loading…
Reference in a new issue