Add pending patch converting the package to PCRE2. Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
148 lines
5.2 KiB
Diff
148 lines
5.2 KiB
Diff
From 1cbe8c4dd653336c5766dfd75eb379ad37f04249 Mon Sep 17 00:00:00 2001
|
|
From: Christian Marangi <ansuelsmth@gmail.com>
|
|
Date: Thu, 28 Sep 2023 20:59:26 +0200
|
|
Subject: [PATCH] unix-ffi: re: convert to PCRE2
|
|
|
|
PCRE is marked as EOL and won't receive any new security update.
|
|
|
|
Convert the re module to PCRE2 API to enforce security.
|
|
Additional dependency is now needed with uctypes due to changes in how
|
|
PCRE2 returns the match_data in a pointer and requires special handling.
|
|
|
|
The converted module is tested with test_re.py with no regressions.
|
|
|
|
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
|
|
---
|
|
unix-ffi/re/re.py | 73 +++++++++++++++++++++++++++++++----------------
|
|
1 file changed, 48 insertions(+), 25 deletions(-)
|
|
|
|
--- a/unix-ffi/re/re.py
|
|
+++ b/unix-ffi/re/re.py
|
|
@@ -1,36 +1,55 @@
|
|
import sys
|
|
import ffilib
|
|
import array
|
|
+import uctypes
|
|
|
|
+pcre2 = ffilib.open("libpcre2-8")
|
|
|
|
-pcre = ffilib.open("libpcre")
|
|
-
|
|
-# pcre *pcre_compile(const char *pattern, int options,
|
|
-# const char **errptr, int *erroffset,
|
|
-# const unsigned char *tableptr);
|
|
-pcre_compile = pcre.func("p", "pcre_compile", "sipps")
|
|
-
|
|
-# int pcre_exec(const pcre *code, const pcre_extra *extra,
|
|
-# const char *subject, int length, int startoffset,
|
|
-# int options, int *ovector, int ovecsize);
|
|
-pcre_exec = pcre.func("i", "pcre_exec", "PPsiiipi")
|
|
-
|
|
-# int pcre_fullinfo(const pcre *code, const pcre_extra *extra,
|
|
-# int what, void *where);
|
|
-pcre_fullinfo = pcre.func("i", "pcre_fullinfo", "PPip")
|
|
-
|
|
-
|
|
-IGNORECASE = I = 1
|
|
-MULTILINE = M = 2
|
|
-DOTALL = S = 4
|
|
-VERBOSE = X = 8
|
|
-PCRE_ANCHORED = 0x10
|
|
+# pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
|
|
+# uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
|
|
+# pcre2_compile_context *ccontext);
|
|
+pcre2_compile = pcre2.func("p", "pcre2_compile_8", "siippp")
|
|
+
|
|
+# int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
|
|
+# PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options,
|
|
+# pcre2_match_data *match_data, pcre2_match_context *mcontext);
|
|
+pcre2_match = pcre2.func("i", "pcre2_match_8", "Psiiipp")
|
|
+
|
|
+# int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
|
|
+# void *where);
|
|
+pcre2_pattern_info = pcre2.func("i", "pcre2_pattern_info_8", "Pip")
|
|
+
|
|
+# PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
|
|
+pcre2_get_ovector_pointer = pcre2.func("p", "pcre2_get_ovector_pointer_8", "p")
|
|
+
|
|
+# pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code,
|
|
+# pcre2_general_context *gcontext);
|
|
+pcre2_match_data_create_from_pattern = pcre2.func(
|
|
+ "p", "pcre2_match_data_create_from_pattern_8", "Pp"
|
|
+)
|
|
+
|
|
+# PCRE2_SIZE is of type size_t.
|
|
+# Use ULONG as type to support both 32bit and 64bit.
|
|
+PCRE2_SIZE_SIZE = uctypes.sizeof({"field": 0 | uctypes.ULONG})
|
|
+PCRE2_SIZE_TYPE = "L"
|
|
+
|
|
+# Real value in pcre2.h is 0xFFFFFFFF for 32bit and
|
|
+# 0xFFFFFFFFFFFFFFFF for 64bit that is equivalent
|
|
+# to -1
|
|
+PCRE2_ZERO_TERMINATED = -1
|
|
+
|
|
+
|
|
+IGNORECASE = I = 0x8
|
|
+MULTILINE = M = 0x400
|
|
+DOTALL = S = 0x20
|
|
+VERBOSE = X = 0x80
|
|
+PCRE2_ANCHORED = 0x80000000
|
|
|
|
# TODO. Note that Python3 has unicode by default
|
|
ASCII = A = 0
|
|
UNICODE = U = 0
|
|
|
|
-PCRE_INFO_CAPTURECOUNT = 2
|
|
+PCRE2_INFO_CAPTURECOUNT = 0x4
|
|
|
|
|
|
class PCREMatch:
|
|
@@ -67,19 +86,23 @@ class PCREPattern:
|
|
def search(self, s, pos=0, endpos=-1, _flags=0):
|
|
assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos)
|
|
buf = array.array("i", [0])
|
|
- pcre_fullinfo(self.obj, None, PCRE_INFO_CAPTURECOUNT, buf)
|
|
+ pcre2_pattern_info(self.obj, PCRE2_INFO_CAPTURECOUNT, buf)
|
|
cap_count = buf[0]
|
|
- ov = array.array("i", [0, 0, 0] * (cap_count + 1))
|
|
- num = pcre_exec(self.obj, None, s, len(s), pos, _flags, ov, len(ov))
|
|
+ match_data = pcre2_match_data_create_from_pattern(self.obj, None)
|
|
+ num = pcre2_match(self.obj, s, len(s), pos, _flags, match_data, None)
|
|
if num == -1:
|
|
# No match
|
|
return None
|
|
+ ov_ptr = pcre2_get_ovector_pointer(match_data)
|
|
+ # pcre2_get_ovector_pointer returns a pointer to PCRE2_SIZE values
|
|
+ ov_buf = uctypes.bytearray_at(ov_ptr, PCRE2_SIZE_SIZE * (cap_count + 1) * 2)
|
|
+ ov = array.array(PCRE2_SIZE_TYPE, ov_buf)
|
|
# We don't care how many matching subexpressions we got, we
|
|
# care only about total # of capturing ones (including empty)
|
|
return PCREMatch(s, cap_count + 1, ov)
|
|
|
|
def match(self, s, pos=0, endpos=-1):
|
|
- return self.search(s, pos, endpos, PCRE_ANCHORED)
|
|
+ return self.search(s, pos, endpos, PCRE2_ANCHORED)
|
|
|
|
def sub(self, repl, s, count=0):
|
|
if not callable(repl):
|
|
@@ -141,9 +164,9 @@ class PCREPattern:
|
|
|
|
|
|
def compile(pattern, flags=0):
|
|
- errptr = bytes(4)
|
|
+ errcode = bytes(4)
|
|
erroffset = bytes(4)
|
|
- regex = pcre_compile(pattern, flags, errptr, erroffset, None)
|
|
+ regex = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, flags, errcode, erroffset, None)
|
|
assert regex
|
|
return PCREPattern(regex)
|
|
|
|
@@ -154,7 +177,7 @@ def search(pattern, string, flags=0):
|
|
|
|
|
|
def match(pattern, string, flags=0):
|
|
- r = compile(pattern, flags | PCRE_ANCHORED)
|
|
+ r = compile(pattern, flags | PCRE2_ANCHORED)
|
|
return r.search(string)
|
|
|
|
|