提交 29c9cc75 authored 作者: Noel Grandin's avatar Noel Grandin 提交者: GitHub

Merge pull request #323 from Akkuzin/regexp_match_enhance

Regular expression functions (REGEXP_REPLACE, REGEXP_LIKE) enhancement
...@@ -3255,15 +3255,53 @@ TRIM(BOTH '_' FROM NAME) ...@@ -3255,15 +3255,53 @@ TRIM(BOTH '_' FROM NAME)
" "
"Functions (String)","REGEXP_REPLACE"," "Functions (String)","REGEXP_REPLACE","
REGEXP_REPLACE(inputString, regexString, replacementString) REGEXP_REPLACE(inputString, regexString, replacementString [, flagsString])
"," ","
Replaces each substring that matches a regular expression. Replaces each substring that matches a regular expression.
For details, see the Java ""String.replaceAll()"" method. For details, see the Java ""String.replaceAll()"" method.
If any parameter is null, the result is null. If any parameter is null (except optional flagsString parameter), the result is null.
Flags values are limited to 'i', 'c', 'n', 'm'. Other symbols cause an exception.
Multiple symbols can be used in one flagsString parameter (like 'im').
Later flags override earlier ones, for example 'ic' is equivalent to case sensitive matching 'c'.
'i' enables case insensitive matching (Pattern.CASE_INSENSITIVE)
'c' disables case insensitive matching (Pattern.CASE_INSENSITIVE)
'n' allows the period to match the newline character (Pattern.DOTALL)
'm' enables multiline mode (Pattern.MULTILINE)
"," ","
REGEXP_REPLACE('Hello World', ' +', ' ') REGEXP_REPLACE('Hello World', ' +', ' ')
REGEXP_REPLACE('Hello WWWWorld', 'w+', 'W', 'i')
"
"Functions (String)","REGEXP_LIKE","
REGEXP_LIKE(inputString, regexString [, flagsString])
","
Matches string to a regular expression.
For details, see the Java ""Matcher.find()"" method.
If any parameter is null (except optional flagsString parameter), the result is null.
Flags values are limited to 'i', 'c', 'n', 'm'. Other symbols cause an exception.
Multiple symbols can be used in one flagsString parameter (like 'im').
Later flags override earlier ones, for example 'ic' is equivalent to case sensitive matching 'c'.
'i' enables case insensitive matching (Pattern.CASE_INSENSITIVE)
'c' disables case insensitive matching (Pattern.CASE_INSENSITIVE)
'n' allows the period to match the newline character (Pattern.DOTALL)
'm' enables multiline mode (Pattern.MULTILINE)
","
REGEXP_LIKE('Hello World', '[A-Z ]*', 'i')
" "
"Functions (String)","REPEAT"," "Functions (String)","REPEAT","
REPEAT(string, int) REPEAT(string, int)
"," ","
......
...@@ -20,6 +20,7 @@ import java.util.Calendar; ...@@ -20,6 +20,7 @@ import java.util.Calendar;
import java.util.HashMap; import java.util.HashMap;
import java.util.Locale; import java.util.Locale;
import java.util.TimeZone; import java.util.TimeZone;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException; import java.util.regex.PatternSyntaxException;
import org.h2.api.ErrorCode; import org.h2.api.ErrorCode;
import org.h2.command.Command; import org.h2.command.Command;
...@@ -120,6 +121,8 @@ public class Function extends Expression implements FunctionCall { ...@@ -120,6 +121,8 @@ public class Function extends Expression implements FunctionCall {
FILE_READ = 225, TRANSACTION_ID = 226, TRUNCATE_VALUE = 227, FILE_READ = 225, TRANSACTION_ID = 226, TRUNCATE_VALUE = 227,
NVL2 = 228, DECODE = 229, ARRAY_CONTAINS = 230, FILE_WRITE = 232; NVL2 = 228, DECODE = 229, ARRAY_CONTAINS = 230, FILE_WRITE = 232;
public static final int REGEXP_LIKE = 240;
/** /**
* Used in MySQL-style INSERT ... ON DUPLICATE KEY UPDATE ... VALUES * Used in MySQL-style INSERT ... ON DUPLICATE KEY UPDATE ... VALUES
*/ */
...@@ -295,12 +298,13 @@ public class Function extends Expression implements FunctionCall { ...@@ -295,12 +298,13 @@ public class Function extends Expression implements FunctionCall {
addFunction("XMLCDATA", XMLCDATA, 1, Value.STRING); addFunction("XMLCDATA", XMLCDATA, 1, Value.STRING);
addFunction("XMLSTARTDOC", XMLSTARTDOC, 0, Value.STRING); addFunction("XMLSTARTDOC", XMLSTARTDOC, 0, Value.STRING);
addFunction("XMLTEXT", XMLTEXT, VAR_ARGS, Value.STRING); addFunction("XMLTEXT", XMLTEXT, VAR_ARGS, Value.STRING);
addFunction("REGEXP_REPLACE", REGEXP_REPLACE, 3, Value.STRING); addFunction("REGEXP_REPLACE", REGEXP_REPLACE, VAR_ARGS, Value.STRING);
addFunction("RPAD", RPAD, VAR_ARGS, Value.STRING); addFunction("RPAD", RPAD, VAR_ARGS, Value.STRING);
addFunction("LPAD", LPAD, VAR_ARGS, Value.STRING); addFunction("LPAD", LPAD, VAR_ARGS, Value.STRING);
addFunction("TO_CHAR", TO_CHAR, VAR_ARGS, Value.STRING); addFunction("TO_CHAR", TO_CHAR, VAR_ARGS, Value.STRING);
addFunction("ORA_HASH", ORA_HASH, VAR_ARGS, Value.INT); addFunction("ORA_HASH", ORA_HASH, VAR_ARGS, Value.INT);
addFunction("TRANSLATE", TRANSLATE, 3, Value.STRING); addFunction("TRANSLATE", TRANSLATE, 3, Value.STRING);
addFunction("REGEXP_LIKE", REGEXP_LIKE, VAR_ARGS, Value.BOOLEAN);
// date // date
addFunctionNotDeterministic("CURRENT_DATE", CURRENT_DATE, addFunctionNotDeterministic("CURRENT_DATE", CURRENT_DATE,
...@@ -1381,9 +1385,13 @@ public class Function extends Expression implements FunctionCall { ...@@ -1381,9 +1385,13 @@ public class Function extends Expression implements FunctionCall {
case REGEXP_REPLACE: { case REGEXP_REPLACE: {
String regexp = v1.getString(); String regexp = v1.getString();
String replacement = v2.getString(); String replacement = v2.getString();
// The optional flags string is the fourth argument (v3); v2 holds the
// replacement text and must not be interpreted as flags. A missing or
// NULL flags argument is treated as "no flags".
String regexpMode = v3 == null || v3.getString() == null ? "" :
v3.getString();
// Convert the flag characters to the int mask used by Pattern.compile.
int flags = makeRegexpFlags(regexpMode);
try { try {
result = ValueString.get( result = ValueString.get(
v0.getString().replaceAll(regexp, replacement), Pattern.compile(regexp, flags).matcher(v0.getString())
.replaceAll(replacement),
database.getMode().treatEmptyStringsAsNull); database.getMode().treatEmptyStringsAsNull);
} catch (StringIndexOutOfBoundsException e) { } catch (StringIndexOutOfBoundsException e) {
throw DbException.get( throw DbException.get(
...@@ -1655,6 +1663,19 @@ public class Function extends Expression implements FunctionCall { ...@@ -1655,6 +1663,19 @@ public class Function extends Expression implements FunctionCall {
database.getMode().treatEmptyStringsAsNull); database.getMode().treatEmptyStringsAsNull);
} }
break; break;
case REGEXP_LIKE: {
    // v0 = input string, v1 = regular expression, v2 = optional flags.
    final String pattern = v1.getString();
    // An absent or NULL flags argument means "no flags".
    String modeText = "";
    if (v2 != null && v2.getString() != null) {
        modeText = v2.getString();
    }
    final int patternFlags = makeRegexpFlags(modeText);
    try {
        // find() succeeds when any part of the input matches the pattern.
        boolean found = Pattern.compile(pattern, patternFlags)
                .matcher(v0.getString())
                .find();
        result = ValueBoolean.get(found);
    } catch (PatternSyntaxException e) {
        // Report an unparsable pattern, keeping the cause and the pattern text.
        throw DbException.get(ErrorCode.LIKE_ESCAPE_ERROR_1, e, pattern);
    }
    break;
}
case VALUES: case VALUES:
result = session.getVariable(args[0].getSchemaName() + "." + result = session.getVariable(args[0].getSchemaName() + "." +
args[0].getTableName() + "." + args[0].getColumnName()); args[0].getTableName() + "." + args[0].getColumnName());
...@@ -2110,6 +2131,30 @@ public class Function extends Expression implements FunctionCall { ...@@ -2110,6 +2131,30 @@ public class Function extends Expression implements FunctionCall {
return hc; return hc;
} }
/**
 * Converts a string of single-character regular expression flags into the
 * int bit mask accepted by {@link Pattern#compile(String, int)}.
 * <p>
 * The mask always starts with {@link Pattern#UNICODE_CASE}. Characters are
 * applied from left to right, so a later flag can undo an earlier one:
 * <ul>
 * <li>'i' sets {@link Pattern#CASE_INSENSITIVE}</li>
 * <li>'c' clears {@link Pattern#CASE_INSENSITIVE}</li>
 * <li>'n' sets {@link Pattern#DOTALL}</li>
 * <li>'m' sets {@link Pattern#MULTILINE}</li>
 * </ul>
 * Any other character raises the exception created by
 * {@code DbException.get(ErrorCode.INVALID_VALUE_2, stringFlags)}.
 *
 * @param stringFlags the flag characters; null is treated like an empty
 *            string
 * @return the combined Pattern flags
 */
public int makeRegexpFlags(String stringFlags) {
    int mask = Pattern.UNICODE_CASE;
    if (stringFlags == null) {
        return mask;
    }
    for (char flag : stringFlags.toCharArray()) {
        if (flag == 'i') {
            mask |= Pattern.CASE_INSENSITIVE;
        } else if (flag == 'c') {
            mask &= ~Pattern.CASE_INSENSITIVE;
        } else if (flag == 'n') {
            mask |= Pattern.DOTALL;
        } else if (flag == 'm') {
            mask |= Pattern.MULTILINE;
        } else {
            throw DbException.get(ErrorCode.INVALID_VALUE_2, stringFlags);
        }
    }
    return mask;
}
@Override @Override
public int getType() { public int getType() {
...@@ -2199,6 +2244,14 @@ public class Function extends Expression implements FunctionCall { ...@@ -2199,6 +2244,14 @@ public class Function extends Expression implements FunctionCall {
case CASE: case CASE:
min = 3; min = 3;
break; break;
// NOTE(review): min/max are presumably the allowed argument-count bounds
// for this VAR_ARGS function - confirm against the enclosing method.
// 3..4 matches REGEXP_REPLACE(input, regex, replacement [, flags]).
case REGEXP_REPLACE:
min = 3;
max = 4;
break;
// 2..3 matches REGEXP_LIKE(input, regex [, flags]).
case REGEXP_LIKE:
min = 2;
max = 3;
break;
default: default:
DbException.throwInternalError("type=" + info.type); DbException.throwInternalError("type=" + info.type);
} }
......
...@@ -1140,9 +1140,13 @@ TRIM ( [ { LEADING | TRAILING | BOTH } [ string ] FROM ] string ) ...@@ -1140,9 +1140,13 @@ TRIM ( [ { LEADING | TRAILING | BOTH } [ string ] FROM ] string )
"," ","
Removes all leading spaces, trailing spaces, or spaces at both ends, from a string." Removes all leading spaces, trailing spaces, or spaces at both ends, from a string."
"Functions (String)","REGEXP_REPLACE"," "Functions (String)","REGEXP_REPLACE","
REGEXP_REPLACE(inputString, regexString, replacementString) REGEXP_REPLACE(inputString, regexString, replacementString [, flagsString])
"," ","
Replaces each substring that matches a regular expression." Replaces each substring that matches a regular expression."
"Functions (String)","REGEXP_LIKE","
REGEXP_LIKE(inputString, regexString [, flagsString])
","
Matches string to a regular expression."
"Functions (String)","REPEAT"," "Functions (String)","REPEAT","
REPEAT(string, int) REPEAT(string, int)
"," ","
......
...@@ -167,6 +167,9 @@ drop table test; ...@@ -167,6 +167,9 @@ drop table test;
call regexp_replace('x', 'x', '\'); call regexp_replace('x', 'x', '\');
> exception > exception
call select 1 from dual where regexp_like('x', 'x', '\');
> exception
select * from dual where x = x + 1 or x in(2, 0); select * from dual where x = x + 1 or x in(2, 0);
> X > X
> - > -
...@@ -2112,6 +2115,17 @@ CALL REGEXP_REPLACE('abckaboooom', 'o+', 'o'); ...@@ -2112,6 +2115,17 @@ CALL REGEXP_REPLACE('abckaboooom', 'o+', 'o');
> abckabom > abckabom
> rows: 1 > rows: 1
select x from dual where REGEXP_LIKE('aBc', '[a-z]*', 'i');
> X
> -
> 1
> rows: 1
select x from dual where REGEXP_LIKE('aBc', '[a-z]*', 'c');
> X
> -
> rows: 0
SELECT 'Hello' ~ 'He.*' T1, 'HELLO' ~ 'He.*' F2, CAST('HELLO' AS VARCHAR_IGNORECASE) ~ 'He.*' T3; SELECT 'Hello' ~ 'He.*' T1, 'HELLO' ~ 'He.*' F2, CAST('HELLO' AS VARCHAR_IGNORECASE) ~ 'He.*' T3;
> T1 F2 T3 > T1 F2 T3
> ---- ----- ---- > ---- ----- ----
......
...@@ -720,4 +720,5 @@ xtime xts xvi xyz yacute year years yen yes yet yield yielding yjp ymd york you ...@@ -720,4 +720,5 @@ xtime xts xvi xyz yacute year years yen yes yet yield yielding yjp ymd york you
young younger youngest your yourself youtube ytd yuml yyfxyy yyyymmdd zeile zen young younger youngest your yourself youtube ytd yuml yyfxyy yyyymmdd zeile zen
zepfred zero zeroes zeros zeta zhang zip ziv zloty zone zones zurich zwj zwnj zepfred zero zeroes zeros zeta zhang zip ziv zloty zone zones zurich zwj zwnj
recompiled incl reveal designators templates invoked candidate handshake altered recompiled incl reveal designators templates invoked candidate handshake altered
accomplished permanent clarify weaken excl alternatively dita imjcc optimizes accomplished permanent clarify weaken excl alternatively dita imjcc optimizes
\ No newline at end of file dotall multiline
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论