Merge pull request #1596 from katzyn/misc

Improve SQL Standard compliance in LOB precision parsing

Merge pull request #1596 from katzyn/misc
Improve SQL Standard compliance in LOB precision parsing
88f32032 · Evgenij Ryazanov · GitHub · d6cebc3f · ba40896f · 88f32032
--- a/h2/src/docsrc/help/help.csv
+++ b/h2/src/docsrc/help/help.csv
@@ -3118,7 +3118,7 @@ CHAR(10)
 "Data Types","BLOB Type","
 { BLOB | BINARY LARGE OBJECT
    | TINYBLOB | MEDIUMBLOB | LONGBLOB | IMAGE | OID }
-[ ( precisionInt ) ]
+[ ( precisionInt [K|M|G|T|P]) ]
 ","
 Like BINARY, but intended for very large values such as files or images. Unlike
 when using BINARY, large objects are not kept fully in-memory. Use
@@ -3128,12 +3128,13 @@ Advanced / Large Objects.
 Mapped to ""java.sql.Blob"" (""java.io.InputStream"" is also supported).
 ","
 BLOB
+BLOB(10K)
 "
 "Data Types","CLOB Type","
 { CLOB | CHARACTER LARGE OBJECT
    | TINYTEXT | TEXT | MEDIUMTEXT | LONGTEXT | NTEXT | NCLOB }
-[ ( precisionInt ) ]
+[ ( precisionInt [K|M|G|T|P] [CHARACTERS|OCTETS]) ]
 ","
 CLOB is like VARCHAR, but intended for very large values. Unlike when using
 VARCHAR, large CLOB objects are not kept fully in-memory; instead, they are streamed.
@@ -3145,9 +3146,12 @@ VARCHAR should be used for text with relatively short average size (for example
 shorter than 200 characters). Short CLOB values are stored inline, but there is
 an overhead compared to VARCHAR.
+Precision, if any, should be specified in characters; the CHARACTERS and OCTETS units have no effect in H2.
 Mapped to ""java.sql.Clob"" (""java.io.Reader"" is also supported).
 ","
 CLOB
+CLOB(10K)
 "
 "Data Types","UUID Type","

--- a/h2/src/main/org/h2/bnf/context/DbContextRule.java
+++ b/h2/src/main/org/h2/bnf/context/DbContextRule.java
@@ -154,7 +154,7 @@ public class DbContextRule implements Rule {
                break;
            }
            String alias = up.substring(0, i);
-            if (ParserUtil.isKeyword(alias)) {
+            if (ParserUtil.isKeyword(alias, false)) {
                break;
            }
            s = s.substring(alias.length());
@@ -301,7 +301,7 @@ public class DbContextRule implements Rule {
            return s;
        }
        String alias = up.substring(0, i);
-        if ("SET".equals(alias) || ParserUtil.isKeyword(alias)) {
+        if ("SET".equals(alias) || ParserUtil.isKeyword(alias, false)) {
            return s;
        }
        if (newAlias) {

--- a/h2/src/main/org/h2/command/Parser.java
+++ b/h2/src/main/org/h2/command/Parser.java
@@ -4146,6 +4146,14 @@ public class Parser {
        return i;
    }
+    private long readNonNegativeLong() {
+        long v = readLong();
+        if (v < 0) {
+            throw DbException.getInvalidValueException("non-negative long", v);
+        }
+        return v;
+    }
    private long readLong() {
        boolean minus = false;
        if (currentTokenType == MINUS_SIGN) {
@@ -4917,11 +4925,7 @@ public class Parser {
    }
    private boolean isKeyword(String s) {
-        if (!identifiersToUpper) {
+        return ParserUtil.isKeyword(s, !identifiersToUpper);
-            // if not yet converted to uppercase, do it now
-            s = StringUtils.toUpperEnglish(s);
-        }
-        return ParserUtil.isKeyword(s);
    }
    private Column parseColumnForTable(String columnName,
@@ -5302,22 +5306,8 @@ public class Parser {
                }
            } else if (readIf(OPEN_PAREN)) {
                if (!readIf("MAX")) {
-                    long p = readLong();
+                    long p = readPrecision();
-                    if (readIf("K")) {
-                        p *= 1024;
-                    } else if (readIf("M")) {
-                        p *= 1024 * 1024;
-                    } else if (readIf("G")) {
-                        p *= 1024 * 1024 * 1024;
-                    }
-                    if (p > Long.MAX_VALUE) {
-                        p = Long.MAX_VALUE;
-                    }
                    original += "(" + p;
-                    // Oracle syntax
-                    if (!readIf("CHAR")) {
-                        readIf("BYTE");
-                    }
                    if (dataType.supportsScale) {
                        if (readIf(COMMA)) {
                            scale = readInt();
@@ -5435,6 +5425,48 @@ public class Parser {
        return column;
    }
+    private long readPrecision() {
+        long p = readNonNegativeLong();
+        if (currentTokenType == IDENTIFIER && !currentTokenQuoted && currentToken.length() == 1) {
+            long mul;
+            char ch = currentToken.charAt(0);
+            switch (identifiersToUpper ? ch : Character.toUpperCase(ch)) {
+            case 'K':
+                mul = 1L << 10;
+                break;
+            case 'M':
+                mul = 1L << 20;
+                break;
+            case 'G':
+                mul = 1L << 30;
+                break;
+            case 'T':
+                mul = 1L << 40;
+                break;
+            case 'P':
+                mul = 1L << 50;
+                break;
+            default:
+                throw getSyntaxError();
+            }
+            if (p > Long.MAX_VALUE / mul) {
+                throw DbException.getInvalidValueException("precision", p + currentToken);
+            }
+            p *= mul;
+            read();
+        }
+        if (currentTokenType == IDENTIFIER && !currentTokenQuoted) {
+            // Standard char length units
+            if (!readIf("CHARACTERS") && !readIf("OCTETS") &&
+                    // Oracle syntax
+                    !readIf("CHAR")) {
+                // Oracle syntax
+                readIf("BYTE");
+            }
+        }
+        return p;
+    }
    private Prepared parseCreate() {
        boolean orReplace = false;
        if (readIf("OR")) {

--- a/h2/src/main/org/h2/util/ParserUtil.java
+++ b/h2/src/main/org/h2/util/ParserUtil.java
@@ -235,14 +235,16 @@ public class ParserUtil {
     * Checks if this string is a SQL keyword.
     *
     * @param s the token to check
+     * @param ignoreCase true if case should be ignored, false if only upper case
+     *            tokens are detected as keywords
     * @return true if it is a keyword
     */
-    public static boolean isKeyword(String s) {
+    public static boolean isKeyword(String s, boolean ignoreCase) {
        int length = s.length();
        if (length == 0) {
            return false;
        }
-        return getSaveTokenType(s, false, 0, length, false) != IDENTIFIER;
+        return getSaveTokenType(s, ignoreCase, 0, length, false) != IDENTIFIER;
    }
    /**
@@ -278,7 +280,7 @@ public class ParserUtil {
     * @param ignoreCase true if case should be ignored, false if only upper case
     *            tokens are detected as keywords
     * @param start start index of token
-     * @param end index of token
+     * @param end end index of the token, exclusive; must be greater than the start index
     * @param additionalKeywords whether TOP, INTERSECTS, and "current data /
     *                           time" functions are keywords
     * @return the token type

--- a/h2/src/test/org/h2/test/scripts/datatypes/blob.sql
+++ b/h2/src/test/org/h2/test/scripts/datatypes/blob.sql
@@ -21,3 +21,33 @@ SELECT COLUMN_NAME, DATA_TYPE, TYPE_NAME, COLUMN_TYPE FROM INFORMATION_SCHEMA.CO
 DROP TABLE TEST;
 > ok
+CREATE TABLE TEST(B0 BLOB(10), B1 BLOB(10K), B2 BLOB(10M), B3 BLOB(10G), B4 BLOB(10T), B5 BLOB(10P));
+> ok
+SELECT COLUMN_NAME, COLUMN_TYPE FROM INFORMATION_SCHEMA.COLUMNS
+    WHERE TABLE_NAME = 'TEST' ORDER BY ORDINAL_POSITION;
+> COLUMN_NAME COLUMN_TYPE
+> ----------- -----------------------
+> B0          BLOB(10)
+> B1          BLOB(10240)
+> B2          BLOB(10485760)
+> B3          BLOB(10737418240)
+> B4          BLOB(10995116277760)
+> B5          BLOB(11258999068426240)
+> rows (ordered): 6
+INSERT INTO TEST(B0) VALUES ('0102030405060708091011');
+> exception VALUE_TOO_LONG_2
+INSERT INTO TEST(B0) VALUES ('01020304050607080910');
+> update count: 1
+SELECT B0 FROM TEST;
+>> 01020304050607080910
+DROP TABLE TEST;
+> ok
+CREATE TABLE TEST(B BLOB(8192P));
+> exception INVALID_VALUE_2
--- a/h2/src/test/org/h2/test/scripts/datatypes/clob.sql
+++ b/h2/src/test/org/h2/test/scripts/datatypes/clob.sql
@@ -23,3 +23,33 @@ SELECT COLUMN_NAME, DATA_TYPE, TYPE_NAME, COLUMN_TYPE FROM INFORMATION_SCHEMA.CO
 DROP TABLE TEST;
 > ok
+CREATE TABLE TEST(C0 CLOB(10), C1 CLOB(10K), C2 CLOB(10M CHARACTERS), C3 CLOB(10G OCTETS), C4 CLOB(10T), C5 CLOB(10P));
+> ok
+SELECT COLUMN_NAME, COLUMN_TYPE FROM INFORMATION_SCHEMA.COLUMNS
+    WHERE TABLE_NAME = 'TEST' ORDER BY ORDINAL_POSITION;
+> COLUMN_NAME COLUMN_TYPE
+> ----------- -----------------------
+> C0          CLOB(10)
+> C1          CLOB(10240)
+> C2          CLOB(10485760)
+> C3          CLOB(10737418240)
+> C4          CLOB(10995116277760)
+> C5          CLOB(11258999068426240)
+> rows (ordered): 6
+INSERT INTO TEST(C0) VALUES ('12345678901');
+> exception VALUE_TOO_LONG_2
+INSERT INTO TEST(C0) VALUES ('1234567890');
+> update count: 1
+SELECT C0 FROM TEST;
+>> 1234567890
+DROP TABLE TEST;
+> ok
+CREATE TABLE TEST(C CLOB(8192P));
+> exception INVALID_VALUE_2
--- a/h2/src/test/org/h2/test/scripts/datatypes/decimal.sql
+++ b/h2/src/test/org/h2/test/scripts/datatypes/decimal.sql
@@ -2,3 +2,9 @@
 -- and the EPL 1.0 (http://h2database.com/html/license.html).
 -- Initial Developer: H2 Group
 --
+CREATE TABLE TEST(I NUMERIC(-1));
+> exception INVALID_VALUE_2
+CREATE TABLE TEST(I NUMERIC(-1, -1));
+> exception INVALID_VALUE_2
--- a/h2/src/tools/org/h2/build/doc/dictionary.txt
+++ b/h2/src/tools/org/h2/build/doc/dictionary.txt
@@ -804,4 +804,4 @@ qualification opportunity jumping exploited unacceptable vrs duplicated
 queryparser tokenized freeze factorings recompilation unenclosed rfe dsync
 econd irst bcef ordinality nord unnest
 analyst occupation distributive josaph aor engineer sajeewa isuru randil kevin doctor businessman artist ashan
-corrupts splitted disruption unintentional
+corrupts splitted disruption unintentional octets