Unverified commit fdaab385 authored by Evgenij Ryazanov, committed by GitHub

Merge pull request #1515 from katzyn/aggregate

Use the same aggregate data for HISTOGRAM and MODE
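For context, a minimal SQL sketch of the two aggregates this merge unifies. The HISTOGRAM shape is taken from the histogram.sql script added further down; the MODE query and result are an illustration only (data chosen so the most frequent value is unambiguous), not part of the patch.

-- HISTOGRAM returns an array of (value, count) pairs sorted by value.
SELECT HISTOGRAM(X) FROM VALUES (1), (2), (3), (1), (2), (NULL), (5) T(X);
-- ((null, 1), (1, 2), (2, 2), (3, 1), (5, 1))

-- MODE returns the most frequently occurring value (2 occurs twice here).
SELECT MODE(X) FROM VALUES (1), (2), (2), (3) T(X);
-- 2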
@@ -9,10 +9,12 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.Map.Entry;
import org.h2.api.ErrorCode;
import org.h2.command.dml.Select;
import org.h2.command.dml.SelectOrderBy;
import org.h2.engine.Database;
+import org.h2.engine.Mode;
import org.h2.engine.Session;
import org.h2.expression.Expression;
import org.h2.expression.ExpressionColumn;
@@ -29,6 +31,8 @@ import org.h2.table.Table;
import org.h2.table.TableFilter;
import org.h2.util.StatementBuilder;
import org.h2.util.StringUtils;
+import org.h2.util.ValueHashMap;
+import org.h2.value.CompareMode;
import org.h2.value.DataType;
import org.h2.value.Value;
import org.h2.value.ValueArray;
@@ -445,33 +449,10 @@ public class Aggregate extends AbstractAggregate {
            return d.getValue(db, dataType);
        }
        break;
-   case GROUP_CONCAT: {
-       Value[] array = ((AggregateDataCollecting) data).getArray();
-       if (array == null) {
-           return ValueNull.INSTANCE;
-       }
-       if (orderByList != null || distinct) {
-           sortWithOrderBy(array);
-       }
-       StatementBuilder buff = new StatementBuilder();
-       String sep = groupConcatSeparator == null ? "," : groupConcatSeparator.getValue(session).getString();
-       for (Value val : array) {
-           String s;
-           if (val.getType() == Value.ARRAY) {
-               s = ((ValueArray) val).getList()[0].getString();
-           } else {
-               s = val.getString();
-           }
-           if (s == null) {
-               continue;
-           }
-           if (sep != null) {
-               buff.appendExceptFirst(sep);
-           }
-           buff.append(s);
-       }
-       return ValueString.get(buff.toString());
-   }
+   case HISTOGRAM:
+       return getHistogram(session, data);
+   case GROUP_CONCAT:
+       return getGroupConcat(session, data);
    case ARRAY_AGG: {
        Value[] array = ((AggregateDataCollecting) data).getArray();
        if (array == null) {
@@ -495,17 +476,106 @@ public class Aggregate extends AbstractAggregate {
            return AggregateMedian.median(session.getDatabase(), array, dataType);
        }
    case MODE:
-       if (orderByList != null) {
-           return ((AggregateDataMode) data).getOrderedValue(session.getDatabase(), dataType,
-                   (orderByList.get(0).sortType & SortOrder.DESCENDING) != 0);
-       }
-       //$FALL-THROUGH$
+       return getMode(session, data);
    default:
        // Avoid compiler warning
    }
    return data.getValue(session.getDatabase(), dataType);
}
private Value getGroupConcat(Session session, AggregateData data) {
Value[] array = ((AggregateDataCollecting) data).getArray();
if (array == null) {
return ValueNull.INSTANCE;
}
if (orderByList != null || distinct) {
sortWithOrderBy(array);
}
StatementBuilder buff = new StatementBuilder();
String sep = groupConcatSeparator == null ? "," : groupConcatSeparator.getValue(session).getString();
for (Value val : array) {
String s;
if (val.getType() == Value.ARRAY) {
s = ((ValueArray) val).getList()[0].getString();
} else {
s = val.getString();
}
if (s == null) {
continue;
}
if (sep != null) {
buff.appendExceptFirst(sep);
}
buff.append(s);
}
return ValueString.get(buff.toString());
}
private Value getHistogram(Session session, AggregateData data) {
ValueHashMap<LongDataCounter> distinctValues = ((AggregateDataDistinctWithCounts) data).getValues();
if (distinctValues == null) {
return ValueArray.getEmpty();
}
ValueArray[] values = new ValueArray[distinctValues.size()];
int i = 0;
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
LongDataCounter d = entry.getValue();
values[i] = ValueArray.get(new Value[] { entry.getKey(), ValueLong.get(distinct ? 1L : d.count) });
i++;
}
Database db = session.getDatabase();
final Mode mode = db.getMode();
final CompareMode compareMode = db.getCompareMode();
Arrays.sort(values, new Comparator<ValueArray>() {
@Override
public int compare(ValueArray v1, ValueArray v2) {
Value a1 = v1.getList()[0];
Value a2 = v2.getList()[0];
return a1.compareTo(a2, mode, compareMode);
}
});
return ValueArray.get(values);
}
private Value getMode(Session session, AggregateData data) {
Value v = ValueNull.INSTANCE;
ValueHashMap<LongDataCounter> distinctValues = ((AggregateDataDistinctWithCounts) data).getValues();
if (distinctValues == null) {
return v;
}
long count = 0L;
if (orderByList != null) {
boolean desc = (orderByList.get(0).sortType & SortOrder.DESCENDING) != 0;
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
long c = entry.getValue().count;
if (c > count) {
v = entry.getKey();
count = c;
} else if (c == count) {
Value v2 = entry.getKey();
int cmp = session.getDatabase().compareTypeSafe(v, v2);
if (desc) {
if (cmp >= 0) {
continue;
}
} else if (cmp <= 0) {
continue;
}
v = v2;
}
}
} else {
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
long c = entry.getValue().count;
if (c > count) {
v = entry.getKey();
count = c;
}
}
}
return v.convertTo(dataType);
}
    @Override
    public int getType() {
        return dataType;
...
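The tie-breaking branch in getMode above is easy to misread: when two values share the same count, an ascending ORDER BY keeps the smaller of the tied values and DESC keeps the larger. A hedged sketch, assuming the MODE(expr ORDER BY expr) form that the orderByList handling implies; the syntax and results below are illustrative, not taken from the patch.

-- Assumed syntax: 1 and 2 both occur twice; the ORDER BY direction decides the tie.
SELECT MODE(X ORDER BY X) FROM VALUES (1), (1), (2), (2), (3) T(X);
-- 1 (ascending: the smallest tied value wins)
SELECT MODE(X ORDER BY X DESC) FROM VALUES (1), (1), (2), (2), (3) T(X);
-- 2 (descending: the largest tied value wins)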
@@ -5,6 +5,7 @@
 */
package org.h2.expression.aggregate;

+import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.expression.aggregate.Aggregate.AggregateType;
import org.h2.message.DbException;
@@ -55,9 +56,9 @@ abstract class AggregateData {
    case SELECTIVITY:
        return new AggregateDataSelectivity(distinct);
    case HISTOGRAM:
-       return new AggregateDataHistogram(distinct);
+       return new AggregateDataDistinctWithCounts(false, Constants.SELECTIVITY_DISTINCT_COUNT);
    case MODE:
-       return new AggregateDataMode();
+       return new AggregateDataDistinctWithCounts(true, Integer.MAX_VALUE);
    case ENVELOPE:
        return new AggregateDataEnvelope();
    default:
...
/*
* Copyright 2004-2018 H2 Group. Multiple-Licensed under the MPL 2.0,
* and the EPL 1.0 (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.expression.aggregate;
import org.h2.engine.Database;
import org.h2.util.ValueHashMap;
import org.h2.value.Value;
import org.h2.value.ValueNull;
/**
* Data stored while calculating an aggregate that needs distinct values with
* their counts.
*/
class AggregateDataDistinctWithCounts extends AggregateData {
private final boolean ignoreNulls;
private final int maxDistinctCount;
private ValueHashMap<LongDataCounter> values;
/**
* Creates new instance of data for aggregate that needs distinct values
* with their counts.
*
* @param ignoreNulls
* whether NULL values should be ignored
* @param maxDistinctCount
* maximum count of distinct values to collect
*/
AggregateDataDistinctWithCounts(boolean ignoreNulls, int maxDistinctCount) {
this.ignoreNulls = ignoreNulls;
this.maxDistinctCount = maxDistinctCount;
}
@Override
void add(Database database, int dataType, Value v) {
if (ignoreNulls && v == ValueNull.INSTANCE) {
return;
}
if (values == null) {
values = new ValueHashMap<>();
}
LongDataCounter a = values.get(v);
if (a == null) {
if (values.size() >= maxDistinctCount) {
return;
}
a = new LongDataCounter();
values.put(v, a);
}
a.count++;
}
@Override
Value getValue(Database database, int dataType) {
return null;
}
/**
* Returns map with values and their counts.
*
* @return map with values and their counts
*/
ValueHashMap<LongDataCounter> getValues() {
return values;
}
}
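The two call sites in AggregateData.create configure this class differently: HISTOGRAM passes ignoreNulls = false and caps the map at Constants.SELECTIVITY_DISTINCT_COUNT, while MODE passes ignoreNulls = true with no practical cap. A minimal sketch of the visible difference; the HISTOGRAM expectation follows the histogram.sql script added below, the MODE line is an illustration only.

-- NULL is counted by HISTOGRAM (ignoreNulls = false) ...
SELECT HISTOGRAM(X) FROM VALUES (1), (1), (NULL) T(X);
-- ((null, 1), (1, 2))
-- ... but skipped by MODE (ignoreNulls = true).
SELECT MODE(X) FROM VALUES (1), (1), (NULL) T(X);
-- 1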
/*
* Copyright 2004-2018 H2 Group. Multiple-Licensed under the MPL 2.0,
* and the EPL 1.0 (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.expression.aggregate;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map.Entry;
import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.engine.Mode;
import org.h2.util.ValueHashMap;
import org.h2.value.CompareMode;
import org.h2.value.Value;
import org.h2.value.ValueArray;
import org.h2.value.ValueLong;
/**
* Data stored while calculating a HISTOGRAM aggregate.
*/
class AggregateDataHistogram extends AggregateData {
private final boolean distinct;
private ValueHashMap<LongDataCounter> distinctValues;
/**
* Creates new instance of data for HISTOGRAM aggregate.
*
* @param distinct if distinct is used
*/
AggregateDataHistogram(boolean distinct) {
this.distinct = distinct;
}
@Override
void add(Database database, int dataType, Value v) {
if (distinctValues == null) {
distinctValues = new ValueHashMap<>();
}
LongDataCounter a = distinctValues.get(v);
if (a == null) {
if (distinctValues.size() >= Constants.SELECTIVITY_DISTINCT_COUNT) {
return;
}
a = new LongDataCounter();
distinctValues.put(v, a);
}
a.count++;
}
@Override
Value getValue(Database database, int dataType) {
if (distinctValues == null) {
return ValueArray.getEmpty().convertTo(dataType);
}
ValueArray[] values = new ValueArray[distinctValues.size()];
int i = 0;
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
LongDataCounter d = entry.getValue();
values[i] = ValueArray.get(new Value[] { entry.getKey(), ValueLong.get(distinct ? 1L : d.count) });
i++;
}
final Mode mode = database.getMode();
final CompareMode compareMode = database.getCompareMode();
Arrays.sort(values, new Comparator<ValueArray>() {
@Override
public int compare(ValueArray v1, ValueArray v2) {
Value a1 = v1.getList()[0];
Value a2 = v2.getList()[0];
return a1.compareTo(a2, mode, compareMode);
}
});
return ValueArray.get(values).convertTo(dataType);
}
}
/*
* Copyright 2004-2018 H2 Group. Multiple-Licensed under the MPL 2.0,
* and the EPL 1.0 (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.expression.aggregate;
import java.util.Map.Entry;
import org.h2.engine.Database;
import org.h2.util.ValueHashMap;
import org.h2.value.Value;
import org.h2.value.ValueNull;
/**
* Data stored while calculating a MODE aggregate.
*/
class AggregateDataMode extends AggregateData {
private ValueHashMap<LongDataCounter> distinctValues;
@Override
void add(Database database, int dataType, Value v) {
if (v == ValueNull.INSTANCE) {
return;
}
if (distinctValues == null) {
distinctValues = new ValueHashMap<>();
}
LongDataCounter a = distinctValues.get(v);
if (a == null) {
a = new LongDataCounter();
distinctValues.put(v, a);
}
a.count++;
}
@Override
Value getValue(Database database, int dataType) {
Value v = ValueNull.INSTANCE;
if (distinctValues != null) {
long count = 0L;
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
long c = entry.getValue().count;
if (c > count) {
v = entry.getKey();
count = c;
}
}
}
return v.convertTo(dataType);
}
Value getOrderedValue(Database database, int dataType, boolean desc) {
Value v = ValueNull.INSTANCE;
if (distinctValues != null) {
long count = 0L;
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
long c = entry.getValue().count;
if (c > count) {
v = entry.getKey();
count = c;
} else if (c == count) {
Value v2 = entry.getKey();
int cmp = database.compareTypeSafe(v, v2);
if (desc) {
if (cmp >= 0) {
continue;
}
} else if (cmp <= 0) {
continue;
}
v = v2;
}
}
}
return v.convertTo(dataType);
}
}
@@ -162,7 +162,7 @@ public class TestScript extends TestDb {
            testScript("other/" + s + ".sql");
        }
        for (String s : new String[] { "array-agg", "avg", "bit-and", "bit-or", "count", "envelope",
-               "group-concat", "max", "median", "min", "mode", "selectivity", "stddev-pop",
+               "group-concat", "histogram", "max", "median", "min", "mode", "selectivity", "stddev-pop",
                "stddev-samp", "sum", "var-pop", "var-samp" }) {
            testScript("functions/aggregate/" + s + ".sql");
        }
...
-- Copyright 2004-2018 H2 Group. Multiple-Licensed under the MPL 2.0,
-- and the EPL 1.0 (http://h2database.com/html/license.html).
-- Initial Developer: H2 Group
--
SELECT HISTOGRAM(X), HISTOGRAM(DISTINCT X) FROM VALUES (1), (2), (3), (1), (2), (NULL), (5) T(X);
> HISTOGRAM(C1) HISTOGRAM(DISTINCT C1)
> ------------------------------------------- -------------------------------------------
> ((null, 1), (1, 2), (2, 2), (3, 1), (5, 1)) ((null, 1), (1, 1), (2, 1), (3, 1), (5, 1))
> rows: 1
SELECT HISTOGRAM(X) FILTER (WHERE X > 1), HISTOGRAM(DISTINCT X) FILTER (WHERE X > 1)
FROM VALUES (1), (2), (3), (1), (2), (NULL), (5) T(X);
> HISTOGRAM(C1) FILTER (WHERE (C1 > 1)) HISTOGRAM(DISTINCT C1) FILTER (WHERE (C1 > 1))
> ------------------------------------- ----------------------------------------------
> ((2, 2), (3, 1), (5, 1)) ((2, 1), (3, 1), (5, 1))
> rows: 1
SELECT HISTOGRAM(X) FILTER (WHERE X > 0), HISTOGRAM(DISTINCT X) FILTER (WHERE X > 0) FROM VALUES (0) T(X);
> HISTOGRAM(C1) FILTER (WHERE (C1 > 0)) HISTOGRAM(DISTINCT C1) FILTER (WHERE (C1 > 0))
> ------------------------------------- ----------------------------------------------
> () ()
> rows: 1