Unverified 提交 5a5a4050 authored 作者: Evgenij Ryazanov's avatar Evgenij Ryazanov 提交者: GitHub

Merge pull request #1725 from katzyn/aggregate

Add partial implementation of standard LISTAGG aggregate function
......@@ -2671,6 +2671,14 @@ columns with the same name.
TEST AS T LEFT JOIN TEST AS T1 ON T.ID = T1.ID
"
"Other Grammar","Within group specification","
WITHIN GROUP (ORDER BY {expression [ASC|DESC]} [,...])
","
Group specification for ordered set functions.
","
WITHIN GROUP (ORDER BY ID DESC)
"
"Other Grammar","Wildcard expression","
{* | tableAlias.*} [EXCEPT ([tableAlias.]columnName, [,...])]
","
......@@ -3582,10 +3590,12 @@ Aggregates are only allowed in select statements.
COUNT(*)
"
"Functions (Aggregate)","GROUP_CONCAT","
GROUP_CONCAT ( [ DISTINCT|ALL ] string
[ ORDER BY { expression [ ASC | DESC ] } [,...] ]
[ SEPARATOR expression ] )
"Functions (Aggregate)","LISTAGG","
{ LISTAGG ( [ DISTINCT|ALL ] string [, separatorString] [ ON OVERFLOW ERROR ] )
withinGroupSpecification }
| { GROUP_CONCAT ( [ DISTINCT|ALL ] string
[ ORDER BY { expression [ ASC | DESC ] } [,...] ]
[ SEPARATOR separatorString ] ) }
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
","
Concatenates strings with a separator.
......@@ -3593,14 +3603,10 @@ Separator must be the same for all rows in the same group.
The default separator is a ',' (without space).
This method returns a string.
If no rows are selected, the result is NULL.
If ORDER BY is not specified order of strings is not determined.
When this aggregate is used with OVER clause that contains ORDER BY subclause
it does not enforce exact order of strings.
This aggregate needs additional own ORDER BY clause to make it deterministic.
Aggregates are only allowed in select statements.
","
GROUP_CONCAT(NAME ORDER BY ID SEPARATOR ', ')
GROUP_CONCAT(ID ORDER BY ID SEPARATOR ', ') OVER (ORDER BY ID)
LISTAGG(NAME, ', ') WITHIN GROUP (ORDER BY ID)
LISTAGG(ID, ', ') WITHIN GROUP (ORDER BY ID) OVER (ORDER BY ID)
"
"Functions (Aggregate)","ARRAY_AGG","
......@@ -3721,7 +3727,7 @@ VAR_SAMP(X)
"Functions (Aggregate)","RANK aggregate","
RANK(value [,...])
WITHIN GROUP (ORDER BY {expression [ASC|DESC]} [,...])
withinGroupSpecification
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
","
Returns the rank of the hypothetical row in specified collection of rows.
......@@ -3734,7 +3740,7 @@ SELECT RANK(5) WITHIN GROUP (ORDER BY V) FROM TEST;
"Functions (Aggregate)","DENSE_RANK aggregate","
DENSE_RANK(value [,...])
WITHIN GROUP (ORDER BY {expression [ASC|DESC]} [,...])
withinGroupSpecification
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
","
Returns the dense rank of the hypothetical row in specified collection of rows.
......@@ -3747,7 +3753,7 @@ SELECT DENSE_RANK(5) WITHIN GROUP (ORDER BY V) FROM TEST;
"Functions (Aggregate)","PERCENT_RANK aggregate","
PERCENT_RANK(value [,...])
WITHIN GROUP (ORDER BY {expression [ASC|DESC]} [,...])
withinGroupSpecification
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
","
Returns the relative rank of the hypothetical row in specified collection of rows.
......@@ -3759,7 +3765,7 @@ SELECT PERCENT_RANK(5) WITHIN GROUP (ORDER BY V) FROM TEST;
"Functions (Aggregate)","CUME_DIST aggregate","
CUME_DIST(value [,...])
WITHIN GROUP (ORDER BY {expression [ASC|DESC]} [,...])
withinGroupSpecification
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
","
Returns the relative rank of the hypothetical row in specified collection of rows.
......
......@@ -3060,7 +3060,7 @@ public class Parser {
}
}
break;
case GROUP_CONCAT: {
case LISTAGG: {
boolean distinct = readDistinctAgg();
Expression arg = readExpression(), separator = null;
ArrayList<SelectOrderBy> orderByList = null;
......@@ -3072,7 +3072,7 @@ public class Parser {
read("BY");
orderByList = parseSimpleOrderList();
}
} else {
} else if (equalsToken("GROUP_CONCAT", aggregateName)){
if (readIf(ORDER)) {
read("BY");
orderByList = parseSimpleOrderList();
......@@ -3080,12 +3080,27 @@ public class Parser {
if (readIf("SEPARATOR")) {
separator = readExpression();
}
} else {
if (readIf(COMMA)) {
separator = readExpression();
}
if (readIf(ON)) {
read("OVERFLOW");
read("ERROR");
}
}
r = new Aggregate(AggregateType.GROUP_CONCAT,
separator == null ? new Expression[] { arg } : new Expression[] { arg, separator },
currentSelect, distinct);
if (orderByList != null) {
r.setOrderByList(orderByList);
Expression[] args = separator == null ? new Expression[] { arg } : new Expression[] { arg, separator };
int index = lastParseIndex;
read(CLOSE_PAREN);
if (orderByList == null && isToken("WITHIN")) {
r = readWithinGroup(aggregateType, args, distinct, false);
} else {
parseIndex = index;
read();
r = new Aggregate(AggregateType.LISTAGG, args, currentSelect, distinct);
if (orderByList != null) {
r.setOrderByList(orderByList);
}
}
break;
}
......@@ -3109,19 +3124,19 @@ public class Parser {
do {
expressions.add(readExpression());
} while (readIfMore(true));
r = readWithinGroup(aggregateType, expressions.toArray(new Expression[0]), true);
r = readWithinGroup(aggregateType, expressions.toArray(new Expression[0]), false, true);
break;
}
case PERCENTILE_CONT:
case PERCENTILE_DISC: {
Expression num = readExpression();
read(CLOSE_PAREN);
r = readWithinGroup(aggregateType, new Expression[] { num }, false);
r = readWithinGroup(aggregateType, new Expression[] { num }, false, false);
break;
}
case MODE: {
if (readIf(CLOSE_PAREN)) {
r = readWithinGroup(AggregateType.MODE, new Expression[0], false);
r = readWithinGroup(AggregateType.MODE, new Expression[0], false, false);
} else {
Expression expr = readExpression();
r = new Aggregate(aggregateType, new Expression[0], currentSelect, false);
......@@ -3150,13 +3165,14 @@ public class Parser {
return r;
}
private Aggregate readWithinGroup(AggregateType aggregateType, Expression[] args, boolean forHypotheticalSet) {
private Aggregate readWithinGroup(AggregateType aggregateType, Expression[] args, boolean distinct,
boolean forHypotheticalSet) {
read("WITHIN");
read(GROUP);
read(OPEN_PAREN);
read(ORDER);
read("BY");
Aggregate r = new Aggregate(aggregateType, args, currentSelect, false);
Aggregate r = new Aggregate(aggregateType, args, currentSelect, distinct);
if (forHypotheticalSet) {
int count = args.length;
ArrayList<SelectOrderBy> orderList = new ArrayList<>(count);
......
......@@ -89,9 +89,11 @@ public class Aggregate extends AbstractAggregate {
addAggregate("MIN", AggregateType.MIN);
addAggregate("MAX", AggregateType.MAX);
addAggregate("AVG", AggregateType.AVG);
addAggregate("GROUP_CONCAT", AggregateType.GROUP_CONCAT);
addAggregate("LISTAGG", AggregateType.LISTAGG);
// MySQL compatibility: group_concat(expression, delimiter)
addAggregate("GROUP_CONCAT", AggregateType.LISTAGG);
// PostgreSQL compatibility: string_agg(expression, delimiter)
addAggregate("STRING_AGG", AggregateType.GROUP_CONCAT);
addAggregate("STRING_AGG", AggregateType.LISTAGG);
addAggregate("STDDEV_SAMP", AggregateType.STDDEV_SAMP);
addAggregate("STDDEV", AggregateType.STDDEV_SAMP);
addAggregate("STDDEV_POP", AggregateType.STDDEV_POP);
......@@ -188,7 +190,7 @@ public class Aggregate extends AbstractAggregate {
private void updateData(Session session, AggregateData data, Value v, Value[] remembered) {
switch (aggregateType) {
case GROUP_CONCAT:
case LISTAGG:
if (v != ValueNull.INSTANCE) {
v = updateCollecting(session, v.convertTo(Value.STRING), remembered);
}
......@@ -393,8 +395,8 @@ public class Aggregate extends AbstractAggregate {
break;
case HISTOGRAM:
return getHistogram(session, data);
case GROUP_CONCAT:
return getGroupConcat(session, data);
case LISTAGG:
return getListagg(session, data);
case ARRAY_AGG: {
Value[] array = ((AggregateDataCollecting) data).getArray();
if (array == null) {
......@@ -521,7 +523,7 @@ public class Aggregate extends AbstractAggregate {
throw DbException.throwInternalError();
}
private Value getGroupConcat(Session session, AggregateData data) {
private Value getListagg(Session session, AggregateData data) {
AggregateDataCollecting collectingData = (AggregateDataCollecting) data;
Value[] array = collectingData.getArray();
if (array == null) {
......@@ -638,7 +640,7 @@ public class Aggregate extends AbstractAggregate {
int offset;
switch (aggregateType) {
case ARRAY_AGG:
case GROUP_CONCAT:
case LISTAGG:
offset = 1;
break;
default:
......@@ -647,7 +649,7 @@ public class Aggregate extends AbstractAggregate {
orderBySort = createOrder(session, orderByList, offset);
}
switch (aggregateType) {
case GROUP_CONCAT:
case LISTAGG:
type = TypeInfo.TYPE_STRING;
break;
case COUNT_ALL:
......@@ -746,21 +748,6 @@ public class Aggregate extends AbstractAggregate {
super.setEvaluatable(tableFilter, b);
}
private StringBuilder getSQLGroupConcat(StringBuilder builder) {
builder.append("GROUP_CONCAT(");
if (distinct) {
builder.append("DISTINCT ");
}
args[0].getSQL(builder);
Window.appendOrderBy(builder, orderByList);
if (args.length >= 2) {
builder.append(" SEPARATOR ");
args[1].getSQL(builder);
}
builder.append(')');
return appendTailConditions(builder);
}
private StringBuilder getSQLArrayAggregate(StringBuilder builder) {
builder.append("ARRAY_AGG(");
if (distinct) {
......@@ -776,8 +763,6 @@ public class Aggregate extends AbstractAggregate {
public StringBuilder getSQL(StringBuilder builder) {
String text;
switch (aggregateType) {
case GROUP_CONCAT:
return getSQLGroupConcat(builder);
case COUNT_ALL:
return appendTailConditions(builder.append("COUNT(*)"));
case COUNT:
......@@ -846,6 +831,9 @@ public class Aggregate extends AbstractAggregate {
case MEDIAN:
text = "MEDIAN";
break;
case LISTAGG:
text = "LISTAGG";
break;
case ARRAY_AGG:
return getSQLArrayAggregate(builder);
case MODE:
......@@ -860,18 +848,21 @@ public class Aggregate extends AbstractAggregate {
builder.append(text);
if (distinct) {
builder.append("(DISTINCT ");
args[0].getSQL(builder).append(')');
} else {
builder.append('(');
for (Expression arg : args) {
if (arg instanceof Subquery) {
arg.getSQL(builder);
} else {
arg.getUnenclosedSQL(builder);
}
}
for (int i = 0; i < args.length; i++) {
if (i > 0) {
builder.append(", ");
}
Expression arg = args[i];
if (arg instanceof Subquery) {
arg.getSQL(builder);
} else {
arg.getUnenclosedSQL(builder);
}
builder.append(')');
}
builder.append(')');
if (orderByList != null) {
builder.append(" WITHIN GROUP (");
Window.appendOrderBy(builder, orderByList);
......
......@@ -32,7 +32,7 @@ abstract class AggregateData {
return new AggregateDataCount(false);
}
break;
case GROUP_CONCAT:
case LISTAGG:
case ARRAY_AGG:
case RANK:
case DENSE_RANK:
......
......@@ -20,11 +20,6 @@ public enum AggregateType {
*/
COUNT,
/**
* The aggregate type for GROUP_CONCAT(...).
*/
GROUP_CONCAT,
/**
* The aggregate type for SUM(expression).
*/
......@@ -130,6 +125,11 @@ public enum AggregateType {
*/
MEDIAN,
/**
* The aggregate type for LISTAGG(...).
*/
LISTAGG,
/**
* The aggregate type for ARRAY_AGG(expression).
*/
......
......@@ -165,7 +165,7 @@ public class TestScript extends TestDb {
testScript("other/" + s + ".sql");
}
for (String s : new String[] { "any", "array-agg", "avg", "bit-and", "bit-or", "count", "envelope",
"every", "group-concat", "histogram", "max", "min", "mode", "percentile", "rank", "selectivity",
"every", "histogram", "listagg", "max", "min", "mode", "percentile", "rank", "selectivity",
"stddev-pop", "stddev-samp", "sum", "var-pop", "var-samp" }) {
testScript("functions/aggregate/" + s + ".sql");
}
......
......@@ -11,12 +11,20 @@ create table test(v varchar);
insert into test values ('1'), ('2'), ('3'), ('4'), ('5'), ('6'), ('7'), ('8'), ('9');
> update count: 9
select listagg(v, '-') within group (order by v asc),
listagg(v, '-') within group (order by v desc) filter (where v >= '4')
from test where v >= '2';
> LISTAGG(V, '-') WITHIN GROUP (ORDER BY V) LISTAGG(V, '-') WITHIN GROUP (ORDER BY V DESC) FILTER (WHERE (V >= '4'))
> ----------------------------------------- ------------------------------------------------------------------------
> 2-3-4-5-6-7-8-9 9-8-7-6-5-4
> rows: 1
select group_concat(v order by v asc separator '-'),
group_concat(v order by v desc separator '-') filter (where v >= '4')
from test where v >= '2';
> GROUP_CONCAT(V ORDER BY V SEPARATOR '-') GROUP_CONCAT(V ORDER BY V DESC SEPARATOR '-') FILTER (WHERE (V >= '4'))
> ---------------------------------------- -----------------------------------------------------------------------
> 2-3-4-5-6-7-8-9 9-8-7-6-5-4
> LISTAGG(V, '-') WITHIN GROUP (ORDER BY V) LISTAGG(V, '-') WITHIN GROUP (ORDER BY V DESC) FILTER (WHERE (V >= '4'))
> ----------------------------------------- ------------------------------------------------------------------------
> 2-3-4-5-6-7-8-9 9-8-7-6-5-4
> rows: 1
create index test_idx on test(v);
......@@ -25,17 +33,17 @@ create index test_idx on test(v);
select group_concat(v order by v asc separator '-'),
group_concat(v order by v desc separator '-') filter (where v >= '4')
from test where v >= '2';
> GROUP_CONCAT(V ORDER BY V SEPARATOR '-') GROUP_CONCAT(V ORDER BY V DESC SEPARATOR '-') FILTER (WHERE (V >= '4'))
> ---------------------------------------- -----------------------------------------------------------------------
> 2-3-4-5-6-7-8-9 9-8-7-6-5-4
> LISTAGG(V, '-') WITHIN GROUP (ORDER BY V) LISTAGG(V, '-') WITHIN GROUP (ORDER BY V DESC) FILTER (WHERE (V >= '4'))
> ----------------------------------------- ------------------------------------------------------------------------
> 2-3-4-5-6-7-8-9 9-8-7-6-5-4
> rows: 1
select group_concat(v order by v asc separator '-'),
group_concat(v order by v desc separator '-') filter (where v >= '4')
from test;
> GROUP_CONCAT(V ORDER BY V SEPARATOR '-') GROUP_CONCAT(V ORDER BY V DESC SEPARATOR '-') FILTER (WHERE (V >= '4'))
> ---------------------------------------- -----------------------------------------------------------------------
> 1-2-3-4-5-6-7-8-9 9-8-7-6-5-4
> LISTAGG(V, '-') WITHIN GROUP (ORDER BY V) LISTAGG(V, '-') WITHIN GROUP (ORDER BY V DESC) FILTER (WHERE (V >= '4'))
> ----------------------------------------- ------------------------------------------------------------------------
> 1-2-3-4-5-6-7-8-9 9-8-7-6-5-4
> rows: 1
drop table test;
......@@ -48,20 +56,20 @@ insert into test(v) values (7), (2), (8), (3), (7), (3), (9), (-1);
> update count: 8
select group_concat(v) from test;
> GROUP_CONCAT(V)
> LISTAGG(V)
> ----------------
> 7,2,8,3,7,3,9,-1
> rows: 1
select group_concat(distinct v) from test;
> GROUP_CONCAT(DISTINCT V)
> ------------------------
> LISTAGG(DISTINCT V)
> -------------------
> -1,2,3,7,8,9
> rows: 1
select group_concat(distinct v order by v desc) from test;
> GROUP_CONCAT(DISTINCT V ORDER BY V DESC)
> ----------------------------------------
> LISTAGG(DISTINCT V) WITHIN GROUP (ORDER BY V DESC)
> --------------------------------------------------
> 9,8,7,3,2,-1
> rows: 1
......@@ -71,17 +79,17 @@ drop table test;
create table test(g varchar, v int) as values ('-', 1), ('-', 2), ('-', 3), ('|', 4), ('|', 5), ('|', 6), ('*', null);
> ok
select g, group_concat(v separator g) from test group by g;
> G GROUP_CONCAT(V SEPARATOR G)
> - ---------------------------
select g, listagg(v, g) from test group by g;
> G LISTAGG(V, G)
> - -------------
> * null
> - 1-2-3
> | 4|5|6
> rows: 3
select g, group_concat(v separator g) over (partition by g) from test order by v;
> G GROUP_CONCAT(V SEPARATOR G) OVER (PARTITION BY G)
> - -------------------------------------------------
select g, listagg(v, g) over (partition by g) from test order by v;
> G LISTAGG(V, G) OVER (PARTITION BY G)
> - -----------------------------------
> * null
> - 1-2-3
> - 1-2-3
......@@ -91,6 +99,24 @@ select g, group_concat(v separator g) over (partition by g) from test order by v
> | 4|5|6
> rows (ordered): 7
select g, listagg(v, g on overflow error) within group (order by v) filter (where v <> 2) over (partition by g) from test order by v;
> G LISTAGG(V, G) WITHIN GROUP (ORDER BY V) FILTER (WHERE (V <> 2)) OVER (PARTITION BY G)
> - -------------------------------------------------------------------------------------
> * null
> - 1-3
> - 1-3
> - 1-3
> | 4|5|6
> | 4|5|6
> | 4|5|6
> rows (ordered): 7
select listagg(distinct v, '-') from test;
> LISTAGG(DISTINCT V, '-')
> ------------------------
> 1-2-3-4-5-6
> rows: 1
select g, group_concat(v separator v) from test group by g;
> exception INVALID_VALUE_2
......
......@@ -3898,8 +3898,8 @@ SELECT * FROM TEST;
> rows: 0
SELECT GROUP_CONCAT(ID) FROM TEST;
> GROUP_CONCAT(ID)
> ----------------
> LISTAGG(ID)
> -----------
> null
> rows: 1
......@@ -3930,8 +3930,8 @@ INSERT INTO TEST VALUES(2, 'World');
> update count: 1
SELECT group_concat(name) FROM TEST group by id;
> GROUP_CONCAT(NAME)
> ------------------
> LISTAGG(NAME)
> -------------
> Hello
> World
> rows: 2
......@@ -6666,8 +6666,8 @@ INSERT INTO TEST VALUES(?, ?, ?);
> update count: 9
SELECT IFNULL(NAME, '') || ': ' || GROUP_CONCAT(VALUE ORDER BY NAME, VALUE DESC SEPARATOR ', ') FROM TEST GROUP BY NAME ORDER BY 1;
> (IFNULL(NAME, '') || ': ') || GROUP_CONCAT(VALUE ORDER BY NAME, VALUE DESC SEPARATOR ', ')
> ------------------------------------------------------------------------------------------
> (IFNULL(NAME, '') || ': ') || LISTAGG(VALUE, ', ') WITHIN GROUP (ORDER BY NAME, VALUE DESC)
> -------------------------------------------------------------------------------------------
> : 3.10, -10.00
> Apples: 1.50, 1.20, 1.10
> Bananas: 2.50
......@@ -6676,14 +6676,14 @@ SELECT IFNULL(NAME, '') || ': ' || GROUP_CONCAT(VALUE ORDER BY NAME, VALUE DESC
> rows (ordered): 5
SELECT GROUP_CONCAT(ID ORDER BY ID) FROM TEST;
> GROUP_CONCAT(ID ORDER BY ID)
> ----------------------------
> LISTAGG(ID) WITHIN GROUP (ORDER BY ID)
> --------------------------------------
> 1,2,3,4,5,6,7,8,9
> rows: 1
SELECT STRING_AGG(ID,';') FROM TEST;
> GROUP_CONCAT(ID SEPARATOR ';')
> ------------------------------
> LISTAGG(ID, ';')
> -----------------
> 1;2;3;4;5;6;7;8;9
> rows: 1
......
......@@ -806,4 +806,4 @@ econd irst bcef ordinality nord unnest
analyst occupation distributive josaph aor engineer sajeewa isuru randil kevin doctor businessman artist ashan
corrupts splitted disruption unintentional octets preconditions predicates subq objectweb insn opcodes
preserves masking holder unboxing avert iae transformed subtle reevaluate exclusions subclause ftbl rgr
presorted inclusion contexts aax mwd percentile cont interpolate mwa hypothetical regproc childed
presorted inclusion contexts aax mwd percentile cont interpolate mwa hypothetical regproc childed listagg
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论