提交 1e48ee39 authored 作者: Noel Grandin's avatar Noel Grandin

Make the index costing sensitive to the size of the index

Revamp the selection of indexes to move the bulk of the costing into
BaseIndex.
In the process, clean it up and modify the costing so that, when using a
covering index, we pick the index with the fewest columns.
Smaller indexes are faster to load, leading to lower retrieval costs.
上级 fbafea92
......@@ -160,22 +160,19 @@ public abstract class BaseIndex extends SchemaObjectBase implements Index {
* @param sortOrder the sort order
* @return the estimated cost
*/
protected long getCostRangeIndex(int[] masks, long rowCount,
TableFilter[] filters, int filter, SortOrder sortOrder) {
protected final long getCostRangeIndex(int[] masks, long rowCount,
TableFilter[] filters, int filter, SortOrder sortOrder, boolean isScanIndex) {
rowCount += Constants.COST_ROW_OFFSET;
long cost = rowCount;
long rows = rowCount;
int totalSelectivity = 0;
if (masks == null) {
return cost;
}
long rowsCost = rowCount;
if (masks != null) {
for (int i = 0, len = columns.length; i < len; i++) {
Column column = columns[i];
int index = column.getColumnId();
int mask = masks[index];
if ((mask & IndexCondition.EQUALITY) == IndexCondition.EQUALITY) {
if (i == columns.length - 1 && getIndexType().isUnique()) {
cost = 3;
rowsCost = 3;
break;
}
totalSelectivity = 100 - ((100 - totalSelectivity) *
......@@ -184,34 +181,38 @@ public abstract class BaseIndex extends SchemaObjectBase implements Index {
if (distinctRows <= 0) {
distinctRows = 1;
}
rows = Math.max(rowCount / distinctRows, 1);
cost = 2 + rows;
rowsCost = 2 + Math.max(rowCount / distinctRows, 1);
} else if ((mask & IndexCondition.RANGE) == IndexCondition.RANGE) {
cost = 2 + rows / 4;
rowsCost = 2 + rowCount / 4;
break;
} else if ((mask & IndexCondition.START) == IndexCondition.START) {
cost = 2 + rows / 3;
rowsCost = 2 + rowCount / 3;
break;
} else if ((mask & IndexCondition.END) == IndexCondition.END) {
cost = rows / 3;
rowsCost = rowCount / 3;
break;
} else {
break;
}
}
// if the ORDER BY clause matches the ordering of this index,
// it will be cheaper than another index, so adjust the cost accordingly
}
// If the ORDER BY clause matches the ordering of this index,
// it will be cheaper than another index, so adjust the cost accordingly.
long sortingCost = 0;
if (sortOrder != null) {
sortingCost = 100 + rowCount/10;
}
if (sortOrder != null && !isScanIndex) {
boolean sortOrderMatches = true;
int coveringCount = 0;
int[] sortTypes = sortOrder.getSortTypes();
TableFilter tableFilter = filters == null ? null : filters[filter];
for (int i = 0, len = sortTypes.length; i < len; i++) {
if (i >= indexColumns.length) {
// we can still use this index if we are sorting by more
// We can still use this index if we are sorting by more
// than its columns, it's just that the coveringCount
// is lower than with an index that contains
// more of the order by columns
// more of the order by columns.
break;
}
Column col = sortOrder.getColumn(i, tableFilter);
......@@ -220,7 +221,7 @@ public abstract class BaseIndex extends SchemaObjectBase implements Index {
break;
}
IndexColumn indexCol = indexColumns[i];
if (col != indexCol.column) {
if (!col.equals(indexCol.column)) {
sortOrderMatches = false;
break;
}
......@@ -234,12 +235,14 @@ public abstract class BaseIndex extends SchemaObjectBase implements Index {
if (sortOrderMatches) {
// "coveringCount" makes sure that when we have two
// or more covering indexes, we choose the one
// that covers more
cost -= coveringCount;
// that covers more.
sortingCost = 100 - coveringCount;
}
}
// If we have two indexes with the same cost, and one of the indexes can satisfy the query
// without needing to read from the primary table, make that one slightly lower cost
// without needing to read from the primary table, make that one slightly lower cost.
boolean needsToReadFromScanIndex = true;
if (!isScanIndex) {
HashSet<Column> set1 = New.hashSet();
for (int i = 0; i < filters.length; i++) {
if (filters[i].getSelect() != null) {
......@@ -253,12 +256,27 @@ public abstract class BaseIndex extends SchemaObjectBase implements Index {
set2.add(c);
}
}
set2.removeAll(Arrays.asList(getColumns()));
set2.removeAll(Arrays.asList(columns));
if (set2.isEmpty()) {
cost -= 1;
needsToReadFromScanIndex = false;
}
}
}
return cost;
long rc;
if (isScanIndex) {
rc = rowsCost + sortingCost + 20;
} else if (needsToReadFromScanIndex) {
rc = rowsCost + rowsCost + sortingCost + 20;
} else {
/* The (20-x) calculation makes sure that when we pick a covering index, we pick the covering
* index that has the smallest number of columns. This is faster because a smaller index will fit into
* fewer data blocks.
*/
rc = rowsCost + sortingCost + (20 - columns.length);
}
getDatabase().getTrace(0).debug("needsToReadFromScanIndex " + needsToReadFromScanIndex + " isScanIndex "
+ isScanIndex + " rowsCost " + rowsCost + " rowCount " + rowCount + " sortingCost " + sortingCost + " rc " +rc);
return rc;
}
@Override
......
......@@ -144,7 +144,7 @@ public class LinkedIndex extends BaseIndex {
public double getCost(Session session, int[] masks,
TableFilter[] filters, int filter, SortOrder sortOrder) {
return 100 + getCostRangeIndex(masks, rowCount +
Constants.COST_ROW_OFFSET, filters, filter, sortOrder);
Constants.COST_ROW_OFFSET, filters, filter, sortOrder, false);
}
@Override
......
......@@ -58,7 +58,7 @@ public class MetaIndex extends BaseIndex {
return 10 * MetaTable.ROW_COUNT_APPROXIMATION;
}
return getCostRangeIndex(masks, MetaTable.ROW_COUNT_APPROXIMATION,
filters, filter, sortOrder);
filters, filter, sortOrder, false);
}
@Override
......
......@@ -220,7 +220,7 @@ public class PageBtreeIndex extends PageIndex {
public double getCost(Session session, int[] masks,
TableFilter[] filters, int filter, SortOrder sortOrder) {
return 10 * getCostRangeIndex(masks, tableData.getRowCount(session),
filters, filter, sortOrder);
filters, filter, sortOrder, false);
}
@Override
......
......@@ -99,7 +99,7 @@ public class PageDelegateIndex extends PageIndex {
public double getCost(Session session, int[] masks,
TableFilter[] filters, int filter, SortOrder sortOrder) {
return 10 * getCostRangeIndex(masks, mainIndex.getRowCount(session),
filters, filter, sortOrder);
filters, filter, sortOrder, false);
}
@Override
......
......@@ -178,12 +178,6 @@ public class SpatialTreeIndex extends BaseIndex implements SpatialIndex {
filter.getSession());
}
@Override
protected long getCostRangeIndex(int[] masks, long rowCount,
TableFilter[] filters, int filter, SortOrder sortOrder) {
return getCostRangeIndex(masks, rowCount, columns);
}
/**
* Compute spatial index cost
* @param masks Search mask
......@@ -210,10 +204,10 @@ public class SpatialTreeIndex extends BaseIndex implements SpatialIndex {
@Override
public double getCost(Session session, int[] masks,
TableFilter[] filters, int filter, SortOrder sortOrder) {
return getCostRangeIndex(masks, table.getRowCountApproximation(),
filters, filter, sortOrder);
return getCostRangeIndex(masks, table.getRowCountApproximation(), columns);
}
@Override
public void remove(Session session) {
if (!treeMap.isClosed()) {
......
......@@ -321,7 +321,7 @@ public class TreeIndex extends BaseIndex {
public double getCost(Session session, int[] masks, TableFilter[] filters, int filter,
SortOrder sortOrder) {
return getCostRangeIndex(masks, tableData.getRowCountApproximation(),
filters, filter, sortOrder);
filters, filter, sortOrder, false);
}
@Override
......
......@@ -91,7 +91,7 @@ public class MVDelegateIndex extends BaseIndex implements MVIndex {
public double getCost(Session session, int[] masks,
TableFilter[] filters, int filter, SortOrder sortOrder) {
return 10 * getCostRangeIndex(masks, mainIndex.getRowCountApproximation(),
filters, filter, sortOrder);
filters, filter, sortOrder, true);
}
@Override
......
......@@ -11,7 +11,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import org.h2.api.ErrorCode;
import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.engine.Session;
import org.h2.index.BaseIndex;
......@@ -219,10 +218,8 @@ public class MVPrimaryIndex extends BaseIndex {
public double getCost(Session session, int[] masks,
TableFilter[] filters, int filter, SortOrder sortOrder) {
try {
// we use 9 here and 10 in MVSecondaryIndex to bias the decision towards
// using a table-scan when it has similar cost to an index
return 9 * getCostRangeIndex(masks, dataMap.sizeAsLongMax(),
filters, filter, sortOrder);
return 10 * getCostRangeIndex(masks, dataMap.sizeAsLongMax(),
filters, filter, sortOrder, true);
} catch (IllegalStateException e) {
throw DbException.get(ErrorCode.OBJECT_CLOSED, e);
}
......
......@@ -355,7 +355,7 @@ public class MVSecondaryIndex extends BaseIndex implements MVIndex {
TableFilter[] filters, int filter, SortOrder sortOrder) {
try {
return 10 * getCostRangeIndex(masks, dataMap.sizeAsLongMax(),
filters, filter, sortOrder);
filters, filter, sortOrder, false);
} catch (IllegalStateException e) {
throw DbException.get(ErrorCode.OBJECT_CLOSED, e);
}
......
......@@ -239,14 +239,7 @@ public class MVSpatialIndex extends BaseIndex implements SpatialIndex, MVIndex {
@Override
public double getCost(Session session, int[] masks,
TableFilter[] filters, int filter, SortOrder sortOrder) {
return getCostRangeIndex(masks, table.getRowCountApproximation(),
filters, filter, sortOrder);
}
@Override
protected long getCostRangeIndex(int[] masks, long rowCount,
TableFilter[] filters, int filter, SortOrder sortOrder) {
return SpatialTreeIndex.getCostRangeIndex(masks, rowCount, columns);
return SpatialTreeIndex.getCostRangeIndex(masks, table.getRowCountApproximation(), columns);
}
@Override
......
......@@ -6,6 +6,7 @@
package org.h2.table;
import java.util.ArrayList;
import org.h2.command.Parser;
import org.h2.command.dml.Select;
import org.h2.engine.Right;
......@@ -17,9 +18,10 @@ import org.h2.expression.ConditionAndOr;
import org.h2.expression.Expression;
import org.h2.expression.ExpressionColumn;
import org.h2.index.Index;
import org.h2.index.IndexLookupBatch;
import org.h2.index.IndexCondition;
import org.h2.index.IndexCursor;
import org.h2.index.IndexLookupBatch;
import org.h2.index.ScanIndex;
import org.h2.index.ViewIndex;
import org.h2.message.DbException;
import org.h2.result.Row;
......@@ -179,16 +181,16 @@ public class TableFilter implements ColumnResolver {
* @return the best plan item
*/
public PlanItem getBestPlanItem(Session s, TableFilter[] filters, int filter) {
PlanItem item;
PlanItem item1 = null;
SortOrder sortOrder = null;
if (select != null) {
sortOrder = select.getSortOrder();
}
if (indexConditions.size() == 0) {
item = new PlanItem();
item.setIndex(table.getScanIndex(s, null, filters, filter, sortOrder));
item.cost = item.getIndex().getCost(s, null, filters, filter, sortOrder);
} else {
item1 = new PlanItem();
item1.setIndex(table.getScanIndex(s, null, filters, filter, sortOrder));
item1.cost = item1.getIndex().getCost(s, null, filters, filter, sortOrder);
}
int len = table.getColumns().length;
int[] masks = new int[len];
for (IndexCondition condition : indexConditions) {
......@@ -203,13 +205,17 @@ public class TableFilter implements ColumnResolver {
}
}
}
item = table.getBestPlanItem(s, masks, filters, filter, sortOrder);
PlanItem item = table.getBestPlanItem(s, masks, filters, filter, sortOrder);
item.setMasks(masks);
// The more index conditions, the earlier the table.
// This is to ensure joins without indexes run quickly:
// x (x.a=10); y (x.b=y.b) - see issue 113
item.cost -= item.cost * indexConditions.size() / 100 / (filter + 1);
if (item1 != null && item1.cost < item.cost) {
item = item1;
}
if (nestedJoin != null) {
setEvaluatable(nestedJoin);
item.setNestedJoinPlan(nestedJoin.getBestPlanItem(s, filters, filter));
......
......@@ -1043,6 +1043,11 @@ public class TestOptimizations extends TestBase {
ResultSet rs = stat.executeQuery("EXPLAIN ANALYZE SELECT MAX(b.id) as id FROM tbl_b b JOIN tbl_a a ON b.tbl_a_id = a.id GROUP BY b.tbl_a_id HAVING A.ACTIVE = TRUE");
rs.next();
assertContains(rs.getString(1), "/* PUBLIC.TBL_B_IDX: TBL_A_ID = A.ID */");
rs = stat.executeQuery("EXPLAIN ANALYZE SELECT MAX(id) FROM tbl_b GROUP BY tbl_a_id");
rs.next();
assertContains(rs.getString(1), "/* PUBLIC.TBL_B_IDX");
conn.close();
}
}
......@@ -80,10 +80,10 @@ public class TestSelectCountNonNullColumn extends TestBase {
if (expect >= 0) {
assertEquals(expect, rs.getLong(1));
} else {
// System.out.println(rs.getString(1));
assertEquals("SELECT\n" + " COUNT(KEY)\n"
assertEquals("SELECT\n"
+ " COUNT(KEY)\n"
+ "FROM PUBLIC.SIMPLE\n"
+ " /* PUBLIC.SIMPLE.tableScan */\n"
+ " /* PUBLIC.PRIMARY_KEY_9 */\n"
+ "/* direct lookup */", rs.getString(1));
}
}
......
......@@ -864,7 +864,7 @@ public class TestSpatial extends TestBase {
stat.execute("create spatial index spatial on test(the_geom)");
ResultSet rs = stat.executeQuery("explain select * from test where _ROWID_ = 5");
assertTrue(rs.next());
assertContains(rs.getString(1), "tableScan");
assertContains(rs.getString(1), "/* PUBLIC.SPATIAL: _ROWID_ = 5 */");
} finally {
// Close the database
conn.close();
......
......@@ -185,7 +185,7 @@ public class TestTableEngines extends TestBase {
Connection conn = getConnection("tableEngine");
Statement stat = conn.createStatement();
stat.executeUpdate("CREATE TABLE T(A INT, B VARCHAR, C BIGINT) ENGINE \"" +
stat.executeUpdate("CREATE TABLE T(A INT, B VARCHAR, C BIGINT, D BIGINT DEFAULT 0) ENGINE \"" +
TreeSetIndexTableEngine.class.getName() + "\"");
stat.executeUpdate("CREATE INDEX IDX_C_B_A ON T(C, B, A)");
......@@ -200,7 +200,7 @@ public class TestTableEngines extends TestBase {
dataSet.add(Arrays.<Object>asList(0, "1", null));
dataSet.add(Arrays.<Object>asList(2, null, 0L));
PreparedStatement prep = conn.prepareStatement("INSERT INTO T VALUES(?,?,?)");
PreparedStatement prep = conn.prepareStatement("INSERT INTO T(A,B,C) VALUES(?,?,?)");
for (List<Object> row : dataSet) {
for (int i = 0; i < row.size(); i++) {
prep.setObject(i + 1, row.get(i));
......@@ -227,7 +227,7 @@ public class TestTableEngines extends TestBase {
checkPlan(stat, "select * from t where a > 0 and b > ''", "IDX_B_A");
checkPlan(stat, "select * from t where b < ''", "IDX_B_A");
checkPlan(stat, "select * from t where b < '' and c < 1", "IDX_C_B_A");
checkPlan(stat, "select * from t where a = 0", "IDX_C_B_A");
checkPlan(stat, "select * from t where a = 0", "scan");
checkPlan(stat, "select * from t where a > 0 order by c, b", "IDX_C_B_A");
checkPlan(stat, "select * from t where a = 0 and c > 0", "IDX_C_B_A");
checkPlan(stat, "select * from t where a = 0 and b < 0", "IDX_B_A");
......@@ -517,7 +517,7 @@ public class TestTableEngines extends TestBase {
+ "INNER JOIN PUBLIC.T T2 /* batched:test PUBLIC.T_IDX_B: B = U.B */ "
+ "ON 1=1 WHERE (T1.A = U.A) AND (U.B = T2.B)");
checkPlan(stat, "SELECT 1 FROM ( SELECT A FROM PUBLIC.T ) Z "
+ "/* SELECT A FROM PUBLIC.T /++ PUBLIC.\"scan\" ++/ */ "
+ "/* SELECT A FROM PUBLIC.T /++ PUBLIC.T_IDX_A ++/ */ "
+ "INNER JOIN PUBLIC.T /* batched:test PUBLIC.T_IDX_B: B = Z.A */ "
+ "ON 1=1 WHERE Z.A = T.B");
checkPlan(stat, "SELECT 1 FROM PUBLIC.T /* PUBLIC.T_IDX_B */ "
......@@ -554,9 +554,9 @@ public class TestTableEngines extends TestBase {
+ "ON 1=1 WHERE T.B = U.B */ INNER JOIN PUBLIC.T /* batched:test PUBLIC.T_IDX_A: A = Z.A */ "
+ "ON 1=1 WHERE Z.A = T.A");
checkPlan(stat, "SELECT 1 FROM ( (SELECT A FROM PUBLIC.T) UNION (SELECT A FROM PUBLIC.U) ) Z "
+ "/* (SELECT A FROM PUBLIC.T /++ PUBLIC.\"scan\" ++/) "
+ "/* (SELECT A FROM PUBLIC.T /++ PUBLIC.T_IDX_A ++/) "
+ "UNION "
+ "(SELECT A FROM PUBLIC.U /++ PUBLIC.\"scan\" ++/) */ "
+ "(SELECT A FROM PUBLIC.U /++ PUBLIC.U_IDX_A ++/) */ "
+ "INNER JOIN PUBLIC.T /* batched:test PUBLIC.T_IDX_A: A = Z.A */ ON 1=1 WHERE Z.A = T.A");
checkPlan(stat, "SELECT 1 FROM PUBLIC.U /* PUBLIC.U_IDX_B */ "
+ "INNER JOIN ( (SELECT A, B FROM PUBLIC.T) UNION (SELECT B, A FROM PUBLIC.U) ) Z "
......@@ -1132,7 +1132,7 @@ public class TestTableEngines extends TestBase {
public double getCost(Session session, int[] masks,
TableFilter[] filters, int filter, SortOrder sortOrder) {
doTests(session);
return getRowCount(session) + Constants.COST_ROW_OFFSET;
return getCostRangeIndex(masks, getRowCount(session), filters, filter, sortOrder, true);
}
};
......@@ -1476,7 +1476,7 @@ public class TestTableEngines extends TestBase {
public double getCost(Session session, int[] masks,
TableFilter[] filters, int filter, SortOrder sortOrder) {
doTests(session);
return getCostRangeIndex(masks, set.size(), filters, filter, sortOrder);
return getCostRangeIndex(masks, set.size(), filters, filter, sortOrder, false);
}
@Override
......@@ -1580,9 +1580,6 @@ public class TestTableEngines extends TestBase {
@SuppressWarnings("unchecked")
@Override
public int compare(List<Object> row1, List<Object> row2) {
if (row1.size() != row2.size()) {
throw new IllegalStateException("Row size mismatch.");
}
for (int i = 0; i < cols.length; i++) {
int col = cols[i];
Comparable<Object> o1 = (Comparable<Object>) row1.get(col);
......
......@@ -3825,8 +3825,8 @@ inner join test2 on test1.id=test2.id left
outer join test3 on test2.id=test3.id
where test3.id is null;
> PLAN
> -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
> SELECT TEST1.ID, TEST2.ID, TEST3.ID FROM PUBLIC.TEST1 /* PUBLIC.TEST1.tableScan */ INNER JOIN PUBLIC.TEST2 /* PUBLIC.PRIMARY_KEY_4C: ID = TEST1.ID AND ID = TEST1.ID */ ON 1=1 /* WHERE TEST1.ID = TEST2.ID */ LEFT OUTER JOIN PUBLIC.TEST3 /* PUBLIC.PRIMARY_KEY_4C0: ID = TEST2.ID */ ON TEST2.ID = TEST3.ID WHERE (TEST3.ID IS NULL) AND (TEST1.ID = TEST2.ID)
> --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
> SELECT TEST1.ID, TEST2.ID, TEST3.ID FROM PUBLIC.TEST2 /* PUBLIC.TEST2.tableScan */ LEFT OUTER JOIN PUBLIC.TEST3 /* PUBLIC.PRIMARY_KEY_4C0: ID = TEST2.ID */ ON TEST2.ID = TEST3.ID INNER JOIN PUBLIC.TEST1 /* PUBLIC.PRIMARY_KEY_4: ID = TEST2.ID */ ON 1=1 WHERE (TEST3.ID IS NULL) AND (TEST1.ID = TEST2.ID)
> rows: 1
insert into test1 select x from system_range(2, 1000);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论