提交 301ae834 authored 作者: Thomas Mueller's avatar Thomas Mueller

Shrink the javascript documentation search index

上级 d42b9406
......@@ -16,7 +16,6 @@ import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.StringTokenizer;
import org.h2.util.IOUtils;
import org.h2.util.StringUtils;
......@@ -30,6 +29,10 @@ public class Indexer {
private static final int MAX_RELATIONS = 20;
/**
* The list of pages. Its size is used as the total page count when very
* common words are filtered out in sortWords().
*/
private ArrayList pages = new ArrayList();
/**
* Lower case word to Word map.
*/
private HashMap words = new HashMap();
private HashSet noIndex = new HashSet();
/**
* The list of Word objects, created in sortWords() from the values of the
* words map.
*/
private ArrayList wordList;
......@@ -87,20 +90,35 @@ public class Indexer {
}
private void sortWords() {
ArrayList names = new ArrayList(words.keySet());
for (int i = 0; i < names.size(); i++) {
String name = (String) names.get(i);
if (name.endsWith("s")) {
String singular = name.substring(0, name.length() - 1);
if (words.containsKey(singular)) {
Word wp = (Word) words.get(name);
Word ws = (Word) words.get(singular);
ws.addAll(wp);
words.remove(name);
}
} else if (name.startsWith("abc")) {
words.remove(name);
}
}
wordList = new ArrayList(words.values());
// TODO support ignored keywords (to shrink the index)
// String ignored = "";
// for(int i=0; i<wordList.size(); i++) {
// Word word = (Word) wordList.get(i);
// if(word.pages.size() >= pages.size()/4) {
// wordList.remove(i);
// if(ignored.length()==0) {
// ignored += ",";
// }
// ignored += word.name;
// i--;
// }
// }
// ignore very common words (to shrink the index)
String ignored = "";
for (int i = 0; i < wordList.size(); i++) {
Word word = (Word) wordList.get(i);
if (word.pages.size() >= pages.size() / 4) {
wordList.remove(i);
if (ignored.length() > 0) {
ignored += ",";
}
ignored += word.name;
i--;
}
}
// output.println("var ignored = '" + convertUTF(ignored) + "'");
// TODO support A, B, C,... class links in the index file and use them
// for combined AND searches
......
......@@ -42,4 +42,8 @@ public class Page {
this.fileName = fileName;
}
/**
 * Get a short text representation of this page, consisting of the letter
 * "p", the page id, and the file name in parentheses.
 *
 * @return the text representation
 */
public String toString() {
    String label = "p" + id;
    return label + "(" + fileName + ")";
}
}
......@@ -35,4 +35,9 @@ public class Weight {
* The weight value.
*/
int value;
/**
 * Get the weight value as a decimal string.
 *
 * @return the text representation of the value
 */
public String toString() {
    return String.valueOf(value);
}
}
......@@ -10,6 +10,8 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
* Represents a word of the full text index.
......@@ -21,7 +23,11 @@ public class Word {
*/
String name;
private HashMap pages = new HashMap();
/**
* Page to Weight map.
*/
HashMap pages = new HashMap();
private ArrayList weightList;
Word(String name) {
......@@ -45,6 +51,24 @@ public class Word {
page.relations++;
}
/**
 * Get a text representation of this word: the name, a colon, and the
 * pages map.
 *
 * @return the text representation
 */
public String toString() {
    String prefix = name + ":";
    return prefix + pages;
}
/**
 * Merge the page entries of another word into this word. For each entry
 * of the other word's pages map, the page and its weight value are passed
 * to addPage.
 *
 * @param other the word whose page entries are added to this word
 */
void addAll(Word other) {
    Iterator entries = other.pages.entrySet().iterator();
    while (entries.hasNext()) {
        Map.Entry e = (Map.Entry) entries.next();
        Page page = (Page) e.getKey();
        Weight weight = (Weight) e.getValue();
        addPage(page, weight.value);
    }
}
ArrayList getSortedWeights() {
if (weightList == null) {
weightList = new ArrayList(pages.values());
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论