提交 8d803f02 作者: Thomas Mueller

Improved output and speed for large (>10 GB) directories.

上级 87be3ddb
...@@ -20,9 +20,10 @@ import java.io.InputStream; ...@@ -20,9 +20,10 @@ import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.zip.Deflater; import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream; import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream; import java.util.zip.InflaterInputStream;
...@@ -67,8 +68,29 @@ public class ArchiveTool { ...@@ -67,8 +68,29 @@ public class ArchiveTool {
} }
private static void compress(String fromDir, String toFile) throws IOException { private static void compress(String fromDir, String toFile) throws IOException {
long start = System.currentTimeMillis(); final long start = System.currentTimeMillis();
long size = getSize(new File(fromDir)); final AtomicBoolean title = new AtomicBoolean();
long size = getSize(new File(fromDir), new Runnable() {
int count;
long lastTime = start;
@Override
public void run() {
count++;
if (count % 1000 == 0) {
long now = System.currentTimeMillis();
if (now - lastTime > 3000) {
if (!title.getAndSet(true)) {
System.out.println("Counting files");
}
System.out.print(count + " ");
lastTime = now;
}
}
}
});
if (title.get()) {
System.out.println();
}
System.out.println("Compressing " + size / MB + " MB"); System.out.println("Compressing " + size / MB + " MB");
InputStream in = getDirectoryInputStream(fromDir); InputStream in = getDirectoryInputStream(fromDir);
String temp = toFile + ".temp"; String temp = toFile + ".temp";
...@@ -109,19 +131,20 @@ public class ArchiveTool { ...@@ -109,19 +131,20 @@ public class ArchiveTool {
" seconds"); " seconds");
} }
private static long getSize(File f) { private static long getSize(File f, Runnable r) {
// assume a metadata entry is 40 bytes // assume a metadata entry is 40 bytes
long size = 40; long size = 40;
if (f.isDirectory()) { if (f.isDirectory()) {
File[] list = f.listFiles(); File[] list = f.listFiles();
if (list != null) { if (list != null) {
for (File c : list) { for (File c : list) {
size += getSize(c); size += getSize(c, r);
} }
} }
} else { } else {
size += f.length(); size += f.length();
} }
r.run();
return size; return size;
} }
...@@ -391,11 +414,11 @@ public class ArchiveTool { ...@@ -391,11 +414,11 @@ public class ArchiveTool {
tempOut.close(); tempOut.close();
size = outPos; size = outPos;
inPos = 0; inPos = 0;
ArrayList<ChunkStream> segmentIn = new ArrayList<ChunkStream>(); TreeSet<ChunkStream> segmentIn = new TreeSet<ChunkStream>();
for (int i = 0; i < segmentStart.size(); i++) { for (int i = 0; i < segmentStart.size(); i++) {
in = new FileInputStream(tempFileName); in = new FileInputStream(tempFileName);
in.skip(segmentStart.get(i)); in.skip(segmentStart.get(i));
ChunkStream s = new ChunkStream(); ChunkStream s = new ChunkStream(i);
s.readKey = true; s.readKey = true;
s.in = new DataInputStream(new BufferedInputStream(in)); s.in = new DataInputStream(new BufferedInputStream(in));
inPos += s.readNext(); inPos += s.readNext();
...@@ -413,8 +436,8 @@ public class ArchiveTool { ...@@ -413,8 +436,8 @@ public class ArchiveTool {
// chunk: pos* 0 data // chunk: pos* 0 data
while (segmentIn.size() > 0) { while (segmentIn.size() > 0) {
Collections.sort(segmentIn); ChunkStream s = segmentIn.first();
ChunkStream s = segmentIn.get(0); segmentIn.remove(s);
Chunk c = s.current; Chunk c = s.current;
if (last == null) { if (last == null) {
last = c; last = c;
...@@ -428,8 +451,8 @@ public class ArchiveTool { ...@@ -428,8 +451,8 @@ public class ArchiveTool {
} }
inPos += s.readNext(); inPos += s.readNext();
lastTime = printProgress(lastTime, 50, 100, inPos, size); lastTime = printProgress(lastTime, 50, 100, inPos, size);
if (s.current == null) { if (s.current != null) {
segmentIn.remove(0); segmentIn.add(s);
} }
} }
if (last != null) { if (last != null) {
...@@ -585,11 +608,11 @@ public class ArchiveTool { ...@@ -585,11 +608,11 @@ public class ArchiveTool {
tempOut.close(); tempOut.close();
size = outPos; size = outPos;
inPos = 0; inPos = 0;
ArrayList<ChunkStream> segmentIn = new ArrayList<ChunkStream>(); TreeSet<ChunkStream> segmentIn = new TreeSet<ChunkStream>();
for (int i = 0; i < segmentStart.size(); i++) { for (int i = 0; i < segmentStart.size(); i++) {
FileInputStream f = new FileInputStream(tempFileName); FileInputStream f = new FileInputStream(tempFileName);
f.skip(segmentStart.get(i)); f.skip(segmentStart.get(i));
ChunkStream s = new ChunkStream(); ChunkStream s = new ChunkStream(i);
s.in = new DataInputStream(new BufferedInputStream(f)); s.in = new DataInputStream(new BufferedInputStream(f));
inPos += s.readNext(); inPos += s.readNext();
if (s.current != null) { if (s.current != null) {
...@@ -598,14 +621,14 @@ public class ArchiveTool { ...@@ -598,14 +621,14 @@ public class ArchiveTool {
} }
DataOutputStream dataOut = new DataOutputStream(out); DataOutputStream dataOut = new DataOutputStream(out);
while (segmentIn.size() > 0) { while (segmentIn.size() > 0) {
Collections.sort(segmentIn); ChunkStream s = segmentIn.first();
ChunkStream s = segmentIn.get(0); segmentIn.remove(s);
Chunk c = s.current; Chunk c = s.current;
dataOut.write(c.value); dataOut.write(c.value);
inPos += s.readNext(); inPos += s.readNext();
lastTime = printProgress(lastTime, 50, 100, inPos, size); lastTime = printProgress(lastTime, 50, 100, inPos, size);
if (s.current == null) { if (s.current != null) {
segmentIn.remove(0); segmentIn.add(s);
} }
} }
new File(tempFileName).delete(); new File(tempFileName).delete();
...@@ -616,10 +639,15 @@ public class ArchiveTool { ...@@ -616,10 +639,15 @@ public class ArchiveTool {
* A stream of chunks. * A stream of chunks.
*/ */
static class ChunkStream implements Comparable<ChunkStream> { static class ChunkStream implements Comparable<ChunkStream> {
final int id;
Chunk current; Chunk current;
DataInputStream in; DataInputStream in;
boolean readKey; boolean readKey;
ChunkStream(int id) {
this.id = id;
}
/** /**
* Read the next chunk. * Read the next chunk.
* *
...@@ -635,7 +663,11 @@ public class ArchiveTool { ...@@ -635,7 +663,11 @@ public class ArchiveTool {
@Override @Override
public int compareTo(ChunkStream o) { public int compareTo(ChunkStream o) {
return current.compareTo(o.current); int comp = current.compareTo(o.current);
if (comp != 0) {
return comp;
}
return Integer.signum(id - o.id);
} }
} }
...@@ -700,7 +732,7 @@ public class ArchiveTool { ...@@ -700,7 +732,7 @@ public class ArchiveTool {
} }
len += writeVarLong(out, 0); len += writeVarLong(out, 0);
if (writeKey) { if (writeKey) {
for (int i = 0; i < 4; i++) { for (int i = 0; i < sortKey.length; i++) {
out.writeInt(sortKey[i]); out.writeInt(sortKey[i]);
len += 4; len += 4;
} }
...@@ -722,6 +754,7 @@ public class ArchiveTool { ...@@ -722,6 +754,7 @@ public class ArchiveTool {
} else if (a > b) { } else if (a > b) {
return 1; return 1;
} }
return 0;
} }
for (int i = 0; i < sortKey.length; i++) { for (int i = 0; i < sortKey.length; i++) {
if (sortKey[i] < o.sortKey[i]) { if (sortKey[i] < o.sortKey[i]) {
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论