1,69 → 1,33 |
import java.io.*; |
import java.io.IOException; |
import java.io.InputStream; |
import java.io.FileInputStream; |
|
public class XmlParser2 |
public class CycleInputBuffer |
{ |
public static void main(String[] args) |
throws Exception |
{ |
if(args.length < 1) { |
System.err.println("Need file name as parameter"); |
return; |
} |
|
long startTime = System.nanoTime(); |
public static final int BUFFER_SIZE = 4096; |
|
private InputStream in; |
private String charsetName = "UTF-8"; |
|
FileInputStream in = new FileInputStream(args[0]); |
|
try { |
XmlBuffer buf = new XmlBuffer(in); |
long pos = -1; |
|
for(int i = 0; !buf.isEnd(); ++i) { |
byte c = buf.cur(); |
//System.out.print((char)c); |
buf.toNext(); |
/*if(i == 8 || i == 40) { |
buf.mark(); |
} |
else if(i == 14 || i == 42) { |
buf.reset(); |
} |
|
if(i % 7 == 0) { |
if(pos >= 0) { |
System.out.print(buf.toString(pos, (int)(buf.getPosition() - pos))); |
} |
pos = buf.getPosition(); |
}*/ |
} |
} |
finally { |
in.close(); |
} |
// The following members and the method toNextBuffer() are public, |
// but they should be accessed directly only in speed-critical applications; |
// the common way is to use the cur() and toNext() methods. |
public byte[][] bufs = new byte[2][BUFFER_SIZE]; // buffers |
public int bufCur = -1; // current buffer number from the 'bufs' |
public int lenCur = 0; // length of valid data in current buffer |
public int pos = 0; // position in buffers, 0 means begin of current one |
|
System.out.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms"); |
} |
} |
private int[] lens = new int[bufs.length]; // lengthes of valid data in buffers |
private int bufNext = -1; // next buffer to be used, -1 in not loaded |
private int bufPrev = -1; // previous buffer was used, -1 in not loaded |
private boolean end = false; // no more data in the input stream |
private long offset = 0; // offset in input for begin of current buffer |
|
class XmlBuffer |
{ |
private InputStream in; |
private String charsetName = "UTF-8"; |
|
private byte[][] bufs = new byte[2][4096]; |
private int[] lens = new int[bufs.length]; // lengthes of valid data in buffers |
private int bufCur = -1; |
private int bufNext = -1; |
private int bufPrev = -1; |
private int lenCur = 0; // length of valid data in current buffer |
private int pos = 0; // position in buffers, 0 means begin of current one |
private boolean end = false; // no more data in the input stream |
private long offset = 0; // offset in input for begin of current buffer |
|
// saved 'offset' and 'pos' |
private long markedOffset = -1; |
private int markedPos = 0; |
|
public XmlBuffer(InputStream in) |
public CycleInputBuffer(InputStream in) |
throws IOException |
{ |
this.in = in; |
78,12 → 42,12 |
{ |
return charsetName; |
} |
|
|
public void setCharsetName(String charsetName) |
{ |
this.charsetName = charsetName; |
} |
|
|
public byte cur() |
throws IOException |
{ |
90,9 → 54,9 |
if(pos >= lenCur) { |
return at(0); |
} |
|
//System.out.println("cur " + offset + "+" + pos + " [" + (char)bufs[bufCur][pos] + "]"); |
return bufs[bufCur][pos]; |
else { |
return bufs[bufCur][pos]; |
} |
} |
|
public byte at(int n) |
102,7 → 66,6 |
byte[] buf; |
|
if(p >= lenCur) { |
//System.out.println("at p1 " + p + " " + lenCur); |
if(!loadNextBuffer()) return 0; // no more data |
|
p -= lenCur; |
111,14 → 74,11 |
if(p > lens[bufNext]) { |
return 0; |
} |
//System.out.println("at p2 " + p + " " + bufNext); |
} |
else { |
//System.out.println("at p3 " + p + " " + lenCur + " " + bufCur); |
buf = bufs[bufCur]; |
} |
|
//System.out.println("at " + offset + "+" + pos + "+" + p + " [" + (char)buf[p] + "]"); |
return buf[p]; |
} |
|
126,20 → 86,22 |
throws IOException |
{ |
++pos; |
if(pos >= lenCur) toNextBuffer(); |
} |
|
if(pos >= lenCur) { |
//System.out.println("toNext " + pos + " " + lenCur); |
if(bufNext < 0) { |
if(!loadNextBuffer()) return; |
} |
public void toNextBuffer() |
throws IOException |
{ |
if(bufNext < 0) { |
if(!loadNextBuffer()) return; |
} |
|
pos -= lenCur; |
offset += lenCur; |
bufPrev = bufCur; |
bufCur = bufNext; |
lenCur = lens[bufCur]; |
bufNext = -1; |
} |
pos -= lenCur; |
offset += lenCur; |
bufPrev = bufCur; |
bufCur = bufNext; |
lenCur = lens[bufCur]; |
bufNext = -1; |
} |
|
public boolean isEnd() |
149,11 → 111,10 |
|
public void mark() |
{ |
//System.out.println("mark " + offset + "+" + pos + " " + bufPrev + " " + bufCur + " " + bufNext); |
markedOffset = offset; |
markedPos = pos; |
} |
|
|
public void reset() |
throws IOException |
{ |
161,13 → 122,12 |
throw new IOException("no mark"); |
} |
|
//System.out.println("reset p1 " + offset + "+" + pos + " " + bufPrev + " " + bufCur + " " + bufNext); |
int diff = (int)(offset - markedOffset); |
if(diff > 0) { // moved to next buffer after the 'mark' |
if(bufPrev < 0 || diff > lens[bufPrev]) { |
throw new IOException("mark too old"); |
} |
|
|
bufNext = bufCur; |
bufCur = bufPrev; |
offset = markedOffset; |
178,13 → 138,11 |
lenCur = lens[bufCur]; |
|
markedOffset = -1; |
//System.out.println("reset p2 " + offset + "+" + pos + " " + bufPrev + " " + bufCur + " " + bufNext); |
} |
|
private boolean loadNextBuffer() |
throws IOException |
{ |
//System.out.println("loadNextBuffer p1"); |
if(end) return false; |
|
bufNext = (bufCur + 1); |
192,7 → 150,6 |
if(bufNext == bufPrev) bufPrev = -1; |
|
lens[bufNext] = in.read(bufs[bufNext]); |
//System.out.println("loadNextBuffer p2 " + bufNext + " " + lens[bufNext]); |
if(lens[bufNext] < 0) { |
end = true; |
|
199,7 → 156,6 |
return false; |
} |
|
//System.out.println("loadNextBuffer p3"); |
return true; |
} |
|
213,14 → 169,14 |
public int p; |
public int bufNumber; |
} |
|
|
private BufferPositon findPositionInBuffers(long pos) |
throws IOException |
{ |
BufferPositon res = new BufferPositon(); |
|
|
res.p = (int)(pos - offset); |
|
|
if(res.p >= lens[bufCur]) { // next buffer |
res.p -= lens[bufCur]; |
res.bufNumber = bufNext; |
236,15 → 192,15 |
if(res.bufNumber < 0 || res.p < 0 || res.p >= lens[res.bufNumber]) { |
throw new IOException("position not found in buffers"); |
} |
|
|
return res; |
} |
|
|
public String toString(long begin, int length) |
throws IOException |
{ |
BufferPositon pos = findPositionInBuffers(begin); |
|
|
int l = lens[pos.bufNumber] - pos.p; |
if(length <= l) { |
return new String(bufs[pos.bufNumber], pos.p, length, charsetName); |
254,12 → 210,12 |
+ toString(begin + l, length - l); |
} |
} |
|
|
public void copy(long begin, int length, byte[] buf, int bufBegin) |
throws IOException |
{ |
BufferPositon pos = findPositionInBuffers(begin); |
|
|
int l = lens[pos.bufNumber] - pos.p; |
if(length <= l) { |
System.arraycopy(bufs[pos.bufNumber], pos.p, buf, bufBegin, length); |
269,4 → 225,67 |
copy(begin + l, length - l, buf, bufBegin + l); |
} |
} |
|
// == TEST METHODS ================================================================================================= |
private static void testDirectAccess(CycleInputBuffer buf) |
{ |
for(int i = 0; !buf.isEnd(); ++i) { |
byte c; |
if(buf.pos >= buf.lenCur) { |
c = buf.at(0); |
} |
else { |
c = buf.bufs[buf.bufCur][buf.pos]; |
} |
//System.out.print((char)c); |
|
++buf.pos; |
if(buf.pos >= buf.lenCur) buf.toNextBuffer(); |
} |
} |
|
private static void testIndirectAccess(CycleInputBuffer buf) |
{ |
for(int i = 0; !buf.isEnd(); ++i) { |
byte c = buf.cur(); |
//System.out.print((char)c); |
buf.toNext(); |
} |
} |
|
public static void main(String[] args) |
throws Exception |
{ |
if(args.length < 1) { |
System.err.println("Need file name as parameter"); |
return; |
} |
|
long startTime; |
String fileName = args[0]; |
FileInputStream in = null; |
CycleBuffer buf; |
|
in = new FileInputStream(fileName); |
try { |
startTime = System.nanoTime(); |
CycleInputBuffer buf = new CycleInputBuffer(in); |
testDirectAccess(buf); |
System.out.println("Direct access: " + (System.nanoTime() - startTime) / 1000000 + "ms"); |
} |
finally { |
if(in != null) in.close(); |
} |
|
in = new FileInputStream(fileName); |
try { |
startTime = System.nanoTime(); |
CycleInputBuffer buf = new CycleInputBuffer(in); |
testIndirectAccess(buf); |
System.out.println("Indirect access: " + (System.nanoTime() - startTime) / 1000000 + "ms"); |
} |
finally { |
if(in != null) in.close(); |
} |
} |
} |