Subversion Repositories general

Compare Revisions

Ignore whitespace Rev 1358 → Rev 1359

/xmlparser_java/branches/003_cycle_buffer/XmlParser2.java
1,69 → 1,33
import java.io.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.FileInputStream;
 
public class XmlParser2
public class CycleInputBuffer
{
public static void main(String[] args)
throws Exception
{
if(args.length < 1) {
System.err.println("Need file name as parameter");
return;
}
long startTime = System.nanoTime();
public static final int BUFFER_SIZE = 4096; // size in bytes of each buffer in 'bufs'
private InputStream in; // input stream the buffers are filled from (see loadNextBuffer())
private String charsetName = "UTF-8"; // charset name used by toString(long, int) to decode bytes
 
FileInputStream in = new FileInputStream(args[0]);
try {
XmlBuffer buf = new XmlBuffer(in);
long pos = -1;
for(int i = 0; !buf.isEnd(); ++i) {
byte c = buf.cur();
//System.out.print((char)c);
buf.toNext();
/*if(i == 8 || i == 40) {
buf.mark();
}
else if(i == 14 || i == 42) {
buf.reset();
}
if(i % 7 == 0) {
if(pos >= 0) {
System.out.print(buf.toString(pos, (int)(buf.getPosition() - pos)));
}
pos = buf.getPosition();
}*/
}
}
finally {
in.close();
}
// The following members and the method toNextBuffer() are public,
// but they should be accessed directly only in speed-critical applications;
// the common way is to use the cur() and toNext() methods.
public byte[][] bufs = new byte[2][BUFFER_SIZE]; // the two buffers, BUFFER_SIZE bytes each
public int bufCur = -1; // index of the current buffer in 'bufs'; starts at -1
public int lenCur = 0; // length of valid data in the current buffer
public int pos = 0; // position in buffers; 0 means the beginning of the current one
 
System.out.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms");
}
}
private int[] lens = new int[bufs.length]; // lengths of valid data in the buffers, indexed like 'bufs'
private int bufNext = -1; // next buffer to be used, -1 if not loaded
private int bufPrev = -1; // previously used buffer, -1 if not loaded
private boolean end = false; // no more data in the input stream
private long offset = 0; // offset in the input of the beginning of the current buffer
 
class XmlBuffer
{
private InputStream in; // input stream the buffers are filled from
private String charsetName = "UTF-8"; // charset name used by toString(long, int) to decode bytes
private byte[][] bufs = new byte[2][4096]; // two buffers of 4096 bytes each
private int[] lens = new int[bufs.length]; // lengths of valid data in buffers
private int bufCur = -1; // index of the current buffer in 'bufs'
private int bufNext = -1; // index of the next buffer; assigned by loadNextBuffer()
private int bufPrev = -1; // index of the previously used buffer
private int lenCur = 0; // length of valid data in current buffer
private int pos = 0; // position in buffers, 0 means begin of current one
private boolean end = false; // no more data in the input stream
private long offset = 0; // offset in input for begin of current buffer
// saved 'offset' and 'pos', written by mark()
private long markedOffset = -1; // set back to -1 by reset()
private int markedPos = 0;
 
public XmlBuffer(InputStream in)
public CycleInputBuffer(InputStream in)
throws IOException
{
this.in = in;
78,12 → 42,12
{
return charsetName;
}
 
/**
 * Sets the name of the charset used to decode buffered bytes into strings.
 * The name is stored as is; it is not validated here.
 *
 * @param charsetName charset name later passed to the String constructor in toString(long, int)
 */
public void setCharsetName(String charsetName)
{
this.charsetName = charsetName;
}
 
public byte cur()
throws IOException
{
90,9 → 54,9
if(pos >= lenCur) {
return at(0);
}
 
//System.out.println("cur " + offset + "+" + pos + " [" + (char)bufs[bufCur][pos] + "]");
return bufs[bufCur][pos];
else {
return bufs[bufCur][pos];
}
}
 
public byte at(int n)
102,7 → 66,6
byte[] buf;
 
if(p >= lenCur) {
//System.out.println("at p1 " + p + " " + lenCur);
if(!loadNextBuffer()) return 0; // no more data
 
p -= lenCur;
111,14 → 74,11
if(p > lens[bufNext]) {
return 0;
}
//System.out.println("at p2 " + p + " " + bufNext);
}
else {
//System.out.println("at p3 " + p + " " + lenCur + " " + bufCur);
buf = bufs[bufCur];
}
 
//System.out.println("at " + offset + "+" + pos + "+" + p + " [" + (char)buf[p] + "]");
return buf[p];
}
 
126,20 → 86,22
throws IOException
{
++pos;
if(pos >= lenCur) toNextBuffer();
}
 
if(pos >= lenCur) {
//System.out.println("toNext " + pos + " " + lenCur);
if(bufNext < 0) {
if(!loadNextBuffer()) return;
}
public void toNextBuffer()
throws IOException
{
if(bufNext < 0) {
if(!loadNextBuffer()) return;
}
 
pos -= lenCur;
offset += lenCur;
bufPrev = bufCur;
bufCur = bufNext;
lenCur = lens[bufCur];
bufNext = -1;
}
pos -= lenCur;
offset += lenCur;
bufPrev = bufCur;
bufCur = bufNext;
lenCur = lens[bufCur];
bufNext = -1;
}
 
public boolean isEnd()
149,11 → 111,10
 
/**
 * Remembers the current read position so that a later reset() can return to it.
 */
public void mark()
{
    // The position has two parts: the index inside the buffers and the
    // input offset of the current buffer. Both are saved together.
    markedPos = pos;
    markedOffset = offset;
}
 
public void reset()
throws IOException
{
161,13 → 122,12
throw new IOException("no mark");
}
 
//System.out.println("reset p1 " + offset + "+" + pos + " " + bufPrev + " " + bufCur + " " + bufNext);
int diff = (int)(offset - markedOffset);
if(diff > 0) { // moved to next buffer after the 'mark'
if(bufPrev < 0 || diff > lens[bufPrev]) {
throw new IOException("mark too old");
}
 
bufNext = bufCur;
bufCur = bufPrev;
offset = markedOffset;
178,13 → 138,11
lenCur = lens[bufCur];
 
markedOffset = -1;
//System.out.println("reset p2 " + offset + "+" + pos + " " + bufPrev + " " + bufCur + " " + bufNext);
}
 
private boolean loadNextBuffer()
throws IOException
{
//System.out.println("loadNextBuffer p1");
if(end) return false;
 
bufNext = (bufCur + 1);
192,7 → 150,6
if(bufNext == bufPrev) bufPrev = -1;
 
lens[bufNext] = in.read(bufs[bufNext]);
//System.out.println("loadNextBuffer p2 " + bufNext + " " + lens[bufNext]);
if(lens[bufNext] < 0) {
end = true;
 
199,7 → 156,6
return false;
}
 
//System.out.println("loadNextBuffer p3");
return true;
}
 
213,14 → 169,14
public int p;
public int bufNumber;
}
 
private BufferPositon findPositionInBuffers(long pos)
throws IOException
{
BufferPositon res = new BufferPositon();
 
res.p = (int)(pos - offset);
 
if(res.p >= lens[bufCur]) { // next buffer
res.p -= lens[bufCur];
res.bufNumber = bufNext;
236,15 → 192,15
if(res.bufNumber < 0 || res.p < 0 || res.p >= lens[res.bufNumber]) {
throw new IOException("position not found in buffers");
}
 
return res;
}
 
public String toString(long begin, int length)
throws IOException
{
BufferPositon pos = findPositionInBuffers(begin);
 
int l = lens[pos.bufNumber] - pos.p;
if(length <= l) {
return new String(bufs[pos.bufNumber], pos.p, length, charsetName);
254,12 → 210,12
+ toString(begin + l, length - l);
}
}
 
public void copy(long begin, int length, byte[] buf, int bufBegin)
throws IOException
{
BufferPositon pos = findPositionInBuffers(begin);
 
int l = lens[pos.bufNumber] - pos.p;
if(length <= l) {
System.arraycopy(bufs[pos.bufNumber], pos.p, buf, bufBegin, length);
269,4 → 225,67
copy(begin + l, length - l, buf, bufBegin + l);
}
}
 
// == TEST METHODS =================================================================================================
private static void testDirectAccess(CycleInputBuffer buf)
{
for(int i = 0; !buf.isEnd(); ++i) {
byte c;
if(buf.pos >= buf.lenCur) {
c = buf.at(0);
}
else {
c = buf.bufs[buf.bufCur][buf.pos];
}
//System.out.print((char)c);
 
++buf.pos;
if(buf.pos >= buf.lenCur) buf.toNextBuffer();
}
}
/**
 * Walks through the whole input using only the cur() and toNext() methods.
 * Used by main() to time the regular (indirect) access path.
 *
 * @param buf buffer to read until isEnd() reports true
 * @throws IOException if reading from the underlying stream fails
 *                     (propagated from cur() and toNext())
 */
private static void testIndirectAccess(CycleInputBuffer buf)
        throws IOException
{
    while(!buf.isEnd()) {
        // The byte is fetched only to exercise the access path; its value is not used.
        byte c = buf.cur();
        buf.toNext();
    }
}
public static void main(String[] args)
throws Exception
{
if(args.length < 1) {
System.err.println("Need file name as parameter");
return;
}
 
long startTime;
String fileName = args[0];
FileInputStream in = null;
CycleBuffer buf;
 
in = new FileInputStream(fileName);
try {
startTime = System.nanoTime();
CycleInputBuffer buf = new CycleInputBuffer(in);
testDirectAccess(buf);
System.out.println("Direct access: " + (System.nanoTime() - startTime) / 1000000 + "ms");
}
finally {
if(in != null) in.close();
}
 
in = new FileInputStream(fileName);
try {
startTime = System.nanoTime();
CycleInputBuffer buf = new CycleInputBuffer(in);
testIndirectAccess(buf);
System.out.println("Indirect access: " + (System.nanoTime() - startTime) / 1000000 + "ms");
}
finally {
if(in != null) in.close();
}
}
}