Subversion Repositories general

Compare Revisions

Ignore whitespace Rev 1359 → Rev 1360

/xmlparser_java/branches/003_cycle_buffer/XmlParser2.java
File deleted
/xmlparser_java/branches/003_cycle_buffer/CycleInputBuffer.java
0,0 → 1,293
import java.io.IOException;
import java.io.InputStream;
import java.io.FileInputStream;
 
/**
 * Reads an {@link InputStream} through two alternating fixed-size byte buffers.
 *
 * <p>At any moment the object holds the <em>current</em> buffer and, optionally,
 * the <em>next</em> one (loaded by look-ahead) or the <em>previous</em> one (kept
 * until its slot is reused). This allows limited look-ahead via {@link #at(int)},
 * a one-buffer-deep {@link #mark()}/{@link #reset()}, and extraction of byte ranges
 * that are still held in the buffers via {@link #toString(long, int)} and
 * {@link #copy(long, int, byte[], int)}.
 *
 * <p>The class does no synchronization of its own.
 */
public class CycleInputBuffer
{
    public static final int BUFFER_SIZE = 4096;

    private final InputStream in;
    private String charsetName = "UTF-8";

    // The following members and the method toNextBuffer() are public,
    // but they should be accessed directly only in speed-critical applications;
    // the common way is to use the cur() and toNext() methods.
    public byte[][] bufs = new byte[2][BUFFER_SIZE]; // buffers
    public int bufCur = -1; // current buffer number in 'bufs'
    public int lenCur = 0;  // length of valid data in the current buffer
    public int pos = 0;     // position in buffers, 0 means beginning of the current one

    private int[] lens = new int[bufs.length]; // lengths of valid data in buffers
    private int bufNext = -1;    // next buffer to be used, -1 if not loaded
    private int bufPrev = -1;    // previously used buffer, -1 if not available
    private boolean end = false; // no more data in the input stream
    private long offset = 0;     // offset in the input of the beginning of the current buffer

    // saved 'offset' and 'pos'; markedOffset < 0 means "no mark"
    private long markedOffset = -1;
    private int markedPos = 0;

    /**
     * Creates the buffer and reads the first chunk from the stream.
     * For an empty stream the object is created with an empty current buffer
     * and {@link #isEnd()} returns {@code true} immediately.
     *
     * @param in stream to read from; it is not closed by this class
     * @throws IOException if the first read fails
     */
    public CycleInputBuffer(InputStream in)
        throws IOException
    {
        this.in = in;

        if(loadNextBuffer()) {
            bufCur = bufNext;
            lenCur = lens[bufCur];
            bufNext = -1;
        }
        else {
            // empty input: keep a valid buffer index and a zero length
            bufCur = 0;
            lenCur = 0;
        }
    }

    /** @return name of the charset used by {@link #toString(long, int)} */
    public String getCharsetName()
    {
        return charsetName;
    }

    /** @param charsetName name of the charset to be used by {@link #toString(long, int)} */
    public void setCharsetName(String charsetName)
    {
        this.charsetName = charsetName;
    }

    /**
     * @return the byte at the current position, or 0 if there is no more data
     * @throws IOException if the next buffer has to be read and the read fails
     */
    public byte cur()
        throws IOException
    {
        if(pos >= lenCur) {
            return at(0);
        }
        return bufs[bufCur][pos];
    }

    /**
     * Looks ahead without moving the current position.
     * The look-ahead can reach into the next buffer only, not further.
     *
     * @param n distance from the current position
     * @return the byte at {@code pos + n}, or 0 if that position is past the data
     *         available in the current and the next buffer
     * @throws IOException if the next buffer has to be read and the read fails
     */
    public byte at(int n)
        throws IOException
    {
        int p = pos + n;
        if(p < lenCur) {
            return bufs[bufCur][p];
        }

        // Read the next buffer only if it is not loaded yet; reading again
        // would overwrite look-ahead data that has not been consumed.
        if(bufNext < 0 && !loadNextBuffer()) {
            return 0; // no more data
        }

        p -= lenCur;
        if(p >= lens[bufNext]) {
            return 0; // beyond the valid data of the next buffer
        }

        return bufs[bufNext][p];
    }

    /**
     * Moves the current position one byte forward, switching buffers when needed.
     *
     * @throws IOException if the next buffer has to be read and the read fails
     */
    public void toNext()
        throws IOException
    {
        ++pos;
        if(pos >= lenCur) toNextBuffer();
    }

    /**
     * Makes the next buffer the current one (loading it first if necessary) and
     * rebases 'pos' so that 0 is the beginning of the new current buffer.
     * Does nothing if there is no more data in the stream.
     *
     * @throws IOException if the read fails
     */
    public void toNextBuffer()
        throws IOException
    {
        if(bufNext < 0 && !loadNextBuffer()) {
            return;
        }

        pos -= lenCur;
        offset += lenCur;
        bufPrev = bufCur;
        bufCur = bufNext;
        lenCur = lens[bufCur];
        bufNext = -1;
    }

    /**
     * @return {@code true} when the end of the stream has been seen, no loaded
     *         buffer is waiting and the position is past the current buffer data
     */
    public boolean isEnd()
    {
        return (end && bufNext < 0 && pos >= lenCur);
    }

    /** Remembers the current position for a later {@link #reset()}. */
    public void mark()
    {
        markedOffset = offset;
        markedPos = pos;
    }

    /**
     * Returns to the position saved by {@link #mark()} and clears the mark.
     *
     * @throws IOException if there is no mark or the marked buffer is no longer held
     */
    public void reset()
        throws IOException
    {
        if(markedOffset < 0) {
            throw new IOException("no mark");
        }

        long diff = offset - markedOffset;
        if(diff > 0) { // moved to the next buffer after the 'mark'
            if(bufPrev < 0 || diff > lens[bufPrev]) {
                throw new IOException("mark too old");
            }

            bufNext = bufCur;
            bufCur = bufPrev;
            bufPrev = -1; // whatever preceded the restored buffer is not held anymore
            offset = markedOffset;
        }
        // else still the same buffer

        pos = markedPos;
        lenCur = lens[bufCur];

        markedOffset = -1;
    }

    /**
     * Reads the next chunk of the stream into the slot following the current buffer.
     * On success 'bufNext' refers to the loaded slot. At end of stream nothing is
     * modified except the 'end' flag, so 'bufNext' stays -1.
     *
     * @return {@code true} if data was loaded, {@code false} if the stream is exhausted
     */
    private boolean loadNextBuffer()
        throws IOException
    {
        if(end) return false;

        int slot = bufCur + 1;
        if(slot >= bufs.length) slot = 0;

        int count = in.read(bufs[slot]);
        if(count < 0) {
            end = true;
            return false;
        }

        if(slot == bufPrev) bufPrev = -1; // the previous buffer has just been overwritten
        lens[slot] = count;
        bufNext = slot;

        return true;
    }

    /** @return absolute offset of the current position in the input */
    public long getPosition()
    {
        return (offset + pos);
    }

    /** Location of an absolute input position inside 'bufs'. */
    private static class BufferPosition
    {
        public int p;         // index inside the buffer
        public int bufNumber; // index of the buffer in 'bufs'
    }

    /**
     * Maps an absolute input position to a buffer and an index in it.
     * Only the previous, the current and the next buffer are searched.
     *
     * @throws IOException if the position is not held in any of the buffers
     */
    private BufferPosition findPositionInBuffers(long position)
        throws IOException
    {
        // computed in 'long' so that far-away positions cannot wrap around
        long rel = position - offset;
        int slot;

        if(rel >= lens[bufCur]) { // next buffer
            rel -= lens[bufCur];
            slot = bufNext;
        }
        else if(rel >= 0) { // current buffer
            slot = bufCur;
        }
        else { // previous buffer
            slot = bufPrev;
            if(slot >= 0) rel += lens[slot];
        }

        if(slot < 0 || rel < 0 || rel >= lens[slot]) {
            throw new IOException("position not found in buffers");
        }

        BufferPosition res = new BufferPosition();
        res.p = (int)rel;
        res.bufNumber = slot;

        return res;
    }

    /**
     * Decodes a range of buffered bytes using the configured charset.
     * The bytes are gathered first and decoded in one step, so a multi-byte
     * character lying across the boundary of two buffers is decoded correctly.
     *
     * @param begin  absolute input offset of the first byte
     * @param length number of bytes to decode
     * @return the decoded text
     * @throws IOException if the range is not completely held in the buffers
     *         or the charset is not supported
     */
    public String toString(long begin, int length)
        throws IOException
    {
        if(length < 0) {
            throw new StringIndexOutOfBoundsException(length);
        }

        byte[] bytes = new byte[length];
        copy(begin, length, bytes, 0);

        return new String(bytes, 0, length, charsetName);
    }

    /**
     * Copies a range of buffered bytes into the given array.
     *
     * @param begin    absolute input offset of the first byte
     * @param length   number of bytes to copy
     * @param buf      destination array
     * @param bufBegin index in 'buf' where the first byte is stored
     * @throws IOException if the range is not completely held in the buffers
     */
    public void copy(long begin, int length, byte[] buf, int bufBegin)
        throws IOException
    {
        long from = begin;
        int remaining = length;
        int to = bufBegin;

        while(true) {
            BufferPosition where = findPositionInBuffers(from);
            int available = lens[where.bufNumber] - where.p;

            if(remaining <= available) {
                System.arraycopy(bufs[where.bufNumber], where.p, buf, to, remaining);
                return;
            }

            // take the rest of this buffer and continue with the following one
            System.arraycopy(bufs[where.bufNumber], where.p, buf, to, available);
            from += available;
            remaining -= available;
            to += available;
        }
    }

    // == TEST METHODS =================================================================================================

    /** Walks through the whole input using the public fields directly. */
    private static void testDirectAccess(CycleInputBuffer buf)
        throws Exception
    {
        while(!buf.isEnd()) {
            byte c;
            if(buf.pos >= buf.lenCur) {
                c = buf.at(0);
            }
            else {
                c = buf.bufs[buf.bufCur][buf.pos];
            }
            //System.out.print((char)c);

            ++buf.pos;
            if(buf.pos >= buf.lenCur) buf.toNextBuffer();
        }
    }

    /** Walks through the whole input using cur() and toNext(). */
    private static void testIndirectAccess(CycleInputBuffer buf)
        throws Exception
    {
        while(!buf.isEnd()) {
            byte c = buf.cur();
            //System.out.print((char)c);
            buf.toNext();
        }
    }

    /**
     * Times both access styles on the file given as the first argument.
     */
    public static void main(String[] args)
        throws Exception
    {
        if(args.length < 1) {
            System.err.println("Need file name as parameter");
            return;
        }

        String fileName = args[0];

        FileInputStream in = new FileInputStream(fileName);
        try {
            long startTime = System.nanoTime();
            CycleInputBuffer buf = new CycleInputBuffer(in);
            testDirectAccess(buf);
            System.out.println("Direct access: " + (System.nanoTime() - startTime) / 1000000 + "ms");
        }
        finally {
            in.close();
        }

        in = new FileInputStream(fileName);
        try {
            long startTime = System.nanoTime();
            CycleInputBuffer buf = new CycleInputBuffer(in);

            testIndirectAccess(buf);
            System.out.println("Indirect access: " + (System.nanoTime() - startTime) / 1000000 + "ms");
        }
        finally {
            in.close();
        }
    }
}