Subversion Repositories general

Compare Revisions

Ignore whitespace Rev 1360 → Rev 1361

/xmlparser_java/branches/001_buffer_move/XmlParser.java
598,8 → 598,6
private boolean parseCharData(XmlSelection sel)
throws XmlException, IOException
{
// FIXME allow 'Reference' here
 
boolean found = false;
saveSelBegin(sel);
 
609,7 → 607,7
}
 
byte c = buf[bufPos];
if(c == '<' || c == '&') {
if(c == '<') {
break;
}
 
/xmlparser_java/branches/003_cycle_buffer/CycleInputBuffer.java
9,14 → 9,10
private InputStream in;
private String charsetName = "UTF-8";
 
// The following members and the method toNextBuffer() are public,
// but they should be accessed directly only in speed-critical applications;
// the common way is to use the cur() and toNext() methods.
public byte[][] bufs = new byte[2][BUFFER_SIZE]; // buffers
public int bufCur = -1; // current buffer number from the 'bufs'
public int lenCur = 0; // length of valid data in current buffer
public int pos = 0; // position in buffers, 0 means begin of current one
 
private byte[][] bufs = new byte[2][BUFFER_SIZE]; // buffers
private int bufCur = -1; // current buffer number from the 'bufs'
private int lenCur = 0; // length of valid data in current buffer
private int pos = 0; // position in buffers, 0 means beginning of the current one
private int[] lens = new int[bufs.length]; // lengths of valid data in buffers
private int bufNext = -1; // next buffer to be used, -1 if not loaded
private int bufPrev = -1; // previous buffer that was used, -1 if not loaded
66,13 → 62,15
byte[] buf;
 
if(p >= lenCur) {
if(!loadNextBuffer()) return 0; // no more data
if(!loadNextBuffer()) {
throw new IOException("end of input stream");
}
 
p -= lenCur;
buf = bufs[bufNext];
 
if(p > lens[bufNext]) {
return 0;
throw new IOException("end of input stream");
}
}
else {
89,9 → 87,16
if(pos >= lenCur) toNextBuffer();
}
 
public void toNextBuffer()
/**
 * Advances the read position by {@code n} bytes and then switches to the
 * following buffer(s) for as long as the position lies at or beyond the end
 * of the current buffer.
 *
 * toNextBuffer() returns without doing anything when loadNextBuffer()
 * reports that no more data is available. In that case the position can
 * never drop below lenCur, so the loop stops as soon as a call makes no
 * progress instead of spinning forever at the end of the input.
 *
 * @param n number of bytes to skip
 * @throws IOException if reading the underlying stream fails
 */
public void skip(int n)
throws IOException
{
    pos += n;
    while(pos >= lenCur) {
        int posBefore = pos;
        int lenBefore = lenCur;
        int bufBefore = bufCur;

        toNextBuffer();

        // nothing moved: no further buffer could be loaded (end of input)
        if(pos == posBefore && lenCur == lenBefore && bufCur == bufBefore) {
            break;
        }
    }
}
 
private void toNextBuffer()
throws IOException
{
if(bufNext < 0) {
if(!loadNextBuffer()) return;
}
227,34 → 232,6
}
 
// == TEST METHODS =================================================================================================
private static void testDirectAccess(CycleInputBuffer buf)
throws Exception
{
for(int i = 0; !buf.isEnd(); ++i) {
byte c;
if(buf.pos >= buf.lenCur) {
c = buf.at(0);
}
else {
c = buf.bufs[buf.bufCur][buf.pos];
}
//System.out.print((char)c);
 
++buf.pos;
if(buf.pos >= buf.lenCur) buf.toNextBuffer();
}
}
/**
 * Walks the whole input through the accessor methods only:
 * cur() to read the current byte and toNext() to advance.
 */
private static void testIndirectAccess(CycleInputBuffer buf)
throws Exception
{
    while(!buf.isEnd()) {
        byte c = buf.cur();
        //System.out.print((char)c);
        buf.toNext();
    }
}
public static void main(String[] args)
throws Exception
{
270,21 → 247,13
try {
long startTime = System.nanoTime();
CycleInputBuffer buf = new CycleInputBuffer(in);
testDirectAccess(buf);
System.out.println("Direct access: " + (System.nanoTime() - startTime) / 1000000 + "ms");
}
finally {
if(in != null) in.close();
}
 
in = new FileInputStream(fileName);
try {
long startTime = System.nanoTime();
CycleInputBuffer buf = new CycleInputBuffer(in);
 
testIndirectAccess(buf);
System.out.println("Indirect access: " + (System.nanoTime() - startTime) / 1000000 + "ms");
while(!buf.isEnd()) {
byte c = buf.cur();
//System.out.print((char)c);
buf.toNext();
}
System.out.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms");
}
finally {
if(in != null) in.close();
/xmlparser_java/branches/003_cycle_buffer/XmlParser.java
136,192 → 136,11
}
 
// --------------------------------------------------------------------------------------------------------------------
/**
 * Sequential byte reader over an InputStream that keeps the most recently
 * read data in a fixed array split into two halves which are refilled in turn.
 * An absolute stream position {@code p} is stored at index {@code p % size}.
 *
 * Besides the read position it tracks a line number and a position within
 * the line, supports a single mark()/reset() pair and can turn a recently
 * read byte range into a String.
 *
 * Look-ahead via at(n) is meant for small non-negative n (less than half of
 * the buffer).
 */
class XmlBuffer
{
    private InputStream in;
    private byte[] buf = new byte[2*2048]; // (buf.length % 2 == 0)
    private int size = buf.length;
    private int half = size/2;
    private long len = -1;            // total number of bytes loaded so far, -1 before the first load
    private long pos = 0;             // absolute position of the current byte
    private boolean second = true;    // which half of buffer was filled most recently
    private boolean end = false;      // the stream has reported end of input
    private long line = 1;
    private long linePos = 1;
    private long lastPosUsed = pos;   // last position already included in line/linePos
    private long marked = -1;         // position saved by mark(), -1 if none
    private long markedLine = -1;
    private long markedLinePos = -1;

    /**
     * Creates the buffer and loads the first half from the stream.
     *
     * @param in stream to read from
     * @throws IOException if the first read fails
     */
    public XmlBuffer(InputStream in)
        throws IOException
    {
        if(buf.length % 2 != 0) {
            throw new RuntimeException("wrong buffer size: " + buf.length);
        }

        this.in = in;

        ensure(0);
    }

    /**
     * Returns the byte at the current position, or 0 if the input is exhausted.
     */
    public byte cur()
        throws IOException
    {
        return at(0);
    }

    /**
     * Returns the byte {@code n} positions after the current one, loading more
     * data if necessary.
     *
     * @param n look-ahead distance (0 is the current byte)
     * @return the requested byte, or 0 if it lies beyond the end of input
     * @throws IOException if reading the stream fails
     */
    public byte at(int n)
        throws IOException
    {
        long target = pos + n;
        if(target >= len && !ensure(target)) {
            return 0;
        }

        byte c = buf[(int)(target % size)];

        // Line accounting always uses the byte at the current position,
        // even when the caller is looking ahead.
        if(lastPosUsed != pos && pos < len) {
            byte here = (n == 0) ? c : buf[(int)(pos % size)];
            if(here == '\r') {
                // carriage return does not advance the line position
            }
            else if(here == '\n') {
                ++line;
                linePos = 0; // a small issue: the \n is counted at zero position of next line
            }
            else {
                ++linePos;
            }
            lastPosUsed = pos;
        }

        return c;
    }

    /**
     * Moves to the next byte, making sure the current one has been included
     * in the line/position accounting first.
     */
    public void toNext()
        throws IOException
    {
        if(lastPosUsed != pos) {
            cur();
        }
        ++pos;
    }

    /**
     * Tells whether the stream has reported end of input and the current
     * position is past the last loaded byte. End of input is noticed while
     * loading; when the data ends exactly on a half boundary it is noticed
     * only by the next read attempt.
     */
    public boolean isEnd()
    {
        return (end && pos >= len);
    }

    /**
     * Advances by {@code n} bytes one at a time so that line accounting
     * stays correct.
     */
    public void skip(int n)
        throws IOException
    {
        for(int i = 0; i < n; ++i) {
            toNext();
        }
    }

    /** Returns the absolute position of the current byte. */
    public long getPosition()
    {
        return pos;
    }

    /** Returns the current line number (starts at 1). */
    public long getLine()
    {
        return line;
    }

    /** Returns the position within the current line. */
    public long getLinePosition()
    {
        return linePos;
    }

    /** Remembers the current position together with its line information. */
    public void mark()
    {
        marked = pos;
        markedLine = line;
        markedLinePos = linePos;
    }

    /**
     * Returns to the position saved by mark() and clears the mark.
     *
     * @throws RuntimeException if no mark is set, or if it was dropped
     *         because its data has been overwritten by a later load
     */
    public void reset()
    {
        if(marked < 0) {
            throw new RuntimeException("no position saved");
        }

        pos = marked;
        line = markedLine;
        linePos = markedLinePos;
        lastPosUsed = pos;

        marked = -1;
        markedLine = -1;
        markedLinePos = -1;
    }

    /**
     * Makes sure the byte at absolute position {@code p} is loaded, filling
     * the next half of the buffer if needed.
     *
     * A half is always filled completely unless the stream ends, because the
     * {@code p % size} mapping relies on every half except the last being
     * full; a single InputStream.read call may return fewer bytes than asked.
     *
     * @return true if position {@code p} is available after the call
     */
    private boolean ensure(long p)
        throws IOException
    {
        if(end) {
            return false;
        }
        else if(len >= 0 && p < len) {
            return true;
        }

        // the very first load goes to the first half, later loads alternate
        boolean fillSecond = (len >= 0) && !second;
        int base = fillSecond ? half : 0;

        int filled = 0;
        while(filled < half) {
            int read = in.read(buf, base + filled, half - filled);
            if(read < 0) {
                end = true;
                break;
            }
            filled += read;
        }

        if(len < 0) {
            len = 0;
        }
        if(filled == 0) {
            // nothing arrived: keep 'second' pointing at the half that holds data
            return false;
        }

        // the half just overwritten held everything before (len - half)
        if(marked >= 0 && marked < len - half) {
            marked = -1;
        }

        second = fillSecond;
        len += filled;

        return p < len;
    }

    /**
     * Decodes {@code length} bytes starting at absolute position
     * {@code begin} as UTF-8.
     *
     * @return the decoded text, or "" when the range is not (any longer)
     *         completely held in the buffer or does not end before the last
     *         loaded byte
     */
    public String selectionToString(long begin, int length)
        throws IOException
    {
        if(begin < ((len-1)/half-1)*half) return "";
        if(begin+length >= len) return "";

        int p1 = (int)(begin % size);
        int p2 = (int)((begin+length) % size);

        if(p1 > p2) {
            // range wraps from the end of the array to its start;
            // valid only while the first half is the newer one
            if(second) return "";

            // join before decoding so a multi-byte character crossing the
            // wrap point is not torn apart
            int tail = size - p1;
            byte[] joined = new byte[tail + p2];
            System.arraycopy(buf, p1, joined, 0, tail);
            System.arraycopy(buf, 0, joined, tail, p2);
            return new String(joined, "UTF-8");
        }

        // range crosses the middle; valid only while the second half is the newer one
        if(p1 < half && p2 >= half && !second) return "";

        // contiguous in the array: decode in one go
        return new String(buf, p1, p2-p1, "UTF-8");
    }
}
 
// --------------------------------------------------------------------------------------------------------------------
class XmlDocument
{
private XmlListener listener;
private XmlBuffer buf;
private int level;
private XmlListener listener;
private CycleInputBuffer buf;
private int level;
 
public XmlDocument(XmlListener listener)
{
331,7 → 150,7
public void parse(InputStream in)
throws XmlException, IOException
{
this.buf = new XmlBuffer(in);
this.buf = new CycleInputBuffer(in);
this.level = 0;
if(listener != null) listener.init(this);
348,7 → 167,7
private void throwException(String message)
throws XmlException, IOException
{
throw new XmlException(message, buf.getLine(), buf.getLinePosition());
throw new XmlException(message, 0, buf.getPosition());
}
 
private void log(String message)
359,7 → 178,7
public String selectionToString(XmlSelection sel)
throws IOException
{
return buf.selectionToString(sel.begin, (int)sel.getLength());
return buf.toString(sel.begin, (int)sel.getLength());
}
private void saveSelBegin(XmlSelection sel)
389,12 → 208,9
{
boolean found = false;
for(;;) {
while(!buf.isEnd()) {
byte c = buf.cur();
if(buf.isEnd()) {
break;
}
else if(c == ' ' || c == '\t' || c == '\n' || c == '\r') {
if(c == ' ' || c == '\t' || c == '\n' || c == '\r') {
found = true;
buf.toNext();
}
454,10 → 270,6
}
for(;;) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
c = buf.cur();
if(('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9')
|| c == '.' || c == '-' || c == '_' || c == ':')
511,10 → 323,6
saveSelBegin(selValue);
for(;; buf.toNext()) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
byte c = buf.cur();
if(c == '<' || c == '&' || c == '"') {
break;
534,7 → 342,10
private boolean parseMisc()
throws XmlException, IOException
{
if(parseComment()) {
if(buf.isEnd()) {
return false;
}
else if(parseComment()) {
return true;
}
else if(parseProcessInstruction()) {
557,10 → 368,6
buf.skip(4);
for(;; buf.toNext()) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
if(buf.at(0) == '-' && buf.at(1) == '-') {
if(buf.at(2) == '>') {
buf.skip(3);
583,10 → 390,6
buf.skip(2);
for(;; buf.toNext()) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
 
if(buf.at(0) == '?' && buf.at(1) == '>') {
buf.skip(2);
634,10 → 437,6
private boolean parseStartTag(XmlElement element)
throws XmlException, IOException
{
long pos = buf.getPosition();
long line = buf.getLine();
long linePos = buf.getLinePosition();
// begin
if(!testChar('<')) {
return false;
696,9 → 495,6
{
XmlSelection sel = new XmlSelection();
for(;;) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
if(buf.at(0) == '<' && buf.at(1) == '/') break;
if(parseElement() != null) {
724,20 → 520,10
private boolean parseCharData(XmlSelection sel)
throws XmlException, IOException
{
// FIXME allow 'Reference' here
 
boolean found = false;
saveSelBegin(sel);
for(;; buf.toNext()) {
byte c = buf.cur();
if(buf.isEnd()) {
throwException("unexpected EoF");
}
else if(c == '<' || c == '&') {
break;
}
for(; buf.cur() != '<'; buf.toNext()) {
found = true;
}
763,10 → 549,6
saveSelBegin(sel);
for(;; buf.toNext()) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
 
if(buf.at(0) == ']' && buf.at(1) == ']' && buf.at(2) == '>') {
saveSelEnd(sel);
buf.skip(3);