/xmlparser_java/branches/003_cycle_buffer/XmlParser.java |
---|
19,9 → 19,11 |
FileInputStream in = new FileInputStream(args[0]); |
try { |
XmlDocument doc = new XmlDocument(); |
XmlListenerImpl listener = new XmlListenerImpl(); |
XmlDocument doc = new XmlDocument(listener); |
doc.parse(in); |
System.out.println(listener.count + " elements found"); |
} |
catch(XmlException ex) { |
System.out.println(ex); |
35,6 → 37,42 |
} |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlListenerImpl |
implements XmlListener |
{ |
public long count = 0; |
private XmlDocument document; |
public void init(XmlDocument document) |
throws XmlException, IOException |
{ |
this.document = document; |
} |
public void processElementBegin(XmlElement element) |
throws XmlException, IOException |
{ |
++count; |
} |
public void processElementEnd(XmlElement element) |
throws XmlException, IOException |
{ |
} |
public void processCharData(XmlSelection sel) |
throws XmlException, IOException |
{ |
} |
public void processCData(XmlSelection sel) |
throws XmlException, IOException |
{ |
} |
} |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlException |
extends Exception |
{ |
62,6 → 100,25 |
} |
// -------------------------------------------------------------------------------------------------------------------- |
interface XmlListener |
{ |
public void init(XmlDocument document) |
throws XmlException, IOException; |
public void processElementBegin(XmlElement element) |
throws XmlException, IOException; |
public void processElementEnd(XmlElement element) |
throws XmlException, IOException; |
public void processCharData(XmlSelection sel) |
throws XmlException, IOException; |
public void processCData(XmlSelection sel) |
throws XmlException, IOException; |
} |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlSelection |
{ |
public long begin; |
74,6 → 131,8 |
class XmlElement |
{ |
public boolean isEmpty = false; |
public XmlSelection elementSel; |
public XmlSelection nameSel; |
} |
// -------------------------------------------------------------------------------------------------------------------- |
108,13 → 167,6 |
public byte cur() |
throws IOException |
{ |
/* if(pos >= len) { |
if(!ensure(pos)) return 0; |
} |
int p = (int)(pos % size); |
return buf[p];*/ |
return at(0); |
} |
141,10 → 193,7 |
lastPosUsed = pos; |
} |
//System.out.println("at " + pos + "+" + n + "=" + line + ":" + linePos + " (" + p + ") [" + (char)c + "]"); |
return c; |
// return buf[p]; |
} |
public void toNext() |
159,7 → 208,7 |
public boolean isEnd() |
{ |
return end; |
return (end && pos >= len); |
} |
public void skip(int n) |
226,7 → 275,10 |
} |
int read = in.read(buf, (second ? half : 0), half); |
if(read < 0) return false; |
if(read < 0) { |
end = true; |
return false; |
} |
if(marked >= 0 && marked < len - half) { |
marked = -1; |
233,40 → 285,32 |
} |
len += read; |
//System.out.println("ensure " + p + " " + len + " " + second); |
return true; |
} |
public String toString(long begin, int length) |
public String selectionToString(long begin, int length) |
throws IOException |
{ |
//System.out.println("toString " + begin + " " + length + " : " + len + " " + half + " " + ((len-1)/half-1)*half); |
if(begin < ((len-1)/half-1)*half) return ""; |
if(begin+length >= len) return ""; |
int p1 = (int)(begin % size); |
int p2 = (int)((begin+length) % size); |
//System.out.println(" toString p1 " + p1 + " " + p2); |
if(p1 > p2) { |
//System.out.println(" toString p2"); |
if(second) return ""; |
//System.out.println(" toString p3"); |
return new String(buf, p1, size-p1, "UTF-8") |
+ new String(buf, 0, p2, "UTF-8"); |
} |
else if(p1 < half && p2 >= half) { |
//System.out.println(" toString p4"); |
if(!second) return ""; |
//System.out.println(" toString p5"); |
return new String(buf, p1, half-p1, "UTF-8") |
+ new String(buf, half, p2-half, "UTF-8"); |
} |
else { |
//System.out.println(" toString p6"); |
return new String(buf, p1, p2-p1, "UTF-8"); |
} |
} |
275,20 → 319,29 |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlDocument |
{ |
private XmlListener listener; |
private XmlBuffer buf; |
private int level; |
/**
 * Creates a parser that reports to the given listener.
 *
 * @param listener receiver of parse notifications; may be null, in which
 *                 case no notifications are sent
 */
public XmlDocument(XmlListener listener) {
    this.listener = listener;
}
public void parse(InputStream in) |
throws XmlException, IOException |
{ |
this.buf = new XmlBuffer(in); |
this.level = 0; |
if(listener != null) listener.init(this); |
if(parseProlog()) { |
parseElement(); |
while(parseMisc()); |
/* |
if(this.pos < this.len) throwException("trash at end of text"); |
*/ |
if(!buf.isEnd()) throwException("EoF expected"); |
} |
} |
303,10 → 356,10 |
System.out.println(message); |
} |
private String toString(XmlSelection sel) |
public String selectionToString(XmlSelection sel) |
throws IOException |
{ |
return buf.toString(sel.begin, (int)sel.getLength()); |
return buf.selectionToString(sel.begin, (int)sel.getLength()); |
} |
private void saveSelBegin(XmlSelection sel) |
334,12 → 387,11 |
private boolean skipSpaces() |
throws XmlException, IOException |
{ |
//log("skipSpaces begin " + buf.getPosition()); |
boolean found = false; |
for(;;) { |
byte c = buf.cur(); |
if(c == 0) { |
if(buf.isEnd()) { |
break; |
} |
else if(c == ' ' || c == '\t' || c == '\n' || c == '\r') { |
351,7 → 403,6 |
} |
} |
//log("skipSpaces " + found + " " + buf.getPosition()); |
return found; |
} |
358,13 → 409,10 |
private boolean parseDecl() |
throws XmlException, IOException |
{ |
//log("parseDecl begin " + buf.getPosition()); |
// begin |
if(!testChar('<') || !testChar('?') || !testChar('x') |
|| !testChar('m') || !testChar('l')) |
{ |
//log("parseDecl no 'xml' " + buf.getPosition()); |
return false; |
} |
381,7 → 429,6 |
throwException("end of XML declaration expected"); |
} |
//log("parseDecl ok " + buf.getPosition()); |
return true; |
} |
396,7 → 443,6 |
private boolean parseName(XmlSelection sel) |
throws XmlException, IOException |
{ |
//log("parseName begin " + buf.getPosition()); |
saveSelBegin(sel); |
byte c = buf.cur(); |
404,7 → 450,6 |
buf.toNext(); |
} |
else { |
//log("parseName not a name " + buf.getPosition()); |
return false; |
} |
425,7 → 470,6 |
} |
saveSelEnd(sel); |
//log("parseName ok " + buf.getPosition()); |
return true; |
} |
432,13 → 476,10 |
private boolean testChar(char c) |
throws XmlException, IOException |
{ |
//log("testChar begin " + buf.getPosition() + " [" + c + "]"); |
if(buf.cur() != c) { |
//log("testChar false " + buf.getPosition()); |
return false; |
} |
else { |
//log("testChar true " + buf.getPosition()); |
buf.toNext(); |
return true; |
} |
447,11 → 488,9 |
private boolean parseAttribute(XmlSelection selName, XmlSelection selValue) |
throws XmlException, IOException |
{ |
//log("parseAttribute begin " + buf.getPosition()); |
// name |
buf.mark(); |
if(!parseName(selName)) { |
//log("parseAttribute no name " + buf.getPosition()); |
buf.reset(); |
return false; |
} |
489,7 → 528,6 |
// FIXME check '[WFC: No External Entity References]' |
//log("parseAttribute ok " + buf.getPosition()); |
return true; |
} |
496,21 → 534,16 |
private boolean parseMisc() |
throws XmlException, IOException |
{ |
//log("parseMisc begin " + buf.getPosition()); |
if(parseComment()) { |
//log("parseMisc comment ok " + buf.getPosition()); |
return true; |
} |
else if(parseProcessInstruction()) { |
//log("parseMisc pi ok " + buf.getPosition()); |
return true; |
} |
else if(skipSpaces()) { |
//log("parseMisc spaces ok " + buf.getPosition()); |
return true; |
} |
else { |
//log("parseMisc false " + buf.getPosition()); |
return false; |
} |
} |
518,9 → 551,7 |
private boolean parseComment() |
throws XmlException, IOException |
{ |
//log("parseComment begin " + buf.getPosition()); |
if(buf.at(0) != '<' || buf.at(1) != '!' || buf.at(2) != '-' || buf.at(3) != '-') { |
//log("parseComment no signature " + buf.getPosition()); |
return false; |
} |
533,7 → 564,7 |
if(buf.at(0) == '-' && buf.at(1) == '-') { |
if(buf.at(2) == '>') { |
buf.skip(3); |
//log("parseComment ok " + buf.getPosition()); |
return true; |
} |
else { |
558,6 → 589,7 |
if(buf.at(0) == '?' && buf.at(1) == '>') { |
buf.skip(2); |
return true; |
} |
} |
566,15 → 598,18 |
private XmlElement parseElement() |
throws XmlException, IOException |
{ |
//log("parseElement begin " + buf.getPosition()); |
XmlElement element = new XmlElement(); |
element.elementSel = new XmlSelection(); |
saveSelBegin(element.elementSel); |
if(!parseStartTag(element)) { |
//log("parseElement no start tag " + buf.getPosition()); |
return null; |
} |
++level; |
if(listener != null) listener.processElementBegin(element); |
if(!element.isEmpty) { |
if(!parseTagContent(element)) { |
throwException("cannot parse tag content"); |
583,11 → 618,16 |
XmlSelection selEndName = new XmlSelection(); |
parseEndTag(selEndName); |
//if(element.name != selEndName |
//if(!selectionToString(element.nameSel).equals(selectionToString(selEndName))) { |
// throwException("tag names do not match"); |
//} |
} |
//log("parseElement ok " + buf.getPosition()); |
saveSelEnd(element.elementSel); |
if(listener != null) listener.processElementEnd(element); |
--level; |
return element; |
} |
594,7 → 634,6 |
private boolean parseStartTag(XmlElement element) |
throws XmlException, IOException |
{ |
//log("parseStartTag begin " + buf.getPosition()); |
long pos = buf.getPosition(); |
long line = buf.getLine(); |
long linePos = buf.getLinePosition(); |
601,18 → 640,17 |
// begin |
if(!testChar('<')) { |
//log("parseStartTag no signature " + buf.getPosition()); |
return false; |
} |
// name |
buf.mark(); |
XmlSelection sel = new XmlSelection(); |
if(!parseName(sel)) { |
element.nameSel = new XmlSelection(); |
if(!parseName(element.nameSel)) { |
buf.reset(); |
return false; |
} |
//System.out.print("[" + toString(sel) + "]@" + line + ":" + linePos + " (" + pos + ")"); |
// attributes |
XmlSelection selName = new XmlSelection(); |
620,9 → 658,7 |
for(;;) { |
if(!skipSpaces()) break; |
if(!parseAttribute(selName, selValue)) break; |
//System.out.print(" [" + toString(selName) + "]=[" + toString(selValue) + "]"); |
} |
//System.out.println(); |
// end |
element.isEmpty = testChar('/'); |
630,7 → 666,6 |
throwException("end of tag expected"); |
} |
//log("parseStartTag ok " + buf.getPosition()); |
return true; |
} |
637,7 → 672,6 |
private void parseEndTag(XmlSelection sel) |
throws XmlException, IOException |
{ |
//log("parseEndTag begin " + buf.getPosition()); |
// begin |
if(!testChar('<') || !testChar('/')) { |
throwException("cannot find tag end"); |
655,14 → 689,11 |
if(!testChar('>')) { |
throwException("end of tag expected"); |
} |
//log("parseEndTag ok " + buf.getPosition()); |
} |
private boolean parseTagContent(XmlElement element) |
throws XmlException, IOException |
{ |
//log("parseTagContent begin " + buf.getPosition()); |
XmlSelection sel = new XmlSelection(); |
for(;;) { |
if(buf.isEnd()) { |
680,25 → 711,30 |
} |
else if(parseCharData(sel)) { |
} |
else { |
throwException("unexpected tag content"); |
} |
// FIXME allow 'Reference' here |
} |
//log("parseTagContent ok " + buf.getPosition()); |
return true; |
} |
// FIXME not fully conform the standard |
private boolean parseCharData(XmlSelection sel) |
throws XmlException, IOException |
{ |
//log("parseCharData begin " + buf.getPosition()); |
// FIXME allow 'Reference' here |
boolean found = false; |
saveSelBegin(sel); |
for(;; buf.toNext()) { |
byte c = buf.cur(); |
if(c == 0 || c == '<') { |
if(buf.isEnd()) { |
throwException("unexpected EoF"); |
} |
else if(c == '<' || c == '&') { |
break; |
} |
705,8 → 741,9 |
found = true; |
} |
//log("parseCharData " + found + " " + buf.getPosition()); |
saveSelEnd(sel); |
if(listener != null) listener.processCharData(sel); |
return found; |
} |
733,6 → 770,8 |
if(buf.at(0) == ']' && buf.at(1) == ']' && buf.at(2) == '>') { |
saveSelEnd(sel); |
buf.skip(3); |
if(listener != null) listener.processCData(sel); |
return true; |
} |
} |
/xmlparser_java/branches/004_buffer_in_onw_class/XmlParser.java |
---|
19,9 → 19,11 |
FileInputStream in = new FileInputStream(args[0]); |
try { |
XmlDocument doc = new XmlDocument(); |
XmlListenerImpl listener = new XmlListenerImpl(); |
XmlDocument doc = new XmlDocument(listener); |
doc.parse(in); |
System.out.println(listener.count + " elements found"); |
} |
catch(XmlException ex) { |
System.out.println(ex); |
30,11 → 32,47 |
in.close(); |
} |
System.err.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms"); |
System.out.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms"); |
} |
} |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlListenerImpl |
implements XmlListener |
{ |
public long count = 0; |
private XmlDocument document; |
public void init(XmlDocument document) |
throws XmlException, IOException |
{ |
this.document = document; |
} |
public void processElementBegin(XmlElement element) |
throws XmlException, IOException |
{ |
++count; |
} |
public void processElementEnd(XmlElement element) |
throws XmlException, IOException |
{ |
} |
public void processCharData(XmlSelection sel) |
throws XmlException, IOException |
{ |
} |
public void processCData(XmlSelection sel) |
throws XmlException, IOException |
{ |
} |
} |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlException |
extends Exception |
{ |
71,9 → 109,30 |
} |
// -------------------------------------------------------------------------------------------------------------------- |
interface XmlListener |
{ |
public void init(XmlDocument document) |
throws XmlException, IOException; |
public void processElementBegin(XmlElement element) |
throws XmlException, IOException; |
public void processElementEnd(XmlElement element) |
throws XmlException, IOException; |
public void processCharData(XmlSelection sel) |
throws XmlException, IOException; |
public void processCData(XmlSelection sel) |
throws XmlException, IOException; |
} |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlElement |
{ |
public boolean isEmpty = false; |
public XmlSelection elementSel; |
public XmlSelection nameSel; |
} |
// -------------------------------------------------------------------------------------------------------------------- |
134,18 → 193,8 |
pos += n; |
} |
public long getOffset() |
public long getPosition() |
{ |
return offset; |
} |
public int getPosition() |
{ |
return pos; |
} |
public long getAbsPosition() |
{ |
return (offset + pos); |
} |
169,7 → 218,6 |
throws IOException |
{ |
if(pos + count >= len) { |
//log("ensureNext start " + pos + " " + count); |
if(len == 0) { |
// read full buffer at begin |
len = in.read(buf); |
209,24 → 257,29 |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlDocument |
{ |
private XmlListener listener; |
private XmlBuffer buf; |
private long line; |
private long linePos; |
private int level; |
/**
 * Creates a parser that reports to the given listener.
 *
 * @param listener receiver of parse notifications; may be null, in which
 *                 case no notifications are sent
 */
public XmlDocument(XmlListener listener) {
    this.listener = listener;
}
public void parse(InputStream in) |
throws XmlException, IOException |
{ |
this.buf = new XmlBuffer(in); |
this.line = 1; |
this.linePos = 0; |
this.level = 0; |
if(listener != null) listener.init(this); |
if(parseProlog()) { |
parseElement(); |
while(parseMisc()); |
/* |
if(this.pos < this.len) throwException("trash at end of text"); |
*/ |
if(!buf.isEnd()) throwException("EoF expected"); |
} |
} |
233,7 → 286,7 |
private void throwException(String message) |
throws XmlException, IOException |
{ |
throw new XmlException(message, line, /*linePos*/ buf.getAbsPosition()); |
throw new XmlException(message, 1, buf.getPosition()); |
} |
private void log(String message) |
241,7 → 294,7 |
System.out.println(message); |
} |
private String toString(XmlSelection sel) |
public String selectionToString(XmlSelection sel) |
throws IOException |
{ |
return buf.toString(sel.begin, (int)sel.getLength()); |
249,12 → 302,12 |
private void saveSelBegin(XmlSelection sel) |
{ |
sel.begin = buf.getAbsPosition(); |
sel.begin = buf.getPosition(); |
} |
private void saveSelEnd(XmlSelection sel) |
{ |
sel.end = buf.getAbsPosition(); |
sel.end = buf.getPosition(); |
} |
private boolean parseProlog() |
272,7 → 325,6 |
private boolean skipSpaces() |
throws XmlException, IOException |
{ |
//log("skipSpaces begin " + bufPos); |
boolean found = false; |
for(;;) { |
289,7 → 341,6 |
} |
} |
//log("skipSpaces " + found + " " + bufPos); |
return found; |
} |
296,13 → 347,10 |
private boolean parseDecl() |
throws XmlException, IOException |
{ |
//log("parseDecl begin " + bufPos); |
// begin |
if(!testChar('<') || !testChar('?') || !testChar('x') |
|| !testChar('m') || !testChar('l')) |
{ |
//log("parseDecl no 'xml' " + bufPos); |
return false; |
} |
319,7 → 367,6 |
throwException("end of XML declaration expected"); |
} |
//log("parseDecl ok " + bufPos); |
return true; |
} |
334,7 → 381,6 |
private boolean parseName(XmlSelection sel) |
throws XmlException, IOException |
{ |
//log("parseName begin " + bufPos); |
saveSelBegin(sel); |
byte c = buf.cur(); |
342,7 → 388,6 |
buf.toNext(); |
} |
else { |
//log("parseName not a name " + bufPos); |
return false; |
} |
363,7 → 408,6 |
} |
saveSelEnd(sel); |
//log("parseName ok " + bufPos); |
return true; |
} |
382,11 → 426,9 |
private boolean parseAttribute(XmlSelection selName, XmlSelection selValue) |
throws XmlException, IOException |
{ |
//log("parseAttribute begin " + bufPos); |
// name |
buf.mark(); |
if(!parseName(selName)) { |
//log("parseAttribute no name " + bufPos); |
buf.reset(); |
return false; |
} |
424,7 → 466,6 |
// FIXME check '[WFC: No External Entity References]' |
//log("parseAttribute ok " + bufPos); |
return true; |
} |
431,21 → 472,16 |
private boolean parseMisc() |
throws XmlException, IOException |
{ |
//log("parseMisc begin " + bufPos); |
if(parseComment()) { |
//log("parseMisc comment ok " + bufPos); |
return true; |
} |
else if(parseProcessInstruction()) { |
//log("parseMisc pi ok " + bufPos); |
return true; |
} |
else if(skipSpaces()) { |
//log("parseMisc spaces ok " + bufPos); |
return true; |
} |
else { |
//log("parseMisc false " + bufPos); |
return false; |
} |
} |
453,9 → 489,7 |
private boolean parseComment() |
throws XmlException, IOException |
{ |
//log("parseComment begin " + bufPos); |
if(buf.at(0) != '<' || buf.at(1) != '!' || buf.at(2) != '-' || buf.at(3) != '-') { |
//log("parseComment no signature " + bufPos); |
return false; |
} |
468,7 → 502,7 |
if(buf.at(0) == '-' && buf.at(1) == '-') { |
if(buf.at(2) == '>') { |
buf.skip(3); |
//log("parseComment ok " + bufPos); |
return true; |
} |
else { |
493,6 → 527,7 |
if(buf.at(0) == '?' && buf.at(1) == '>') { |
buf.skip(2); |
return true; |
} |
} |
501,15 → 536,18 |
private XmlElement parseElement() |
throws XmlException, IOException |
{ |
//log("parseElement begin " + bufPos); |
XmlElement element = new XmlElement(); |
element.elementSel = new XmlSelection(); |
saveSelBegin(element.elementSel); |
if(!parseStartTag(element)) { |
//log("parseElement no start tag " + bufPos); |
return null; |
} |
++level; |
if(listener != null) listener.processElementBegin(element); |
if(!element.isEmpty) { |
if(!parseTagContent(element)) { |
throwException("cannot parse tag content"); |
518,11 → 556,16 |
XmlSelection selEndName = new XmlSelection(); |
parseEndTag(selEndName); |
//if(element.name != selEndName |
//if(!selectionToString(element.nameSel).equals(selectionToString(selEndName))) { |
// throwException("tag names do not match"); |
//} |
} |
//log("parseElement ok " + bufPos); |
saveSelEnd(element.elementSel); |
if(listener != null) listener.processElementEnd(element); |
--level; |
return element; |
} |
529,21 → 572,19 |
private boolean parseStartTag(XmlElement element) |
throws XmlException, IOException |
{ |
//log("parseStartTag begin " + bufPos); |
// begin |
if(!testChar('<')) { |
//log("parseStartTag no signature " + bufPos); |
return false; |
} |
// name |
buf.mark(); |
XmlSelection sel = new XmlSelection(); |
if(!parseName(sel)) { |
element.nameSel = new XmlSelection(); |
if(!parseName(element.nameSel)) { |
buf.reset(); |
return false; |
} |
//System.out.print("[" + toString(sel) + "]"); |
// attributes |
XmlSelection selName = new XmlSelection(); |
551,9 → 592,7 |
for(;;) { |
if(!skipSpaces()) break; |
if(!parseAttribute(selName, selValue)) break; |
//System.out.print(" [" + toString(selName) + "]=[" + toString(selValue) + "]"); |
} |
//System.out.println(); |
// end |
element.isEmpty = testChar('/'); |
561,7 → 600,6 |
throwException("end of tag expected"); |
} |
//log("parseStartTag ok " + bufPos); |
return true; |
} |
568,7 → 606,6 |
private void parseEndTag(XmlSelection sel) |
throws XmlException, IOException |
{ |
//log("parseEndTag begin " + bufPos); |
// begin |
if(!testChar('<') || !testChar('/')) { |
throwException("cannot find tag end"); |
586,14 → 623,11 |
if(!testChar('>')) { |
throwException("end of tag expected"); |
} |
//log("parseEndTag ok " + bufPos); |
} |
private boolean parseTagContent(XmlElement element) |
throws XmlException, IOException |
{ |
//log("parseTagContent begin " + bufPos); |
XmlSelection sel = new XmlSelection(); |
for(;;) { |
if(buf.isEnd()) { |
611,25 → 645,31 |
} |
else if(parseCharData(sel)) { |
} |
else { |
throwException("unexpected tag content"); |
} |
// FIXME allow 'Reference' here |
} |
//log("parseTagContent ok " + bufPos); |
return true; |
} |
// FIXME not fully conform the standard |
private boolean parseCharData(XmlSelection sel) |
throws XmlException, IOException |
{ |
//log("parseCharData begin " + bufPos); |
// FIXME allow 'Reference' here |
boolean found = false; |
saveSelBegin(sel); |
for(;; buf.toNext()) { |
byte c = buf.cur(); |
if(c == 0 || c == '<') { |
if(buf.isEnd()) { |
throwException("unexpected EoF"); |
} |
if(c == '<' || c == '&') { |
break; |
} |
636,8 → 676,9 |
found = true; |
} |
//log("parseCharData " + found + " " + bufPos); |
saveSelEnd(sel); |
if(listener != null) listener.processCharData(sel); |
return found; |
} |
664,6 → 705,8 |
if(buf.at(0) == ']' && buf.at(1) == ']' && buf.at(2) == '>') { |
saveSelEnd(sel); |
buf.skip(3); |
if(listener != null) listener.processCData(sel); |
return true; |
} |
} |
/xmlparser_java/branches/001_buffer_move/XmlParser.java |
---|
19,9 → 19,11 |
FileInputStream in = new FileInputStream(args[0]); |
try { |
XmlDocument doc = new XmlDocument(); |
XmlListenerImpl listener = new XmlListenerImpl(); |
XmlDocument doc = new XmlDocument(listener); |
doc.parse(in); |
System.out.println(listener.count + " elements found"); |
} |
catch(XmlException ex) { |
System.out.println(ex); |
30,11 → 32,47 |
in.close(); |
} |
System.err.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms"); |
System.out.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms"); |
} |
} |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlListenerImpl |
implements XmlListener |
{ |
public long count = 0; |
private XmlDocument document; |
public void init(XmlDocument document) |
throws XmlException, IOException |
{ |
this.document = document; |
} |
public void processElementBegin(XmlElement element) |
throws XmlException, IOException |
{ |
++count; |
} |
public void processElementEnd(XmlElement element) |
throws XmlException, IOException |
{ |
} |
public void processCharData(XmlSelection sel) |
throws XmlException, IOException |
{ |
} |
public void processCData(XmlSelection sel) |
throws XmlException, IOException |
{ |
} |
} |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlException |
extends Exception |
{ |
71,22 → 109,48 |
} |
// -------------------------------------------------------------------------------------------------------------------- |
interface XmlListener |
{ |
public void init(XmlDocument document) |
throws XmlException, IOException; |
public void processElementBegin(XmlElement element) |
throws XmlException, IOException; |
public void processElementEnd(XmlElement element) |
throws XmlException, IOException; |
public void processCharData(XmlSelection sel) |
throws XmlException, IOException; |
public void processCData(XmlSelection sel) |
throws XmlException, IOException; |
} |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlElement |
{ |
public boolean isEmpty = false; |
public XmlSelection elementSel; |
public XmlSelection nameSel; |
} |
// -------------------------------------------------------------------------------------------------------------------- |
class XmlDocument |
{ |
private XmlListener listener; |
private InputStream in; |
private byte[] buf = new byte[2048]; // (buf.length % 16 == 0) |
private int bufLen; |
private int bufPos; |
private long bufOffset; |
private long line; |
private long linePos; |
private int level; |
/**
 * Creates a parser that reports to the given listener.
 *
 * @param listener receiver of parse notifications; may be null, in which
 *                 case no notifications are sent
 */
public XmlDocument(XmlListener listener) {
    this.listener = listener;
}
public void parse(InputStream in) |
throws XmlException, IOException |
{ |
98,16 → 162,16 |
this.bufLen = 0; |
this.bufPos = 0; |
this.bufOffset = 0; |
this.line = 1; |
this.linePos = 0; |
this.level = 0; |
if(listener != null) listener.init(this); |
if(parseProlog()) { |
parseElement(); |
while(parseMisc()); |
/* |
if(this.pos < this.len) throwException("trash at end of text"); |
*/ |
if(bufPos < bufLen) throwException("EoF expected"); |
} |
} |
114,7 → 178,7 |
private void throwException(String message) |
throws XmlException, IOException |
{ |
throw new XmlException(message, line, /*linePos*/ bufPos); |
throw new XmlException(message, 1, bufPos); |
} |
private void log(String message) |
122,12 → 186,17 |
System.out.println(message); |
} |
private String toString(XmlSelection sel) |
public String selectionToString(XmlSelection sel) |
throws IOException |
{ |
return new String(buf, (int)(sel.begin - bufOffset), (int)sel.getLength(), "UTF-8"); |
} |
public int getLevel() |
{ |
return level; |
} |
private void saveSelBegin(XmlSelection sel) |
{ |
sel.begin = bufOffset + bufPos; |
142,7 → 211,6 |
throws XmlException, IOException |
{ |
if(bufPos + count >= bufLen) { |
//log("ensureNext start " + bufPos + " " + count); |
if(bufLen == 0) { |
// read full buffer at begin |
bufLen = in.read(buf); |
183,7 → 251,6 |
private boolean skipSpaces() |
throws XmlException, IOException |
{ |
//log("skipSpaces begin " + bufPos); |
boolean found = false; |
for(;;) { |
199,7 → 266,6 |
} |
} |
//log("skipSpaces " + found + " " + bufPos); |
return found; |
} |
206,13 → 272,10 |
private boolean parseDecl() |
throws XmlException, IOException |
{ |
//log("parseDecl begin " + bufPos); |
// begin |
if(!testChar('<') || !testChar('?') || !testChar('x') |
|| !testChar('m') || !testChar('l')) |
{ |
//log("parseDecl no 'xml' " + bufPos); |
return false; |
} |
229,7 → 292,6 |
throwException("end of XML declaration expected"); |
} |
//log("parseDecl ok " + bufPos); |
return true; |
} |
244,7 → 306,6 |
private boolean parseName(XmlSelection sel) |
throws XmlException, IOException |
{ |
//log("parseName begin " + bufPos); |
saveSelBegin(sel); |
int start = bufPos; |
257,7 → 318,6 |
++bufPos; |
} |
else { |
//log("parseName not a name " + bufPos); |
bufPos = start; |
return false; |
} |
279,7 → 339,6 |
} |
saveSelEnd(sel); |
//log("parseName ok " + bufPos); |
return true; |
} |
302,10 → 361,8 |
private boolean parseAttribute(XmlSelection selName, XmlSelection selValue) |
throws XmlException, IOException |
{ |
//log("parseAttribute begin " + bufPos); |
// name |
if(!parseName(selName)) { |
//log("parseAttribute no name " + bufPos); |
return false; |
} |
342,7 → 399,6 |
// FIXME check '[WFC: No External Entity References]' |
//log("parseAttribute ok " + bufPos); |
return true; |
} |
349,21 → 405,16 |
private boolean parseMisc() |
throws XmlException, IOException |
{ |
//log("parseMisc begin " + bufPos); |
if(parseComment()) { |
//log("parseMisc comment ok " + bufPos); |
return true; |
} |
else if(parseProcessInstruction()) { |
//log("parseMisc pi ok " + bufPos); |
return true; |
} |
else if(skipSpaces()) { |
//log("parseMisc spaces ok " + bufPos); |
return true; |
} |
else { |
//log("parseMisc false " + bufPos); |
return false; |
} |
} |
371,13 → 422,10 |
private boolean parseComment() |
throws XmlException, IOException |
{ |
//log("parseComment begin " + bufPos); |
if(bufPos+3 >= bufLen && !ensureNext(4)) { |
//log("parseComment no data " + bufPos); |
return false; |
} |
if(buf[bufPos] != '<' || buf[bufPos+1] != '!' || buf[bufPos+2] != '-' || buf[bufPos+3] != '-') { |
//log("parseComment no signature " + bufPos); |
return false; |
} |
390,7 → 438,7 |
if(buf[bufPos] == '-' && buf[bufPos+1] == '-') { |
if(buf[bufPos+2] == '>') { |
bufPos += 3; |
//log("parseComment ok " + bufPos); |
return true; |
} |
else { |
418,6 → 466,7 |
if(buf[bufPos] == '?' && buf[bufPos+1] == '>') { |
bufPos += 2; |
return true; |
} |
} |
426,15 → 475,18 |
private XmlElement parseElement() |
throws XmlException, IOException |
{ |
//log("parseElement begin " + bufPos); |
XmlElement element = new XmlElement(); |
element.elementSel = new XmlSelection(); |
saveSelBegin(element.elementSel); |
if(!parseStartTag(element)) { |
//log("parseElement no start tag " + bufPos); |
return null; |
} |
++level; |
if(listener != null) listener.processElementBegin(element); |
if(!element.isEmpty) { |
if(!parseTagContent(element)) { |
throwException("cannot parse tag content"); |
443,11 → 495,16 |
XmlSelection selEndName = new XmlSelection(); |
parseEndTag(selEndName); |
//if(element.name != selEndName |
//if(!selectionToString(element.nameSel).equals(selectionToString(selEndName))) { |
// throwException("tag names do not match"); |
//} |
} |
//log("parseElement ok " + bufPos); |
saveSelEnd(element.elementSel); |
if(listener != null) listener.processElementEnd(element); |
--level; |
return element; |
} |
454,23 → 511,20 |
private boolean parseStartTag(XmlElement element) |
throws XmlException, IOException |
{ |
//log("parseStartTag begin " + bufPos); |
int start = bufPos; |
// begin |
if(!testChar('<')) { |
//log("parseStartTag no signature " + bufPos); |
return false; |
} |
// name |
XmlSelection sel = new XmlSelection(); |
if(!parseName(sel)) { |
//log("parseStartTag no name " + bufPos); |
element.nameSel = new XmlSelection(); |
if(!parseName(element.nameSel)) { |
bufPos = start; |
return false; |
} |
//System.out.print("[" + toString(sel) + "]"); |
// attributes |
XmlSelection selName = new XmlSelection(); |
478,9 → 532,7 |
for(;;) { |
if(!skipSpaces()) break; |
if(!parseAttribute(selName, selValue)) break; |
//System.out.print(" [" + toString(selName) + "]=[" + toString(selValue) + "]"); |
} |
//System.out.println(); |
// end |
element.isEmpty = testChar('/'); |
488,7 → 540,6 |
throwException("end of tag expected"); |
} |
//log("parseStartTag ok " + bufPos); |
return true; |
} |
495,7 → 546,6 |
private void parseEndTag(XmlSelection sel) |
throws XmlException, IOException |
{ |
//log("parseEndTag begin " + bufPos); |
// begin |
if(!testChar('<') || !testChar('/')) { |
throwException("cannot find tag end"); |
513,14 → 563,11 |
if(!testChar('>')) { |
throwException("end of tag expected"); |
} |
//log("parseEndTag ok " + bufPos); |
} |
private boolean parseTagContent(XmlElement element) |
throws XmlException, IOException |
{ |
//log("parseTagContent begin " + bufPos); |
XmlSelection sel = new XmlSelection(); |
for(;;) { |
if(bufPos+1 >= bufLen && !ensureNext(2)) { |
538,29 → 585,31 |
} |
else if(parseCharData(sel)) { |
} |
else { |
throwException("unexpected tag content"); |
} |
// FIXME allow 'Reference' here |
} |
//log("parseTagContent ok " + bufPos); |
return true; |
} |
// FIXME not fully conform the standard |
private boolean parseCharData(XmlSelection sel) |
throws XmlException, IOException |
{ |
//log("parseCharData begin " + bufPos); |
// FIXME allow 'Reference' here |
boolean found = false; |
saveSelBegin(sel); |
for(;; ++bufPos) { |
if(bufPos >= bufLen && !ensureNext(1)) { |
return true; |
throwException("unexpected EoF"); |
} |
byte c = buf[bufPos]; |
if(c == '<') { |
if(c == '<' || c == '&') { |
break; |
} |
567,8 → 616,9 |
found = true; |
} |
//log("parseCharData " + found + " " + bufPos); |
saveSelEnd(sel); |
if(listener != null) listener.processCharData(sel); |
return found; |
} |
598,6 → 648,8 |
if(buf[bufPos] == ']' && buf[bufPos+1] == ']' && buf[bufPos+2] == '>') { |
saveSelEnd(sel); |
bufPos += 3; |
if(listener != null) listener.processCData(sel); |
return true; |
} |
} |