Subversion Repositories general

Compare Revisions

Ignore whitespace Rev 1352 → Rev 1353

/xmlparser_java/branches/003_cycle_buffer/XmlParser.java
19,9 → 19,11
FileInputStream in = new FileInputStream(args[0]);
try {
XmlDocument doc = new XmlDocument();
XmlListenerImpl listener = new XmlListenerImpl();
XmlDocument doc = new XmlDocument(listener);
doc.parse(in);
System.out.println(listener.count + " elements found");
}
catch(XmlException ex) {
System.out.println(ex);
35,6 → 37,42
}
 
// --------------------------------------------------------------------------------------------------------------------
/**
 * Minimal {@link XmlListener} that only counts the elements reported by the
 * parser. Every callback other than {@code processElementBegin} is a no-op.
 */
class XmlListenerImpl implements XmlListener {
    /** Document handed over in {@code init}; stored but not read by this class. */
    private XmlDocument document;

    /** Number of {@code processElementBegin} notifications received so far. */
    public long count = 0;

    /** Remembers the document that is about to be parsed. */
    public void init(XmlDocument document) throws XmlException, IOException {
        this.document = document;
    }

    /** Counts one more element; the element itself is not inspected. */
    public void processElementBegin(XmlElement element) throws XmlException, IOException {
        count += 1;
    }

    /** Intentionally empty: element ends are not of interest here. */
    public void processElementEnd(XmlElement element) throws XmlException, IOException {
    }

    /** Intentionally empty: character data is ignored. */
    public void processCharData(XmlSelection sel) throws XmlException, IOException {
    }

    /** Intentionally empty: CDATA sections are ignored. */
    public void processCData(XmlSelection sel) throws XmlException, IOException {
    }
}
 
// --------------------------------------------------------------------------------------------------------------------
class XmlException
extends Exception
{
62,6 → 100,25
}
 
// --------------------------------------------------------------------------------------------------------------------
/**
 * Callback interface through which {@code XmlDocument} reports parse events.
 * Any callback may abort parsing by throwing.
 */
interface XmlListener {
    /** Invoked by {@code XmlDocument.parse} with the document before the prolog is parsed. */
    void init(XmlDocument document) throws XmlException, IOException;

    /** Invoked after the start tag of an element has been parsed. */
    void processElementBegin(XmlElement element) throws XmlException, IOException;

    /** Invoked after the end of the element has been recorded in its {@code elementSel}. */
    void processElementEnd(XmlElement element) throws XmlException, IOException;

    /** Invoked with the selection recorded for a run of character data. */
    void processCharData(XmlSelection sel) throws XmlException, IOException;

    /**
     * Invoked after the closing {@code ]]>} of a CDATA section has been
     * consumed; the selection ends just before that terminator.
     */
    void processCData(XmlSelection sel) throws XmlException, IOException;
}
 
// --------------------------------------------------------------------------------------------------------------------
class XmlSelection
{
public long begin;
73,7 → 130,9
// --------------------------------------------------------------------------------------------------------------------
class XmlElement
{
public boolean isEmpty = false;
public boolean isEmpty = false;
public XmlSelection elementSel;
public XmlSelection nameSel;
}
 
// --------------------------------------------------------------------------------------------------------------------
108,13 → 167,6
/**
 * Returns the result of {@code at(0)} - presumably the byte at the current
 * read position (TODO confirm against {@code at}).
 *
 * @throws IOException propagated from {@code at}
 */
public byte cur() throws IOException {
    return at(0);
}
 
141,10 → 193,7
lastPosUsed = pos;
}
//System.out.println("at " + pos + "+" + n + "=" + line + ":" + linePos + " (" + p + ") [" + (char)c + "]");
return c;
 
// return buf[p];
}
 
public void toNext()
159,7 → 208,7
 
public boolean isEnd()
{
return end;
return (end && pos >= len);
}
 
public void skip(int n)
226,7 → 275,10
}
 
int read = in.read(buf, (second ? half : 0), half);
if(read < 0) return false;
if(read < 0) {
end = true;
return false;
}
 
if(marked >= 0 && marked < len - half) {
marked = -1;
233,40 → 285,32
}
 
len += read;
//System.out.println("ensure " + p + " " + len + " " + second);
 
return true;
}
public String toString(long begin, int length)
public String selectionToString(long begin, int length)
throws IOException
{
//System.out.println("toString " + begin + " " + length + " : " + len + " " + half + " " + ((len-1)/half-1)*half);
if(begin < ((len-1)/half-1)*half) return "";
if(begin+length >= len) return "";
 
int p1 = (int)(begin % size);
int p2 = (int)((begin+length) % size);
//System.out.println(" toString p1 " + p1 + " " + p2);
 
if(p1 > p2) {
//System.out.println(" toString p2");
if(second) return "";
//System.out.println(" toString p3");
 
return new String(buf, p1, size-p1, "UTF-8")
+ new String(buf, 0, p2, "UTF-8");
}
else if(p1 < half && p2 >= half) {
//System.out.println(" toString p4");
if(!second) return "";
//System.out.println(" toString p5");
 
return new String(buf, p1, half-p1, "UTF-8")
+ new String(buf, half, p2-half, "UTF-8");
}
else {
//System.out.println(" toString p6");
return new String(buf, p1, p2-p1, "UTF-8");
}
}
275,20 → 319,29
// --------------------------------------------------------------------------------------------------------------------
class XmlDocument
{
private XmlListener listener;
private XmlBuffer buf;
private int level;
 
/**
 * Creates a parser that reports parse events to the given listener.
 *
 * @param listener receiver of the parse callbacks; may be null, in which
 *                 case no callbacks are made (call sites test listener != null)
 */
public XmlDocument(XmlListener listener)
{
this.listener = listener;
}
public void parse(InputStream in)
throws XmlException, IOException
{
this.buf = new XmlBuffer(in);
this.buf = new XmlBuffer(in);
this.level = 0;
if(listener != null) listener.init(this);
if(parseProlog()) {
parseElement();
while(parseMisc());
/*
if(this.pos < this.len) throwException("trash at end of text");
*/
 
if(!buf.isEnd()) throwException("EoF expected");
}
}
 
303,10 → 356,10
System.out.println(message);
}
 
private String toString(XmlSelection sel)
public String selectionToString(XmlSelection sel)
throws IOException
{
return buf.toString(sel.begin, (int)sel.getLength());
return buf.selectionToString(sel.begin, (int)sel.getLength());
}
private void saveSelBegin(XmlSelection sel)
334,12 → 387,11
private boolean skipSpaces()
throws XmlException, IOException
{
//log("skipSpaces begin " + buf.getPosition());
boolean found = false;
for(;;) {
byte c = buf.cur();
if(c == 0) {
if(buf.isEnd()) {
break;
}
else if(c == ' ' || c == '\t' || c == '\n' || c == '\r') {
351,7 → 403,6
}
}
//log("skipSpaces " + found + " " + buf.getPosition());
return found;
}
 
358,13 → 409,10
private boolean parseDecl()
throws XmlException, IOException
{
//log("parseDecl begin " + buf.getPosition());
// begin
if(!testChar('<') || !testChar('?') || !testChar('x')
|| !testChar('m') || !testChar('l'))
{
//log("parseDecl no 'xml' " + buf.getPosition());
return false;
}
381,7 → 429,6
throwException("end of XML declaration expected");
}
//log("parseDecl ok " + buf.getPosition());
return true;
}
 
396,7 → 443,6
private boolean parseName(XmlSelection sel)
throws XmlException, IOException
{
//log("parseName begin " + buf.getPosition());
saveSelBegin(sel);
 
byte c = buf.cur();
404,7 → 450,6
buf.toNext();
}
else {
//log("parseName not a name " + buf.getPosition());
return false;
}
425,7 → 470,6
}
saveSelEnd(sel);
//log("parseName ok " + buf.getPosition());
return true;
}
 
432,13 → 476,10
private boolean testChar(char c)
throws XmlException, IOException
{
//log("testChar begin " + buf.getPosition() + " [" + c + "]");
if(buf.cur() != c) {
//log("testChar false " + buf.getPosition());
return false;
}
else {
//log("testChar true " + buf.getPosition());
buf.toNext();
return true;
}
447,11 → 488,9
private boolean parseAttribute(XmlSelection selName, XmlSelection selValue)
throws XmlException, IOException
{
//log("parseAttribute begin " + buf.getPosition());
// name
buf.mark();
if(!parseName(selName)) {
//log("parseAttribute no name " + buf.getPosition());
buf.reset();
return false;
}
489,7 → 528,6
// FIXME check '[WFC: No External Entity References]'
//log("parseAttribute ok " + buf.getPosition());
return true;
}
 
496,21 → 534,16
/**
 * Tries parseComment(), then parseProcessInstruction(), then skipSpaces(),
 * stopping at the first one that reports success.
 *
 * @return true if one of the three attempts returned true, false otherwise
 */
private boolean parseMisc() throws XmlException, IOException {
    // Short-circuit evaluation keeps the original try-in-order behaviour.
    return parseComment() || parseProcessInstruction() || skipSpaces();
}
518,9 → 551,7
private boolean parseComment()
throws XmlException, IOException
{
//log("parseComment begin " + buf.getPosition());
if(buf.at(0) != '<' || buf.at(1) != '!' || buf.at(2) != '-' || buf.at(3) != '-') {
//log("parseComment no signature " + buf.getPosition());
return false;
}
533,7 → 564,7
if(buf.at(0) == '-' && buf.at(1) == '-') {
if(buf.at(2) == '>') {
buf.skip(3);
//log("parseComment ok " + buf.getPosition());
return true;
}
else {
558,6 → 589,7
 
if(buf.at(0) == '?' && buf.at(1) == '>') {
buf.skip(2);
return true;
}
}
566,15 → 598,18
private XmlElement parseElement()
throws XmlException, IOException
{
//log("parseElement begin " + buf.getPosition());
 
XmlElement element = new XmlElement();
element.elementSel = new XmlSelection();
saveSelBegin(element.elementSel);
if(!parseStartTag(element)) {
//log("parseElement no start tag " + buf.getPosition());
return null;
}
 
++level;
if(listener != null) listener.processElementBegin(element);
if(!element.isEmpty) {
if(!parseTagContent(element)) {
throwException("cannot parse tag content");
583,11 → 618,16
XmlSelection selEndName = new XmlSelection();
parseEndTag(selEndName);
//if(element.name != selEndName
//if(!selectionToString(element.nameSel).equals(selectionToString(selEndName))) {
// throwException("tag names do not match");
//}
}
 
saveSelEnd(element.elementSel);
if(listener != null) listener.processElementEnd(element);
//log("parseElement ok " + buf.getPosition());
--level;
return element;
}
 
594,7 → 634,6
private boolean parseStartTag(XmlElement element)
throws XmlException, IOException
{
//log("parseStartTag begin " + buf.getPosition());
long pos = buf.getPosition();
long line = buf.getLine();
long linePos = buf.getLinePosition();
601,18 → 640,17
// begin
if(!testChar('<')) {
//log("parseStartTag no signature " + buf.getPosition());
return false;
}
// name
buf.mark();
XmlSelection sel = new XmlSelection();
if(!parseName(sel)) {
element.nameSel = new XmlSelection();
if(!parseName(element.nameSel)) {
buf.reset();
return false;
}
//System.out.print("[" + toString(sel) + "]@" + line + ":" + linePos + " (" + pos + ")");
// attributes
XmlSelection selName = new XmlSelection();
620,9 → 658,7
for(;;) {
if(!skipSpaces()) break;
if(!parseAttribute(selName, selValue)) break;
//System.out.print(" [" + toString(selName) + "]=[" + toString(selValue) + "]");
}
//System.out.println();
// end
element.isEmpty = testChar('/');
630,7 → 666,6
throwException("end of tag expected");
}
//log("parseStartTag ok " + buf.getPosition());
return true;
}
 
637,7 → 672,6
private void parseEndTag(XmlSelection sel)
throws XmlException, IOException
{
//log("parseEndTag begin " + buf.getPosition());
// begin
if(!testChar('<') || !testChar('/')) {
throwException("cannot find tag end");
655,14 → 689,11
if(!testChar('>')) {
throwException("end of tag expected");
}
//log("parseEndTag ok " + buf.getPosition());
}
 
private boolean parseTagContent(XmlElement element)
throws XmlException, IOException
{
//log("parseTagContent begin " + buf.getPosition());
XmlSelection sel = new XmlSelection();
for(;;) {
if(buf.isEnd()) {
680,25 → 711,30
}
else if(parseCharData(sel)) {
}
else {
throwException("unexpected tag content");
}
// FIXME allow 'Reference' here
}
//log("parseTagContent ok " + buf.getPosition());
return true;
}
 
// FIXME does not fully conform to the standard
private boolean parseCharData(XmlSelection sel)
throws XmlException, IOException
{
//log("parseCharData begin " + buf.getPosition());
// FIXME allow 'Reference' here
 
boolean found = false;
saveSelBegin(sel);
for(;; buf.toNext()) {
byte c = buf.cur();
if(c == 0 || c == '<') {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
else if(c == '<' || c == '&') {
break;
}
705,8 → 741,9
found = true;
}
//log("parseCharData " + found + " " + buf.getPosition());
saveSelEnd(sel);
if(listener != null) listener.processCharData(sel);
return found;
}
 
733,6 → 770,8
if(buf.at(0) == ']' && buf.at(1) == ']' && buf.at(2) == '>') {
saveSelEnd(sel);
buf.skip(3);
if(listener != null) listener.processCData(sel);
return true;
}
}
/xmlparser_java/branches/004_buffer_in_onw_class/XmlParser.java
19,9 → 19,11
FileInputStream in = new FileInputStream(args[0]);
try {
XmlDocument doc = new XmlDocument();
XmlListenerImpl listener = new XmlListenerImpl();
XmlDocument doc = new XmlDocument(listener);
doc.parse(in);
System.out.println(listener.count + " elements found");
}
catch(XmlException ex) {
System.out.println(ex);
30,11 → 32,47
in.close();
}
 
System.err.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms");
System.out.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms");
}
}
 
// --------------------------------------------------------------------------------------------------------------------
/**
 * Minimal {@link XmlListener} that only counts the elements reported by the
 * parser. Every callback other than {@code processElementBegin} is a no-op.
 */
class XmlListenerImpl implements XmlListener {
    /** Document handed over in {@code init}; stored but not read by this class. */
    private XmlDocument document;

    /** Number of {@code processElementBegin} notifications received so far. */
    public long count = 0;

    /** Remembers the document that is about to be parsed. */
    public void init(XmlDocument document) throws XmlException, IOException {
        this.document = document;
    }

    /** Counts one more element; the element itself is not inspected. */
    public void processElementBegin(XmlElement element) throws XmlException, IOException {
        count += 1;
    }

    /** Intentionally empty: element ends are not of interest here. */
    public void processElementEnd(XmlElement element) throws XmlException, IOException {
    }

    /** Intentionally empty: character data is ignored. */
    public void processCharData(XmlSelection sel) throws XmlException, IOException {
    }

    /** Intentionally empty: CDATA sections are ignored. */
    public void processCData(XmlSelection sel) throws XmlException, IOException {
    }
}
 
// --------------------------------------------------------------------------------------------------------------------
class XmlException
extends Exception
{
71,9 → 109,30
}
 
// --------------------------------------------------------------------------------------------------------------------
/**
 * Callback interface through which {@code XmlDocument} reports parse events.
 * Any callback may abort parsing by throwing.
 */
interface XmlListener {
    /** Invoked by {@code XmlDocument.parse} with the document before the prolog is parsed. */
    void init(XmlDocument document) throws XmlException, IOException;

    /** Invoked after the start tag of an element has been parsed. */
    void processElementBegin(XmlElement element) throws XmlException, IOException;

    /** Invoked after the end of the element has been recorded in its {@code elementSel}. */
    void processElementEnd(XmlElement element) throws XmlException, IOException;

    /** Invoked with the selection recorded for a run of character data. */
    void processCharData(XmlSelection sel) throws XmlException, IOException;

    /**
     * Invoked after the closing {@code ]]>} of a CDATA section has been
     * consumed; the selection ends just before that terminator.
     */
    void processCData(XmlSelection sel) throws XmlException, IOException;
}
 
// --------------------------------------------------------------------------------------------------------------------
class XmlElement
{
public boolean isEmpty = false;
public boolean isEmpty = false;
public XmlSelection elementSel;
public XmlSelection nameSel;
}
 
// --------------------------------------------------------------------------------------------------------------------
134,18 → 193,8
pos += n;
}
 
public long getOffset()
public long getPosition()
{
return offset;
}
 
public int getPosition()
{
return pos;
}
 
public long getAbsPosition()
{
return (offset + pos);
}
 
169,7 → 218,6
throws IOException
{
if(pos + count >= len) {
//log("ensureNext start " + pos + " " + count);
if(len == 0) {
// read full buffer at begin
len = in.read(buf);
209,24 → 257,29
// --------------------------------------------------------------------------------------------------------------------
class XmlDocument
{
private XmlListener listener;
private XmlBuffer buf;
private long line;
private long linePos;
private int level;
 
/**
 * Creates a parser that reports parse events to the given listener.
 *
 * @param listener receiver of the parse callbacks; may be null, in which
 *                 case no callbacks are made (call sites test listener != null)
 */
public XmlDocument(XmlListener listener)
{
this.listener = listener;
}
public void parse(InputStream in)
throws XmlException, IOException
{
this.buf = new XmlBuffer(in);
this.line = 1;
this.linePos = 0;
this.buf = new XmlBuffer(in);
this.level = 0;
 
if(listener != null) listener.init(this);
if(parseProlog()) {
parseElement();
while(parseMisc());
/*
if(this.pos < this.len) throwException("trash at end of text");
*/
if(!buf.isEnd()) throwException("EoF expected");
}
}
 
233,7 → 286,7
private void throwException(String message)
throws XmlException, IOException
{
throw new XmlException(message, line, /*linePos*/ buf.getAbsPosition());
throw new XmlException(message, 1, buf.getPosition());
}
 
private void log(String message)
241,7 → 294,7
System.out.println(message);
}
 
private String toString(XmlSelection sel)
public String selectionToString(XmlSelection sel)
throws IOException
{
return buf.toString(sel.begin, (int)sel.getLength());
249,12 → 302,12
private void saveSelBegin(XmlSelection sel)
{
sel.begin = buf.getAbsPosition();
sel.begin = buf.getPosition();
}
private void saveSelEnd(XmlSelection sel)
{
sel.end = buf.getAbsPosition();
sel.end = buf.getPosition();
}
private boolean parseProlog()
272,7 → 325,6
private boolean skipSpaces()
throws XmlException, IOException
{
//log("skipSpaces begin " + bufPos);
boolean found = false;
for(;;) {
289,7 → 341,6
}
}
//log("skipSpaces " + found + " " + bufPos);
return found;
}
 
296,13 → 347,10
private boolean parseDecl()
throws XmlException, IOException
{
//log("parseDecl begin " + bufPos);
// begin
if(!testChar('<') || !testChar('?') || !testChar('x')
|| !testChar('m') || !testChar('l'))
{
//log("parseDecl no 'xml' " + bufPos);
return false;
}
319,7 → 367,6
throwException("end of XML declaration expected");
}
//log("parseDecl ok " + bufPos);
return true;
}
 
334,7 → 381,6
private boolean parseName(XmlSelection sel)
throws XmlException, IOException
{
//log("parseName begin " + bufPos);
saveSelBegin(sel);
 
byte c = buf.cur();
342,7 → 388,6
buf.toNext();
}
else {
//log("parseName not a name " + bufPos);
return false;
}
363,7 → 408,6
}
saveSelEnd(sel);
//log("parseName ok " + bufPos);
return true;
}
 
382,11 → 426,9
private boolean parseAttribute(XmlSelection selName, XmlSelection selValue)
throws XmlException, IOException
{
//log("parseAttribute begin " + bufPos);
// name
buf.mark();
if(!parseName(selName)) {
//log("parseAttribute no name " + bufPos);
buf.reset();
return false;
}
424,7 → 466,6
// FIXME check '[WFC: No External Entity References]'
//log("parseAttribute ok " + bufPos);
return true;
}
 
431,21 → 472,16
/**
 * Tries parseComment(), then parseProcessInstruction(), then skipSpaces(),
 * stopping at the first one that reports success.
 *
 * @return true if one of the three attempts returned true, false otherwise
 */
private boolean parseMisc() throws XmlException, IOException {
    // Short-circuit evaluation keeps the original try-in-order behaviour.
    return parseComment() || parseProcessInstruction() || skipSpaces();
}
453,9 → 489,7
private boolean parseComment()
throws XmlException, IOException
{
//log("parseComment begin " + bufPos);
if(buf.at(0) != '<' || buf.at(1) != '!' || buf.at(2) != '-' || buf.at(3) != '-') {
//log("parseComment no signature " + bufPos);
return false;
}
468,7 → 502,7
if(buf.at(0) == '-' && buf.at(1) == '-') {
if(buf.at(2) == '>') {
buf.skip(3);
//log("parseComment ok " + bufPos);
 
return true;
}
else {
493,6 → 527,7
 
if(buf.at(0) == '?' && buf.at(1) == '>') {
buf.skip(2);
return true;
}
}
501,15 → 536,18
private XmlElement parseElement()
throws XmlException, IOException
{
//log("parseElement begin " + bufPos);
 
XmlElement element = new XmlElement();
element.elementSel = new XmlSelection();
saveSelBegin(element.elementSel);
if(!parseStartTag(element)) {
//log("parseElement no start tag " + bufPos);
return null;
}
 
++level;
if(listener != null) listener.processElementBegin(element);
if(!element.isEmpty) {
if(!parseTagContent(element)) {
throwException("cannot parse tag content");
518,11 → 556,16
XmlSelection selEndName = new XmlSelection();
parseEndTag(selEndName);
//if(element.name != selEndName
//if(!selectionToString(element.nameSel).equals(selectionToString(selEndName))) {
// throwException("tag names do not match");
//}
}
//log("parseElement ok " + bufPos);
saveSelEnd(element.elementSel);
if(listener != null) listener.processElementEnd(element);
--level;
 
return element;
}
 
529,21 → 572,19
private boolean parseStartTag(XmlElement element)
throws XmlException, IOException
{
//log("parseStartTag begin " + bufPos);
// begin
if(!testChar('<')) {
//log("parseStartTag no signature " + bufPos);
return false;
}
// name
buf.mark();
XmlSelection sel = new XmlSelection();
if(!parseName(sel)) {
element.nameSel = new XmlSelection();
if(!parseName(element.nameSel)) {
buf.reset();
return false;
}
//System.out.print("[" + toString(sel) + "]");
// attributes
XmlSelection selName = new XmlSelection();
551,9 → 592,7
for(;;) {
if(!skipSpaces()) break;
if(!parseAttribute(selName, selValue)) break;
//System.out.print(" [" + toString(selName) + "]=[" + toString(selValue) + "]");
}
//System.out.println();
// end
element.isEmpty = testChar('/');
561,7 → 600,6
throwException("end of tag expected");
}
//log("parseStartTag ok " + bufPos);
return true;
}
 
568,7 → 606,6
private void parseEndTag(XmlSelection sel)
throws XmlException, IOException
{
//log("parseEndTag begin " + bufPos);
// begin
if(!testChar('<') || !testChar('/')) {
throwException("cannot find tag end");
586,14 → 623,11
if(!testChar('>')) {
throwException("end of tag expected");
}
//log("parseEndTag ok " + bufPos);
}
 
private boolean parseTagContent(XmlElement element)
throws XmlException, IOException
{
//log("parseTagContent begin " + bufPos);
XmlSelection sel = new XmlSelection();
for(;;) {
if(buf.isEnd()) {
611,25 → 645,31
}
else if(parseCharData(sel)) {
}
else {
throwException("unexpected tag content");
}
// FIXME allow 'Reference' here
}
//log("parseTagContent ok " + bufPos);
return true;
}
 
// FIXME does not fully conform to the standard
private boolean parseCharData(XmlSelection sel)
throws XmlException, IOException
{
//log("parseCharData begin " + bufPos);
// FIXME allow 'Reference' here
 
boolean found = false;
saveSelBegin(sel);
for(;; buf.toNext()) {
byte c = buf.cur();
if(c == 0 || c == '<') {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
if(c == '<' || c == '&') {
break;
}
636,8 → 676,9
found = true;
}
//log("parseCharData " + found + " " + bufPos);
saveSelEnd(sel);
if(listener != null) listener.processCharData(sel);
return found;
}
 
664,6 → 705,8
if(buf.at(0) == ']' && buf.at(1) == ']' && buf.at(2) == '>') {
saveSelEnd(sel);
buf.skip(3);
if(listener != null) listener.processCData(sel);
 
return true;
}
}
/xmlparser_java/branches/001_buffer_move/XmlParser.java
19,9 → 19,11
FileInputStream in = new FileInputStream(args[0]);
try {
XmlDocument doc = new XmlDocument();
XmlListenerImpl listener = new XmlListenerImpl();
XmlDocument doc = new XmlDocument(listener);
doc.parse(in);
System.out.println(listener.count + " elements found");
}
catch(XmlException ex) {
System.out.println(ex);
30,11 → 32,47
in.close();
}
 
System.err.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms");
System.out.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms");
}
}
 
// --------------------------------------------------------------------------------------------------------------------
/**
 * Minimal {@link XmlListener} that only counts the elements reported by the
 * parser. Every callback other than {@code processElementBegin} is a no-op.
 */
class XmlListenerImpl implements XmlListener {
    /** Document handed over in {@code init}; stored but not read by this class. */
    private XmlDocument document;

    /** Number of {@code processElementBegin} notifications received so far. */
    public long count = 0;

    /** Remembers the document that is about to be parsed. */
    public void init(XmlDocument document) throws XmlException, IOException {
        this.document = document;
    }

    /** Counts one more element; the element itself is not inspected. */
    public void processElementBegin(XmlElement element) throws XmlException, IOException {
        count += 1;
    }

    /** Intentionally empty: element ends are not of interest here. */
    public void processElementEnd(XmlElement element) throws XmlException, IOException {
    }

    /** Intentionally empty: character data is ignored. */
    public void processCharData(XmlSelection sel) throws XmlException, IOException {
    }

    /** Intentionally empty: CDATA sections are ignored. */
    public void processCData(XmlSelection sel) throws XmlException, IOException {
    }
}
 
// --------------------------------------------------------------------------------------------------------------------
class XmlException
extends Exception
{
71,22 → 109,48
}
 
// --------------------------------------------------------------------------------------------------------------------
/**
 * Callback interface through which {@code XmlDocument} reports parse events.
 * Any callback may abort parsing by throwing.
 */
interface XmlListener {
    /** Invoked by {@code XmlDocument.parse} with the document before the prolog is parsed. */
    void init(XmlDocument document) throws XmlException, IOException;

    /** Invoked after the start tag of an element has been parsed. */
    void processElementBegin(XmlElement element) throws XmlException, IOException;

    /** Invoked after the end of the element has been recorded in its {@code elementSel}. */
    void processElementEnd(XmlElement element) throws XmlException, IOException;

    /** Invoked with the selection recorded for a run of character data. */
    void processCharData(XmlSelection sel) throws XmlException, IOException;

    /**
     * Invoked after the closing {@code ]]>} of a CDATA section has been
     * consumed; the selection ends just before that terminator.
     */
    void processCData(XmlSelection sel) throws XmlException, IOException;
}
 
// --------------------------------------------------------------------------------------------------------------------
class XmlElement
{
public boolean isEmpty = false;
public boolean isEmpty = false;
public XmlSelection elementSel;
public XmlSelection nameSel;
}
 
// --------------------------------------------------------------------------------------------------------------------
class XmlDocument
{
private XmlListener listener;
private InputStream in;
private byte[] buf = new byte[2048]; // (buf.length % 16 == 0)
private int bufLen;
private int bufPos;
private long bufOffset;
private long line;
private long linePos;
private int level;
 
/**
 * Creates a parser that reports parse events to the given listener.
 *
 * @param listener receiver of the parse callbacks; may be null, in which
 *                 case no callbacks are made (call sites test listener != null)
 */
public XmlDocument(XmlListener listener)
{
this.listener = listener;
}
public void parse(InputStream in)
throws XmlException, IOException
{
98,16 → 162,16
this.bufLen = 0;
this.bufPos = 0;
this.bufOffset = 0;
this.line = 1;
this.linePos = 0;
this.level = 0;
 
if(listener != null) listener.init(this);
if(parseProlog()) {
parseElement();
while(parseMisc());
/*
if(this.pos < this.len) throwException("trash at end of text");
*/
 
if(bufPos < bufLen) throwException("EoF expected");
}
}
 
114,7 → 178,7
private void throwException(String message)
throws XmlException, IOException
{
throw new XmlException(message, line, /*linePos*/ bufPos);
throw new XmlException(message, 1, bufPos);
}
 
private void log(String message)
122,12 → 186,17
System.out.println(message);
}
 
private String toString(XmlSelection sel)
public String selectionToString(XmlSelection sel)
throws IOException
{
return new String(buf, (int)(sel.begin - bufOffset), (int)sel.getLength(), "UTF-8");
}
/**
 * Returns the current value of the level counter, which parse() resets to 0
 * and parseElement() increments after a start tag has been parsed and
 * decrements once that element has been completed.
 *
 * @return the current level value
 */
public int getLevel()
{
return level;
}
private void saveSelBegin(XmlSelection sel)
{
sel.begin = bufOffset + bufPos;
142,7 → 211,6
throws XmlException, IOException
{
if(bufPos + count >= bufLen) {
//log("ensureNext start " + bufPos + " " + count);
if(bufLen == 0) {
// read full buffer at begin
bufLen = in.read(buf);
183,7 → 251,6
private boolean skipSpaces()
throws XmlException, IOException
{
//log("skipSpaces begin " + bufPos);
boolean found = false;
for(;;) {
199,7 → 266,6
}
}
//log("skipSpaces " + found + " " + bufPos);
return found;
}
 
206,13 → 272,10
private boolean parseDecl()
throws XmlException, IOException
{
//log("parseDecl begin " + bufPos);
// begin
if(!testChar('<') || !testChar('?') || !testChar('x')
|| !testChar('m') || !testChar('l'))
{
//log("parseDecl no 'xml' " + bufPos);
return false;
}
229,7 → 292,6
throwException("end of XML declaration expected");
}
//log("parseDecl ok " + bufPos);
return true;
}
 
244,7 → 306,6
private boolean parseName(XmlSelection sel)
throws XmlException, IOException
{
//log("parseName begin " + bufPos);
saveSelBegin(sel);
int start = bufPos;
257,7 → 318,6
++bufPos;
}
else {
//log("parseName not a name " + bufPos);
bufPos = start;
return false;
}
279,7 → 339,6
}
saveSelEnd(sel);
//log("parseName ok " + bufPos);
return true;
}
 
302,10 → 361,8
private boolean parseAttribute(XmlSelection selName, XmlSelection selValue)
throws XmlException, IOException
{
//log("parseAttribute begin " + bufPos);
// name
if(!parseName(selName)) {
//log("parseAttribute no name " + bufPos);
return false;
}
342,7 → 399,6
// FIXME check '[WFC: No External Entity References]'
//log("parseAttribute ok " + bufPos);
return true;
}
 
349,21 → 405,16
/**
 * Tries parseComment(), then parseProcessInstruction(), then skipSpaces(),
 * stopping at the first one that reports success.
 *
 * @return true if one of the three attempts returned true, false otherwise
 */
private boolean parseMisc() throws XmlException, IOException {
    // Short-circuit evaluation keeps the original try-in-order behaviour.
    return parseComment() || parseProcessInstruction() || skipSpaces();
}
371,13 → 422,10
private boolean parseComment()
throws XmlException, IOException
{
//log("parseComment begin " + bufPos);
if(bufPos+3 >= bufLen && !ensureNext(4)) {
//log("parseComment no data " + bufPos);
return false;
}
if(buf[bufPos] != '<' || buf[bufPos+1] != '!' || buf[bufPos+2] != '-' || buf[bufPos+3] != '-') {
//log("parseComment no signature " + bufPos);
return false;
}
390,7 → 438,7
if(buf[bufPos] == '-' && buf[bufPos+1] == '-') {
if(buf[bufPos+2] == '>') {
bufPos += 3;
//log("parseComment ok " + bufPos);
return true;
}
else {
418,6 → 466,7
 
if(buf[bufPos] == '?' && buf[bufPos+1] == '>') {
bufPos += 2;
return true;
}
}
426,15 → 475,18
private XmlElement parseElement()
throws XmlException, IOException
{
//log("parseElement begin " + bufPos);
 
XmlElement element = new XmlElement();
element.elementSel = new XmlSelection();
saveSelBegin(element.elementSel);
if(!parseStartTag(element)) {
//log("parseElement no start tag " + bufPos);
return null;
}
 
++level;
if(listener != null) listener.processElementBegin(element);
if(!element.isEmpty) {
if(!parseTagContent(element)) {
throwException("cannot parse tag content");
443,11 → 495,16
XmlSelection selEndName = new XmlSelection();
parseEndTag(selEndName);
//if(element.name != selEndName
//if(!selectionToString(element.nameSel).equals(selectionToString(selEndName))) {
// throwException("tag names do not match");
//}
}
//log("parseElement ok " + bufPos);
saveSelEnd(element.elementSel);
if(listener != null) listener.processElementEnd(element);
--level;
return element;
}
 
454,23 → 511,20
private boolean parseStartTag(XmlElement element)
throws XmlException, IOException
{
//log("parseStartTag begin " + bufPos);
int start = bufPos;
 
// begin
if(!testChar('<')) {
//log("parseStartTag no signature " + bufPos);
return false;
}
// name
XmlSelection sel = new XmlSelection();
if(!parseName(sel)) {
//log("parseStartTag no name " + bufPos);
element.nameSel = new XmlSelection();
if(!parseName(element.nameSel)) {
bufPos = start;
return false;
}
//System.out.print("[" + toString(sel) + "]");
// attributes
XmlSelection selName = new XmlSelection();
478,9 → 532,7
for(;;) {
if(!skipSpaces()) break;
if(!parseAttribute(selName, selValue)) break;
//System.out.print(" [" + toString(selName) + "]=[" + toString(selValue) + "]");
}
//System.out.println();
// end
element.isEmpty = testChar('/');
488,7 → 540,6
throwException("end of tag expected");
}
//log("parseStartTag ok " + bufPos);
return true;
}
 
495,7 → 546,6
private void parseEndTag(XmlSelection sel)
throws XmlException, IOException
{
//log("parseEndTag begin " + bufPos);
// begin
if(!testChar('<') || !testChar('/')) {
throwException("cannot find tag end");
513,14 → 563,11
if(!testChar('>')) {
throwException("end of tag expected");
}
//log("parseEndTag ok " + bufPos);
}
 
private boolean parseTagContent(XmlElement element)
throws XmlException, IOException
{
//log("parseTagContent begin " + bufPos);
XmlSelection sel = new XmlSelection();
for(;;) {
if(bufPos+1 >= bufLen && !ensureNext(2)) {
538,29 → 585,31
}
else if(parseCharData(sel)) {
}
else {
throwException("unexpected tag content");
}
// FIXME allow 'Reference' here
}
//log("parseTagContent ok " + bufPos);
return true;
}
 
// FIXME does not fully conform to the standard
private boolean parseCharData(XmlSelection sel)
throws XmlException, IOException
{
//log("parseCharData begin " + bufPos);
// FIXME allow 'Reference' here
boolean found = false;
saveSelBegin(sel);
for(;; ++bufPos) {
if(bufPos >= bufLen && !ensureNext(1)) {
return true;
throwException("unexpected EoF");
}
byte c = buf[bufPos];
if(c == '<') {
if(c == '<' || c == '&') {
break;
}
567,8 → 616,9
found = true;
}
//log("parseCharData " + found + " " + bufPos);
saveSelEnd(sel);
if(listener != null) listener.processCharData(sel);
 
return found;
}
 
598,6 → 648,8
if(buf[bufPos] == ']' && buf[bufPos+1] == ']' && buf[bufPos+2] == '>') {
saveSelEnd(sel);
bufPos += 3;
if(listener != null) listener.processCData(sel);
 
return true;
}
}