13,15 → 13,15 |
System.err.println("Need file name as parameter"); |
return; |
} |
|
|
long startTime = System.nanoTime(); |
|
|
FileInputStream in = new FileInputStream(args[0]); |
|
|
try { |
XmlListenerImpl listener = new XmlListenerImpl(); |
XmlDocument doc = new XmlDocument(listener); |
|
|
doc.parse(in); |
System.out.println(listener.count + " elements found"); |
} |
41,31 → 41,31 |
implements XmlListener |
{ |
public long count = 0; |
|
|
private XmlDocument document; |
|
|
public void init(XmlDocument document) |
throws XmlException, IOException |
{ |
this.document = document; |
} |
|
|
public void processElementBegin(XmlElement element) |
throws XmlException, IOException |
{ |
++count; |
} |
|
|
public void processElementEnd(XmlElement element) |
throws XmlException, IOException |
{ |
} |
|
|
public void processCharData(XmlSelection sel) |
throws XmlException, IOException |
{ |
} |
|
|
public void processCData(XmlSelection sel) |
throws XmlException, IOException |
{ |
80,19 → 80,19 |
|
private long line; |
private long linePos; |
|
|
/**
 * Creates an exception that carries a message together with a position.
 *
 * @param message the detail message, forwarded to the superclass
 * @param line    value stored as the line of the error
 * @param linePos value stored as the position within that line
 */
public XmlException(String message, long line, long linePos) {
    super(message);

    // The two position fields are independent of each other.
    this.linePos = linePos;
    this.line = line;
}
|
|
public long getLine() { return line; } |
|
|
public long getLinePos() { return linePos; } |
|
|
public String toString() |
{ |
return "Error: " + getMessage() + " at " + line + ":" + linePos; |
104,7 → 104,7 |
{ |
public long begin; |
public long end; |
|
|
public long getLength() { return end - begin; } |
} |
|
113,18 → 113,18 |
{ |
public void init(XmlDocument document) |
throws XmlException, IOException; |
|
|
public void processElementBegin(XmlElement element) |
throws XmlException, IOException; |
|
|
public void processElementEnd(XmlElement element) |
throws XmlException, IOException; |
|
|
public void processCharData(XmlSelection sel) |
throws XmlException, IOException; |
|
throws XmlException, IOException; |
|
public void processCData(XmlSelection sel) |
throws XmlException, IOException; |
throws XmlException, IOException; |
} |
|
// -------------------------------------------------------------------------------------------------------------------- |
150,7 → 150,7 |
{ |
this.listener = listener; |
} |
|
|
public void parse(InputStream in) |
throws XmlException, IOException |
{ |
157,7 → 157,7 |
if(buf.length % 16 != 0) { |
throwException("wrong buffer size: " + buf.length); |
} |
|
|
this.in = in; |
this.bufLen = 0; |
this.bufPos = 0; |
165,10 → 165,10 |
this.level = 0; |
|
if(listener != null) listener.init(this); |
|
|
if(parseProlog()) { |
parseElement(); |
|
|
while(parseMisc()); |
|
if(bufPos < bufLen) throwException("EoF expected"); |
191,22 → 191,22 |
{ |
return new String(buf, (int)(sel.begin - bufOffset), (int)sel.getLength(), "UTF-8"); |
} |
|
|
public int getLevel() |
{ |
return level; |
} |
|
|
private void saveSelBegin(XmlSelection sel) |
{ |
sel.begin = bufOffset + bufPos; |
} |
|
|
private void saveSelEnd(XmlSelection sel) |
{ |
sel.end = bufOffset + bufPos; |
} |
|
|
private boolean ensureNext(int count) |
throws XmlException, IOException |
{ |
228,7 → 228,7 |
bufPos -= buf.length / 16 * 15; |
bufOffset += buf.length / 16 * 15; |
} |
|
|
return (bufPos + count < bufLen); |
} |
else { |
235,7 → 235,7 |
return true; |
} |
} |
|
|
private boolean parseProlog() |
throws XmlException, IOException |
{ |
252,10 → 252,10 |
throws XmlException, IOException |
{ |
boolean found = false; |
|
|
for(;;) { |
if(bufPos >= bufLen && !ensureNext(1)) break; |
|
|
byte c = buf[bufPos]; |
if(c == ' ' || c == '\t' || c == '\n' || c == '\r') { |
found = true; |
265,7 → 265,7 |
break; |
} |
} |
|
|
return found; |
} |
|
278,7 → 278,7 |
{ |
return false; |
} |
|
|
// attributes |
XmlSelection selName = new XmlSelection(); |
XmlSelection selValue = new XmlSelection(); |
291,7 → 291,7 |
if(!testChar('?') || !testChar('>')) { |
throwException("end of XML declaration expected"); |
} |
|
|
return true; |
} |
|
299,20 → 299,20 |
throws XmlException, IOException |
{ |
// FIXME not implemented |
|
|
return true; |
} |
|
|
private boolean parseName(XmlSelection sel) |
throws XmlException, IOException |
{ |
saveSelBegin(sel); |
int start = bufPos; |
|
|
if(bufPos >= bufLen && !ensureNext(1)) { |
return false; |
} |
|
|
byte c = buf[bufPos]; |
if(('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_' || c == ':') { |
++bufPos; |
321,12 → 321,12 |
bufPos = start; |
return false; |
} |
|
|
for(;;) { |
if(bufPos >= bufLen && !ensureNext(1)) { |
throwException("unexpected EoF"); |
} |
|
|
c = buf[bufPos]; |
if(('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') |
|| c == '.' || c == '-' || c == '_' || c == ':') |
338,7 → 338,7 |
} |
} |
saveSelEnd(sel); |
|
|
return true; |
} |
|
348,7 → 348,7 |
if(bufPos >= bufLen && !ensureNext(1)) { |
return false; |
} |
|
|
if(buf[bufPos] != c) { |
return false; |
} |
365,7 → 365,7 |
if(!parseName(selName)) { |
return false; |
} |
|
|
// eq |
skipSpaces(); |
if(!testChar('=')) { |
372,20 → 372,20 |
throwException("equal sign expected"); |
} |
skipSpaces(); |
|
|
// FIXME allow 'Reference' here |
|
|
// value |
if(!testChar('"')) { |
throwException("quoted string expected"); |
} |
|
|
saveSelBegin(selValue); |
for(;; ++bufPos) { |
if(bufPos >= bufLen && !ensureNext(1)) { |
throwException("unexpected EoF"); |
} |
|
|
byte c = buf[bufPos]; |
if(c == '<' || c == '&' || c == '"') { |
break; |
392,13 → 392,13 |
} |
} |
saveSelEnd(selValue); |
|
|
if(!testChar('"')) { |
throwException("end of quoted string expected"); |
} |
|
|
// FIXME check '[WFC: No External Entity References]' |
|
|
return true; |
} |
|
428,17 → 428,17 |
if(buf[bufPos] != '<' || buf[bufPos+1] != '!' || buf[bufPos+2] != '-' || buf[bufPos+3] != '-') { |
return false; |
} |
|
|
bufPos += 4; |
for(;; ++bufPos) { |
if(bufPos+2 >= bufLen && !ensureNext(3)) { |
throwException("unexpected EoF"); |
} |
|
|
if(buf[bufPos] == '-' && buf[bufPos+1] == '-') { |
if(buf[bufPos+2] == '>') { |
bufPos += 3; |
|
|
return true; |
} |
else { |
457,7 → 457,7 |
if(buf[bufPos] != '<' || buf[bufPos+1] != '?') { |
return false; |
} |
|
|
bufPos += 2; |
for(;; ++bufPos) { |
if(bufPos+1 >= bufLen && !ensureNext(2)) { |
466,7 → 466,7 |
|
if(buf[bufPos] == '?' && buf[bufPos+1] == '>') { |
bufPos += 2; |
|
|
return true; |
} |
} |
478,33 → 478,33 |
XmlElement element = new XmlElement(); |
element.elementSel = new XmlSelection(); |
saveSelBegin(element.elementSel); |
|
|
if(!parseStartTag(element)) { |
return null; |
} |
|
++level; |
|
|
if(listener != null) listener.processElementBegin(element); |
|
|
if(!element.isEmpty) { |
if(!parseTagContent(element)) { |
throwException("cannot parse tag content"); |
} |
|
|
XmlSelection selEndName = new XmlSelection(); |
parseEndTag(selEndName); |
|
|
//if(!selectionToString(element.nameSel).equals(selectionToString(selEndName))) { |
// throwException("tag names do not match"); |
//} |
} |
|
|
saveSelEnd(element.elementSel); |
if(listener != null) listener.processElementEnd(element); |
|
|
--level; |
|
|
return element; |
} |
|
517,15 → 517,15 |
if(!testChar('<')) { |
return false; |
} |
|
|
// name |
element.nameSel = new XmlSelection(); |
if(!parseName(element.nameSel)) { |
bufPos = start; |
|
|
return false; |
} |
|
|
// attributes |
XmlSelection selName = new XmlSelection(); |
XmlSelection selValue = new XmlSelection(); |
533,13 → 533,13 |
if(!skipSpaces()) break; |
if(!parseAttribute(selName, selValue)) break; |
} |
|
|
// end |
element.isEmpty = testChar('/'); |
if(!testChar('>')) { |
throwException("end of tag expected"); |
} |
|
|
return true; |
} |
|
550,15 → 550,15 |
if(!testChar('<') || !testChar('/')) { |
throwException("cannot find tag end"); |
} |
|
|
// name |
if(!parseName(sel)) { |
throwException("tag name expected"); |
} |
|
|
// spaces |
skipSpaces(); |
|
|
// end |
if(!testChar('>')) { |
throwException("end of tag expected"); |
574,7 → 574,7 |
throwException("unexpected EoF"); |
} |
if(buf[bufPos] == '<' && buf[bufPos+1] == '/') break; |
|
|
if(parseElement() != null) { |
} |
else if(parseComment()) { |
588,10 → 588,10 |
else { |
throwException("unexpected tag content"); |
} |
|
|
// FIXME allow 'Reference' here |
} |
|
|
return true; |
} |
|
599,23 → 599,23 |
throws XmlException, IOException |
{ |
// FIXME allow 'Reference' here |
|
|
boolean found = false; |
saveSelBegin(sel); |
|
|
for(;; ++bufPos) { |
if(bufPos >= bufLen && !ensureNext(1)) { |
throwException("unexpected EoF"); |
} |
|
|
byte c = buf[bufPos]; |
if(c == '<' || c == '&') { |
break; |
} |
|
|
found = true; |
} |
|
|
saveSelEnd(sel); |
if(listener != null) listener.processCharData(sel); |
|
628,7 → 628,7 |
if(bufPos+8 >= bufLen && !ensureNext(9)) { |
return false; |
} |
if(buf[bufPos] != '<' || buf[bufPos+1] != '!' |
if(buf[bufPos] != '<' || buf[bufPos+1] != '!' |
|| buf[bufPos+2] != '[' || buf[bufPos+3] != 'C' |
|| buf[bufPos+4] != 'D' || buf[bufPos+5] != 'A' |
|| buf[bufPos+6] != 'T' || buf[bufPos+7] != 'A' |
636,10 → 636,10 |
{ |
return false; |
} |
|
|
bufPos += 9; |
saveSelBegin(sel); |
|
|
for(;; ++bufPos) { |
if(bufPos+2 >= bufLen && !ensureNext(3)) { |
throwException("unexpected EoF"); |