Subversion Repositories general

Compare Revisions

Ignore whitespace Rev 1353 → Rev 1354

/xmlparser_java/branches/001_buffer_move/XmlParser.java
13,15 → 13,15
System.err.println("Need file name as parameter");
return;
}
 
long startTime = System.nanoTime();
 
FileInputStream in = new FileInputStream(args[0]);
 
try {
XmlListenerImpl listener = new XmlListenerImpl();
XmlDocument doc = new XmlDocument(listener);
 
doc.parse(in);
System.out.println(listener.count + " elements found");
}
41,31 → 41,31
implements XmlListener
{
public long count = 0;
 
private XmlDocument document;
 
/**
 * Remembers the document handed to this listener.
 *
 * @param document the document to keep a reference to
 */
public void init(XmlDocument document) throws XmlException, IOException {
    this.document = document;
}
 
/**
 * Counts one more element; the element itself is not inspected.
 *
 * @param element the element that has just begun (unused)
 */
public void processElementBegin(XmlElement element) throws XmlException, IOException {
    count += 1;
}
 
/**
 * Does nothing; this listener takes no action when an element ends.
 *
 * @param element the element that has just ended (unused)
 */
public void processElementEnd(XmlElement element) throws XmlException, IOException {
    // intentionally empty
}
 
/**
 * Does nothing; this listener ignores character data.
 *
 * @param sel the selection describing the character data (unused)
 */
public void processCharData(XmlSelection sel) throws XmlException, IOException {
    // intentionally empty
}
 
public void processCData(XmlSelection sel)
throws XmlException, IOException
{
80,19 → 80,19
 
private long line;
private long linePos;
 
/**
 * Creates an exception carrying a message together with a line and a position within that line.
 *
 * @param message the text passed on to the superclass constructor
 * @param line    the value later returned by {@code getLine()}
 * @param linePos the value later returned by {@code getLinePos()}
 */
public XmlException(String message, long line, long linePos) {
    super(message);
    this.linePos = linePos;
    this.line = line;
}
 
/** Returns the stored line value. */
public long getLine() {
    return this.line;
}
 
/** Returns the stored position-within-line value. */
public long getLinePos() {
    return this.linePos;
}
 
public String toString()
{
return "Error: " + getMessage() + " at " + line + ":" + linePos;
104,7 → 104,7
{
public long begin;
public long end;
 
/** Returns the distance from {@code begin} to {@code end}, i.e. {@code end - begin}. */
public long getLength() {
    return this.end - this.begin;
}
}
 
113,18 → 113,18
{
/**
 * Receives the document this listener is attached to.
 * The visible parse code calls this before it starts on the prolog.
 *
 * @param document the document that is invoking the listener
 */
public void init(XmlDocument document) throws XmlException, IOException;
 
/**
 * Notified once an element's start tag has been parsed, before its content (if any) is read.
 *
 * @param element the element whose start tag was just parsed
 */
public void processElementBegin(XmlElement element) throws XmlException, IOException;
 
/**
 * Notified when an element is finished: after its end tag for a non-empty element,
 * or directly after the begin notification for an empty one.
 *
 * @param element the element that has just been completed
 */
public void processElementEnd(XmlElement element) throws XmlException, IOException;
 
public void processCharData(XmlSelection sel)
throws XmlException, IOException;
throws XmlException, IOException;
 
public void processCData(XmlSelection sel)
throws XmlException, IOException;
throws XmlException, IOException;
}
 
// --------------------------------------------------------------------------------------------------------------------
150,7 → 150,7
{
this.listener = listener;
}
 
public void parse(InputStream in)
throws XmlException, IOException
{
157,7 → 157,7
if(buf.length % 16 != 0) {
throwException("wrong buffer size: " + buf.length);
}
 
this.in = in;
this.bufLen = 0;
this.bufPos = 0;
165,10 → 165,10
this.level = 0;
 
if(listener != null) listener.init(this);
 
if(parseProlog()) {
parseElement();
 
while(parseMisc());
 
if(bufPos < bufLen) throwException("EoF expected");
191,22 → 191,22
{
return new String(buf, (int)(sel.begin - bufOffset), (int)sel.getLength(), "UTF-8");
}
 
/**
 * Returns the current nesting level. The visible parser code resets it to 0 when parsing
 * starts, raises it by one when an element begins and lowers it again when the element ends.
 */
public int getLevel() {
    return this.level;
}
 
/**
 * Marks the current read position as the start of the given selection.
 * The stored value is the buffer offset plus the position inside the buffer.
 *
 * @param sel the selection whose {@code begin} is overwritten
 */
private void saveSelBegin(XmlSelection sel) {
    sel.begin = this.bufOffset + this.bufPos;
}
 
/**
 * Marks the current read position as the end of the given selection.
 * The stored value is the buffer offset plus the position inside the buffer.
 *
 * @param sel the selection whose {@code end} is overwritten
 */
private void saveSelEnd(XmlSelection sel) {
    sel.end = this.bufOffset + this.bufPos;
}
 
private boolean ensureNext(int count)
throws XmlException, IOException
{
228,7 → 228,7
bufPos -= buf.length / 16 * 15;
bufOffset += buf.length / 16 * 15;
}
 
return (bufPos + count < bufLen);
}
else {
235,7 → 235,7
return true;
}
}
 
private boolean parseProlog()
throws XmlException, IOException
{
252,10 → 252,10
throws XmlException, IOException
{
boolean found = false;
 
for(;;) {
if(bufPos >= bufLen && !ensureNext(1)) break;
 
byte c = buf[bufPos];
if(c == ' ' || c == '\t' || c == '\n' || c == '\r') {
found = true;
265,7 → 265,7
break;
}
}
 
return found;
}
 
278,7 → 278,7
{
return false;
}
 
// attributes
XmlSelection selName = new XmlSelection();
XmlSelection selValue = new XmlSelection();
291,7 → 291,7
if(!testChar('?') || !testChar('>')) {
throwException("end of XML declaration expected");
}
 
return true;
}
 
299,20 → 299,20
throws XmlException, IOException
{
// FIXME not implemented
 
return true;
}
 
private boolean parseName(XmlSelection sel)
throws XmlException, IOException
{
saveSelBegin(sel);
int start = bufPos;
 
if(bufPos >= bufLen && !ensureNext(1)) {
return false;
}
 
byte c = buf[bufPos];
if(('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_' || c == ':') {
++bufPos;
321,12 → 321,12
bufPos = start;
return false;
}
 
for(;;) {
if(bufPos >= bufLen && !ensureNext(1)) {
throwException("unexpected EoF");
}
 
c = buf[bufPos];
if(('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9')
|| c == '.' || c == '-' || c == '_' || c == ':')
338,7 → 338,7
}
}
saveSelEnd(sel);
 
return true;
}
 
348,7 → 348,7
if(bufPos >= bufLen && !ensureNext(1)) {
return false;
}
 
if(buf[bufPos] != c) {
return false;
}
365,7 → 365,7
if(!parseName(selName)) {
return false;
}
 
// eq
skipSpaces();
if(!testChar('=')) {
372,20 → 372,20
throwException("equal sign expected");
}
skipSpaces();
 
// FIXME allow 'Reference' here
 
// value
if(!testChar('"')) {
throwException("quoted string expected");
}
 
saveSelBegin(selValue);
for(;; ++bufPos) {
if(bufPos >= bufLen && !ensureNext(1)) {
throwException("unexpected EoF");
}
 
byte c = buf[bufPos];
if(c == '<' || c == '&' || c == '"') {
break;
392,13 → 392,13
}
}
saveSelEnd(selValue);
 
if(!testChar('"')) {
throwException("end of quoted string expected");
}
 
// FIXME check '[WFC: No External Entity References]'
 
return true;
}
 
428,17 → 428,17
if(buf[bufPos] != '<' || buf[bufPos+1] != '!' || buf[bufPos+2] != '-' || buf[bufPos+3] != '-') {
return false;
}
 
bufPos += 4;
for(;; ++bufPos) {
if(bufPos+2 >= bufLen && !ensureNext(3)) {
throwException("unexpected EoF");
}
 
if(buf[bufPos] == '-' && buf[bufPos+1] == '-') {
if(buf[bufPos+2] == '>') {
bufPos += 3;
 
return true;
}
else {
457,7 → 457,7
if(buf[bufPos] != '<' || buf[bufPos+1] != '?') {
return false;
}
 
bufPos += 2;
for(;; ++bufPos) {
if(bufPos+1 >= bufLen && !ensureNext(2)) {
466,7 → 466,7
 
if(buf[bufPos] == '?' && buf[bufPos+1] == '>') {
bufPos += 2;
 
return true;
}
}
478,33 → 478,33
XmlElement element = new XmlElement();
element.elementSel = new XmlSelection();
saveSelBegin(element.elementSel);
 
if(!parseStartTag(element)) {
return null;
}
 
++level;
 
if(listener != null) listener.processElementBegin(element);
 
if(!element.isEmpty) {
if(!parseTagContent(element)) {
throwException("cannot parse tag content");
}
 
XmlSelection selEndName = new XmlSelection();
parseEndTag(selEndName);
 
//if(!selectionToString(element.nameSel).equals(selectionToString(selEndName))) {
// throwException("tag names do not match");
//}
}
 
saveSelEnd(element.elementSel);
if(listener != null) listener.processElementEnd(element);
 
--level;
 
return element;
}
 
517,15 → 517,15
if(!testChar('<')) {
return false;
}
 
// name
element.nameSel = new XmlSelection();
if(!parseName(element.nameSel)) {
bufPos = start;
 
return false;
}
 
// attributes
XmlSelection selName = new XmlSelection();
XmlSelection selValue = new XmlSelection();
533,13 → 533,13
if(!skipSpaces()) break;
if(!parseAttribute(selName, selValue)) break;
}
 
// end
element.isEmpty = testChar('/');
if(!testChar('>')) {
throwException("end of tag expected");
}
 
return true;
}
 
550,15 → 550,15
if(!testChar('<') || !testChar('/')) {
throwException("cannot find tag end");
}
 
// name
if(!parseName(sel)) {
throwException("tag name expected");
}
 
// spaces
skipSpaces();
 
// end
if(!testChar('>')) {
throwException("end of tag expected");
574,7 → 574,7
throwException("unexpected EoF");
}
if(buf[bufPos] == '<' && buf[bufPos+1] == '/') break;
 
if(parseElement() != null) {
}
else if(parseComment()) {
588,10 → 588,10
else {
throwException("unexpected tag content");
}
 
// FIXME allow 'Reference' here
}
 
return true;
}
 
599,23 → 599,23
throws XmlException, IOException
{
// FIXME allow 'Reference' here
 
boolean found = false;
saveSelBegin(sel);
 
for(;; ++bufPos) {
if(bufPos >= bufLen && !ensureNext(1)) {
throwException("unexpected EoF");
}
 
byte c = buf[bufPos];
if(c == '<' || c == '&') {
break;
}
 
found = true;
}
 
saveSelEnd(sel);
if(listener != null) listener.processCharData(sel);
 
628,7 → 628,7
if(bufPos+8 >= bufLen && !ensureNext(9)) {
return false;
}
if(buf[bufPos] != '<' || buf[bufPos+1] != '!'
if(buf[bufPos] != '<' || buf[bufPos+1] != '!'
|| buf[bufPos+2] != '[' || buf[bufPos+3] != 'C'
|| buf[bufPos+4] != 'D' || buf[bufPos+5] != 'A'
|| buf[bufPos+6] != 'T' || buf[bufPos+7] != 'A'
636,10 → 636,10
{
return false;
}
 
bufPos += 9;
saveSelBegin(sel);
 
for(;; ++bufPos) {
if(bufPos+2 >= bufLen && !ensureNext(3)) {
throwException("unexpected EoF");