Subversion Repositories general

Compare Revisions

No changes between revisions

Ignore whitespace Rev 1355 → Rev 1356

/xmlparser_java/branches/004_buffer_in_onw_class/XmlParser.java
File deleted
/xmlparser_java/branches/004_buffer_in_onw_class
Property changes:
Deleted: svn:ignore
-classes
/xmlparser_java/branches/004_buffer_in_own_class/XmlParser.java
0,0 → 1,714
/*
* input 2072160977 bytes; AMD Athlon 3100+, disk speed 38.0 MB/s
* 59.18s user 9.77s system 92% cpu 1:14.72 total; RES 9420K (empty java - 8200K)
*/
import java.io.*;
 
public class XmlParser
{
public static void main(String[] args)
throws Exception
{
if(args.length < 1) {
System.err.println("Need file name as parameter");
return;
}
 
long startTime = System.nanoTime();
 
FileInputStream in = new FileInputStream(args[0]);
 
try {
XmlListenerImpl listener = new XmlListenerImpl();
XmlDocument doc = new XmlDocument(listener);
 
doc.parse(in);
System.out.println(listener.count + " elements found");
}
catch(XmlException ex) {
System.out.println(ex);
}
finally {
in.close();
}
 
System.out.println("Elapsed: " + (System.nanoTime() - startTime) / 1000000 + "ms");
}
}
 
// --------------------------------------------------------------------------------------------------------------------
class XmlListenerImpl
implements XmlListener
{
public long count = 0;
 
private XmlDocument document;
 
public void init(XmlDocument document)
throws XmlException, IOException
{
this.document = document;
}
 
public void processElementBegin(XmlElement element)
throws XmlException, IOException
{
++count;
}
 
public void processElementEnd(XmlElement element)
throws XmlException, IOException
{
}
 
public void processCharData(XmlSelection sel)
throws XmlException, IOException
{
}
 
public void processCData(XmlSelection sel)
throws XmlException, IOException
{
}
}
 
// --------------------------------------------------------------------------------------------------------------------
/**
 * Checked exception describing a parse error together with the line and the
 * in-line position it was reported at.
 */
class XmlException extends Exception
{
    static final long serialVersionUID = 1;

    private final long line;
    private final long linePos;

    /**
     * @param message human-readable description of the problem
     * @param line    line the problem was reported at
     * @param linePos position within that line
     */
    public XmlException(String message, long line, long linePos)
    {
        super(message);

        this.line = line;
        this.linePos = linePos;
    }

    /** @return the line given at construction time */
    public long getLine()
    {
        return this.line;
    }

    /** @return the position given at construction time */
    public long getLinePos()
    {
        return this.linePos;
    }

    /** @return text of the form {@code Error: <message> at <line>:<linePos>} */
    @Override
    public String toString()
    {
        StringBuilder text = new StringBuilder("Error: ");
        text.append(getMessage());
        text.append(" at ").append(line).append(':').append(linePos);
        return text.toString();
    }
}
 
// --------------------------------------------------------------------------------------------------------------------
/**
 * Half-open range of absolute positions: {@code begin} is the first position
 * covered, {@code end} the first position after it.
 */
class XmlSelection
{
    /** Position where the selection starts. */
    public long begin;

    /** Position just past the selection. */
    public long end;

    /** @return the distance from {@code begin} to {@code end} */
    public long getLength()
    {
        final long length = end - begin;
        return length;
    }
}
 
// --------------------------------------------------------------------------------------------------------------------
interface XmlListener
{
public void init(XmlDocument document)
throws XmlException, IOException;
 
public void processElementBegin(XmlElement element)
throws XmlException, IOException;
 
public void processElementEnd(XmlElement element)
throws XmlException, IOException;
 
public void processCharData(XmlSelection sel)
throws XmlException, IOException;
 
public void processCData(XmlSelection sel)
throws XmlException, IOException;
}
 
// --------------------------------------------------------------------------------------------------------------------
/**
 * Plain data holder describing one element reported to an XmlListener.
 */
class XmlElement
{
    /** Range of the whole element, from its start tag to the end of the element. */
    public XmlSelection elementSel;

    /** Range of the element name inside the start tag. */
    public XmlSelection nameSel;

    /** True when the start tag was self-closing; starts out false. */
    public boolean isEmpty;
}
 
// --------------------------------------------------------------------------------------------------------------------
/**
 * Sliding window of bytes over an InputStream with a cursor, look-ahead and a
 * single mark/reset slot.
 *
 * The window is refilled lazily. When it is full and more data is needed, the
 * last 1/16 of the window is kept (so that recently read bytes and a recent
 * mark survive) and the rest is replaced with fresh input.
 *
 * End of input is signalled by a 0 byte from cur()/at() and by isEnd().
 * Look-ahead distances are expected to stay well below 1/16 of the window.
 */
class XmlBuffer
{
    private InputStream in;
    private byte[] buf = new byte[2048]; // (buf.length % 16 == 0)
    private int len;          // number of valid bytes in buf
    private int pos;          // cursor, index into buf
    private long offset;      // absolute stream position of buf[0]
    private int marked = -1;  // saved cursor, -1 when nothing is saved
    private boolean eof = false; // set once the stream has reported end of input

    /**
     * @param in stream to read from; it is not closed by this class
     * @throws RuntimeException if the window size is not a multiple of 16
     */
    public XmlBuffer(InputStream in)
    {
        if (buf.length % 16 != 0) {
            throw new RuntimeException("wrong buffer size: " + buf.length);
        }

        this.in = in;
        this.len = 0;
        this.pos = 0;
        this.offset = 0;
    }

    /**
     * @return the byte under the cursor, or 0 when the input is exhausted
     */
    public byte cur()
        throws IOException
    {
        if (pos >= len && !ensureNext(1)) {
            return 0;
        }

        return buf[pos];
    }

    /**
     * @param n look-ahead distance from the cursor (0 is the current byte)
     * @return the byte n positions ahead, or 0 when the input ends before it
     */
    public byte at(int n)
        throws IOException
    {
        if (pos + n >= len && !ensureNext(n + 1)) {
            return 0;
        }

        // read pos again: ensureNext() may have slid the window
        return buf[pos + n];
    }

    /** Advances the cursor by one byte. */
    public void toNext()
    {
        ++pos;
    }

    /**
     * @return true when the cursor is past the last byte of the input
     */
    public boolean isEnd()
        throws IOException
    {
        return (pos >= len && !ensureNext(1));
    }

    /** Advances the cursor by n bytes. */
    public void skip(int n)
    {
        pos += n;
    }

    /**
     * @return absolute position of the cursor, counted in bytes from the start of the stream
     */
    public long getPosition()
    {
        return (offset + pos);
    }

    /** Saves the cursor so that reset() can return to it. */
    public void mark()
    {
        marked = pos;
    }

    /**
     * Moves the cursor back to the saved position and clears the saved position.
     *
     * @throws RuntimeException if nothing is saved, or the saved position has
     *                          already been slid out of the window
     */
    public void reset()
    {
        if (marked < 0) {
            throw new RuntimeException("no position saved");
        }

        pos = marked;
        marked = -1;
    }

    /**
     * Makes sure that {@code count} bytes starting at the cursor are in the
     * window, reading more input if necessary.
     *
     * @param count number of bytes needed, including the one under the cursor
     * @return true if that many bytes are available, false if the input ends first
     */
    public boolean ensureNext(int count)
        throws IOException
    {
        if (pos + count <= len) {
            return true;
        }

        if (!eof) {
            if (len == buf.length) {
                slide();
            }
            fill();
        }

        return (pos + count <= len);
    }

    /**
     * Drops the consumed front of a full window, keeping at least its last
     * 1/16, and rebases cursor, mark and offset accordingly.
     */
    private void slide()
    {
        // never drop bytes at or after the cursor
        int shift = Math.min(buf.length / 16 * 15, pos);
        if (shift <= 0) {
            return;
        }

        System.arraycopy(buf, shift, buf, 0, len - shift);
        len -= shift;
        pos -= shift;
        offset += shift;

        if (marked >= 0) {
            marked -= shift;
            if (marked < 0) marked = -1; // the mark fell out of the window
        }
    }

    /**
     * Reads until the window is full or the stream ends. A single read() may
     * legally deliver fewer bytes than requested, so one call is not enough
     * to tell a short read from end of input.
     */
    private void fill()
        throws IOException
    {
        while (len < buf.length) {
            int read = in.read(buf, len, buf.length - len);
            if (read < 0) {
                eof = true;
                break;
            }
            len += read;
        }
    }

    /**
     * Decodes bytes of the current window as UTF-8.
     *
     * @param begin  absolute position of the first byte; must still be inside the window
     * @param length number of bytes to decode
     */
    public String toString(long begin, int length)
        throws IOException
    {
        return new String(buf, (int)(begin - offset), length, "UTF-8");
    }
}
 
// --------------------------------------------------------------------------------------------------------------------
class XmlDocument
{
private XmlListener listener;
private XmlBuffer buf;
private int level;
 
public XmlDocument(XmlListener listener)
{
this.listener = listener;
}
 
public void parse(InputStream in)
throws XmlException, IOException
{
this.buf = new XmlBuffer(in);
this.level = 0;
 
if(listener != null) listener.init(this);
 
if(parseProlog()) {
parseElement();
 
while(parseMisc());
 
if(!buf.isEnd()) throwException("EoF expected");
}
}
 
private void throwException(String message)
throws XmlException, IOException
{
throw new XmlException(message, 1, buf.getPosition());
}
 
private void log(String message)
{
System.out.println(message);
}
 
public String selectionToString(XmlSelection sel)
throws IOException
{
return buf.toString(sel.begin, (int)sel.getLength());
}
 
private void saveSelBegin(XmlSelection sel)
{
sel.begin = buf.getPosition();
}
 
private void saveSelEnd(XmlSelection sel)
{
sel.end = buf.getPosition();
}
 
private boolean parseProlog()
throws XmlException, IOException
{
parseDecl();
while(parseMisc());
if(parseDoctype()) {
while(parseMisc());
}
 
return true;
}
 
private boolean skipSpaces()
throws XmlException, IOException
{
boolean found = false;
 
for(;;) {
byte c = buf.cur();
if(c == 0) {
break;
}
else if(c == ' ' || c == '\t' || c == '\n' || c == '\r') {
found = true;
buf.toNext();
}
else {
break;
}
}
 
return found;
}
 
private boolean parseDecl()
throws XmlException, IOException
{
// begin
if(!testChar('<') || !testChar('?') || !testChar('x')
|| !testChar('m') || !testChar('l'))
{
return false;
}
 
// attributes
XmlSelection selName = new XmlSelection();
XmlSelection selValue = new XmlSelection();
for(;;) {
if(!skipSpaces()) break;
if(!parseAttribute(selName, selValue)) break;
}
 
// end
if(!testChar('?') || !testChar('>')) {
throwException("end of XML declaration expected");
}
 
return true;
}
 
private boolean parseDoctype()
throws XmlException, IOException
{
// FIXME not implemented
 
return true;
}
 
private boolean parseName(XmlSelection sel)
throws XmlException, IOException
{
saveSelBegin(sel);
 
byte c = buf.cur();
if(('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_' || c == ':') {
buf.toNext();
}
else {
return false;
}
 
for(;;) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
 
c = buf.cur();
if(('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9')
|| c == '.' || c == '-' || c == '_' || c == ':')
{
buf.toNext();
}
else {
break;
}
}
saveSelEnd(sel);
 
return true;
}
 
private boolean testChar(char c)
throws XmlException, IOException
{
if(buf.cur() != c) {
return false;
}
else {
buf.toNext();
return true;
}
}
 
private boolean parseAttribute(XmlSelection selName, XmlSelection selValue)
throws XmlException, IOException
{
// name
buf.mark();
if(!parseName(selName)) {
buf.reset();
return false;
}
 
// eq
skipSpaces();
if(!testChar('=')) {
throwException("equal sign expected");
}
skipSpaces();
 
// FIXME allow 'Reference' here
 
// value
if(!testChar('"')) {
throwException("quoted string expected");
}
 
saveSelBegin(selValue);
for(;; buf.toNext()) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
 
byte c = buf.cur();
if(c == '<' || c == '&' || c == '"') {
break;
}
}
saveSelEnd(selValue);
 
if(!testChar('"')) {
throwException("end of quoted string expected");
}
 
// FIXME check '[WFC: No External Entity References]'
 
return true;
}
 
private boolean parseMisc()
throws XmlException, IOException
{
if(parseComment()) {
return true;
}
else if(parseProcessInstruction()) {
return true;
}
else if(skipSpaces()) {
return true;
}
else {
return false;
}
}
 
private boolean parseComment()
throws XmlException, IOException
{
if(buf.at(0) != '<' || buf.at(1) != '!' || buf.at(2) != '-' || buf.at(3) != '-') {
return false;
}
 
buf.skip(4);
for(;; buf.toNext()) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
 
if(buf.at(0) == '-' && buf.at(1) == '-') {
if(buf.at(2) == '>') {
buf.skip(3);
 
return true;
}
else {
throwException("Sequence '--' is not allowed in comment");
}
}
}
}
 
private boolean parseProcessInstruction()
throws XmlException, IOException
{
if(buf.at(0) != '<' || buf.at(1) != '?') {
return false;
}
 
buf.skip(2);
for(;; buf.toNext()) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
 
if(buf.at(0) == '?' && buf.at(1) == '>') {
buf.skip(2);
 
return true;
}
}
}
 
private XmlElement parseElement()
throws XmlException, IOException
{
XmlElement element = new XmlElement();
element.elementSel = new XmlSelection();
saveSelBegin(element.elementSel);
 
if(!parseStartTag(element)) {
return null;
}
 
++level;
 
if(listener != null) listener.processElementBegin(element);
 
if(!element.isEmpty) {
if(!parseTagContent(element)) {
throwException("cannot parse tag content");
}
 
XmlSelection selEndName = new XmlSelection();
parseEndTag(selEndName);
 
//if(!selectionToString(element.nameSel).equals(selectionToString(selEndName))) {
// throwException("tag names do not match");
//}
}
 
saveSelEnd(element.elementSel);
if(listener != null) listener.processElementEnd(element);
 
--level;
 
return element;
}
 
private boolean parseStartTag(XmlElement element)
throws XmlException, IOException
{
// begin
if(!testChar('<')) {
return false;
}
 
// name
buf.mark();
element.nameSel = new XmlSelection();
if(!parseName(element.nameSel)) {
buf.reset();
 
return false;
}
 
// attributes
XmlSelection selName = new XmlSelection();
XmlSelection selValue = new XmlSelection();
for(;;) {
if(!skipSpaces()) break;
if(!parseAttribute(selName, selValue)) break;
}
 
// end
element.isEmpty = testChar('/');
if(!testChar('>')) {
throwException("end of tag expected");
}
 
return true;
}
 
private void parseEndTag(XmlSelection sel)
throws XmlException, IOException
{
// begin
if(!testChar('<') || !testChar('/')) {
throwException("cannot find tag end");
}
 
// name
if(!parseName(sel)) {
throwException("tag name expected");
}
 
// spaces
skipSpaces();
 
// end
if(!testChar('>')) {
throwException("end of tag expected");
}
}
 
private boolean parseTagContent(XmlElement element)
throws XmlException, IOException
{
XmlSelection sel = new XmlSelection();
for(;;) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
if(buf.at(0) == '<' && buf.at(1) == '/') break;
 
if(parseElement() != null) {
}
else if(parseComment()) {
}
else if(parseCData(sel)) {
}
else if(parseProcessInstruction()) {
}
else if(parseCharData(sel)) {
}
else {
throwException("unexpected tag content");
}
 
// FIXME allow 'Reference' here
}
 
return true;
}
 
private boolean parseCharData(XmlSelection sel)
throws XmlException, IOException
{
// FIXME allow 'Reference' here
 
boolean found = false;
saveSelBegin(sel);
 
for(;; buf.toNext()) {
byte c = buf.cur();
if(buf.isEnd()) {
throwException("unexpected EoF");
}
 
if(c == '<' || c == '&') {
break;
}
 
found = true;
}
 
saveSelEnd(sel);
if(listener != null) listener.processCharData(sel);
 
return found;
}
 
private boolean parseCData(XmlSelection sel)
throws XmlException, IOException
{
if(buf.at(0) != '<' || buf.at(1) != '!'
|| buf.at(2) != '[' || buf.at(3) != 'C'
|| buf.at(4) != 'D' || buf.at(5) != 'A'
|| buf.at(6) != 'T' || buf.at(7) != 'A'
|| buf.at(8) != '[')
{
return false;
}
 
buf.skip(9);
saveSelBegin(sel);
 
for(;; buf.toNext()) {
if(buf.isEnd()) {
throwException("unexpected EoF");
}
 
if(buf.at(0) == ']' && buf.at(1) == ']' && buf.at(2) == '>') {
saveSelEnd(sel);
buf.skip(3);
if(listener != null) listener.processCData(sel);
 
return true;
}
}
}
}
/xmlparser_java/branches/004_buffer_in_own_class
Property changes:
Added: svn:ignore
+classes