Subversion Repositories general

Compare Revisions

Ignore whitespace Rev 1350 → Rev 1351

/xmlparser_java/branches/003_cycle_buffer/XmlParser.java
80,42 → 80,48
class XmlBuffer
{
private InputStream in;
private byte[] buf = new byte[2048]; // (buf.length % 16 == 0)
private int len;
private int pos;
private long offset;
private int marked = -1;
private byte[] buf = new byte[2048]; // (buf.length % 2 == 0)
private int size = buf.length;
private int half = size/2;
private long len = -1;
private long pos = 0;
private long marked = -1;
private boolean second = true; // which half of buffer is used
private boolean end = false;
 
public XmlBuffer(InputStream in)
throws IOException
{
if(buf.length % 16 != 0) {
if(buf.length % 2 != 0) {
throw new RuntimeException("wrong buffer size: " + buf.length);
}
this.in = in;
this.len = 0;
this.pos = 0;
this.offset = 0;
this.in = in;
ensure(0);
}
 
public byte cur()
throws XmlException, IOException
throws IOException
{
if(pos >= len) {
if(!ensureNext(1)) return 0;
int p = (int)(pos % size);
 
if(!second && p >= half || second && p < half) {
if(!ensure(p)) return 0;
}
 
return buf[pos];
return buf[p];
}
 
public byte at(int n)
throws XmlException, IOException
throws IOException
{
if(pos + n >= len) {
if(!ensureNext(n+1)) return 0;
int p = (int)((pos + n) % size);
 
if(!second && p >= half || second && p < half) {
if(!ensure(p)) return 0;
}
 
return buf[pos + n];
return buf[p];
}
 
public void toNext()
124,9 → 130,8
}
 
public boolean isEnd()
throws XmlException, IOException
{
return (pos >= len && !ensureNext(1));
return end;
}
 
public void skip(int n)
134,21 → 139,11
pos += n;
}
 
public long getOffset()
public long getPosition()
{
return offset;
}
 
public int getPosition()
{
return pos;
}
 
public long getAbsPosition()
{
return (offset + pos);
}
 
public void mark()
{
marked = pos;
165,44 → 160,60
}
}
 
public boolean ensureNext(int count)
private boolean ensure(int p)
throws IOException
{
if(pos + count >= len) {
//log("ensureNext start " + pos + " " + count);
if(len == 0) {
// read full buffer at begin
len = in.read(buf);
pos = 0;
}
else if(len < buf.length) {
// we could not fill full buffer last time - no more data
return false;
}
else {
// move last 1/16 of data to begin, fill rest with new data
System.arraycopy(buf, buf.length / 16 * 15, buf, 0, buf.length / 16);
int read = in.read(buf, buf.length / 16, buf.length / 16 * 15);
len = buf.length / 16 + read;
pos -= buf.length / 16 * 15;
if(marked >= 0) {
marked -= buf.length / 16 * 15;
if(marked < 0) marked = -1;
}
offset += buf.length / 16 * 15;
}
return (pos + count < len);
if(end) return false;
 
if(len >= 0 && (!second && p < half || second && p >= half)) {
return true;
}
 
if(len < 0) {
len = 0;
second = false;
}
else {
return true;
second = !second;
}
 
int read = in.read(buf, (second ? half : 0), half);
if(read < 0) return false;
 
if(marked >= 0 && marked < len - half) {
marked = -1;
}
 
len += read;
System.out.println("ensure " + len + " " + second);
 
return true;
}
public String toString(long begin, int length)
throws IOException
{
return new String(buf, (int)(begin - offset), length, "UTF-8");
if(begin < (len/half-1)*half) return "";
if(length > half) return "";
 
int p1 = (int)(begin % size);
int p2 = (int)((begin+length) % size);
 
if(p1 > p2) {
if(second) return "";
 
return new String(buf, p1, size, "UTF-8")
+ new String(buf, 0, p2, "UTF-8");
}
else if(p1 < half && p2 >= half) {
if(!second) return "";
 
return new String(buf, p1, half-p1, "UTF-8")
+ new String(buf, half, p2-half, "UTF-8");
}
else {
return new String(buf, p1, p2-p1, "UTF-8");
}
}
}
 
233,7 → 244,7
private void throwException(String message)
throws XmlException, IOException
{
throw new XmlException(message, line, /*linePos*/ buf.getAbsPosition());
throw new XmlException(message, line, /*linePos*/ buf.getPosition());
}
 
private void log(String message)
249,12 → 260,12
private void saveSelBegin(XmlSelection sel)
{
sel.begin = buf.getAbsPosition();
sel.begin = buf.getPosition();
}
private void saveSelEnd(XmlSelection sel)
{
sel.end = buf.getAbsPosition();
sel.end = buf.getPosition();
}
private boolean parseProlog()
272,7 → 283,7
private boolean skipSpaces()
throws XmlException, IOException
{
//log("skipSpaces begin " + bufPos);
//log("skipSpaces begin " + buf.getPosition());
boolean found = false;
for(;;) {
289,7 → 300,7
}
}
//log("skipSpaces " + found + " " + bufPos);
//log("skipSpaces " + found + " " + buf.getPosition());
return found;
}
 
296,13 → 307,13
private boolean parseDecl()
throws XmlException, IOException
{
//log("parseDecl begin " + bufPos);
//log("parseDecl begin " + buf.getPosition());
// begin
if(!testChar('<') || !testChar('?') || !testChar('x')
|| !testChar('m') || !testChar('l'))
{
//log("parseDecl no 'xml' " + bufPos);
//log("parseDecl no 'xml' " + buf.getPosition());
return false;
}
319,7 → 330,7
throwException("end of XML declaration expected");
}
//log("parseDecl ok " + bufPos);
//log("parseDecl ok " + buf.getPosition());
return true;
}
 
334,7 → 345,7
private boolean parseName(XmlSelection sel)
throws XmlException, IOException
{
//log("parseName begin " + bufPos);
//log("parseName begin " + buf.getPosition());
saveSelBegin(sel);
 
byte c = buf.cur();
342,7 → 353,7
buf.toNext();
}
else {
//log("parseName not a name " + bufPos);
//log("parseName not a name " + buf.getPosition());
return false;
}
363,7 → 374,7
}
saveSelEnd(sel);
//log("parseName ok " + bufPos);
//log("parseName ok " + buf.getPosition());
return true;
}
 
382,11 → 393,11
private boolean parseAttribute(XmlSelection selName, XmlSelection selValue)
throws XmlException, IOException
{
//log("parseAttribute begin " + bufPos);
//log("parseAttribute begin " + buf.getPosition());
// name
buf.mark();
if(!parseName(selName)) {
//log("parseAttribute no name " + bufPos);
//log("parseAttribute no name " + buf.getPosition());
buf.reset();
return false;
}
424,7 → 435,7
// FIXME check '[WFC: No External Entity References]'
//log("parseAttribute ok " + bufPos);
//log("parseAttribute ok " + buf.getPosition());
return true;
}
 
431,21 → 442,21
private boolean parseMisc()
throws XmlException, IOException
{
//log("parseMisc begin " + bufPos);
//log("parseMisc begin " + buf.getPosition());
if(parseComment()) {
//log("parseMisc comment ok " + bufPos);
//log("parseMisc comment ok " + buf.getPosition());
return true;
}
else if(parseProcessInstruction()) {
//log("parseMisc pi ok " + bufPos);
//log("parseMisc pi ok " + buf.getPosition());
return true;
}
else if(skipSpaces()) {
//log("parseMisc spaces ok " + bufPos);
//log("parseMisc spaces ok " + buf.getPosition());
return true;
}
else {
//log("parseMisc false " + bufPos);
//log("parseMisc false " + buf.getPosition());
return false;
}
}
453,9 → 464,9
private boolean parseComment()
throws XmlException, IOException
{
//log("parseComment begin " + bufPos);
//log("parseComment begin " + buf.getPosition());
if(buf.at(0) != '<' || buf.at(1) != '!' || buf.at(2) != '-' || buf.at(3) != '-') {
//log("parseComment no signature " + bufPos);
//log("parseComment no signature " + buf.getPosition());
return false;
}
468,7 → 479,7
if(buf.at(0) == '-' && buf.at(1) == '-') {
if(buf.at(2) == '>') {
buf.skip(3);
//log("parseComment ok " + bufPos);
//log("parseComment ok " + buf.getPosition());
return true;
}
else {
501,12 → 512,12
private XmlElement parseElement()
throws XmlException, IOException
{
//log("parseElement begin " + bufPos);
//log("parseElement begin " + buf.getPosition());
 
XmlElement element = new XmlElement();
if(!parseStartTag(element)) {
//log("parseElement no start tag " + bufPos);
//log("parseElement no start tag " + buf.getPosition());
return null;
}
522,7 → 533,7
// throwException("tag names do not match");
}
//log("parseElement ok " + bufPos);
//log("parseElement ok " + buf.getPosition());
return element;
}
 
529,10 → 540,10
private boolean parseStartTag(XmlElement element)
throws XmlException, IOException
{
//log("parseStartTag begin " + bufPos);
//log("parseStartTag begin " + buf.getPosition());
// begin
if(!testChar('<')) {
//log("parseStartTag no signature " + bufPos);
//log("parseStartTag no signature " + buf.getPosition());
return false;
}
543,7 → 554,7
buf.reset();
return false;
}
//System.out.print("[" + toString(sel) + "]");
System.out.print("[" + toString(sel) + "]");
// attributes
XmlSelection selName = new XmlSelection();
551,9 → 562,9
for(;;) {
if(!skipSpaces()) break;
if(!parseAttribute(selName, selValue)) break;
//System.out.print(" [" + toString(selName) + "]=[" + toString(selValue) + "]");
System.out.print(" [" + toString(selName) + "]=[" + toString(selValue) + "]");
}
//System.out.println();
System.out.println();
// end
element.isEmpty = testChar('/');
561,7 → 572,7
throwException("end of tag expected");
}
//log("parseStartTag ok " + bufPos);
//log("parseStartTag ok " + buf.getPosition());
return true;
}
 
568,7 → 579,7
private void parseEndTag(XmlSelection sel)
throws XmlException, IOException
{
//log("parseEndTag begin " + bufPos);
//log("parseEndTag begin " + buf.getPosition());
// begin
if(!testChar('<') || !testChar('/')) {
throwException("cannot find tag end");
587,13 → 598,13
throwException("end of tag expected");
}
//log("parseEndTag ok " + bufPos);
//log("parseEndTag ok " + buf.getPosition());
}
 
private boolean parseTagContent(XmlElement element)
throws XmlException, IOException
{
//log("parseTagContent begin " + bufPos);
//log("parseTagContent begin " + buf.getPosition());
XmlSelection sel = new XmlSelection();
for(;;) {
if(buf.isEnd()) {
615,7 → 626,7
// FIXME allow 'Reference' here
}
//log("parseTagContent ok " + bufPos);
//log("parseTagContent ok " + buf.getPosition());
return true;
}
 
623,7 → 634,7
private boolean parseCharData(XmlSelection sel)
throws XmlException, IOException
{
//log("parseCharData begin " + bufPos);
//log("parseCharData begin " + buf.getPosition());
boolean found = false;
saveSelBegin(sel);
636,7 → 647,7
found = true;
}
//log("parseCharData " + found + " " + bufPos);
//log("parseCharData " + found + " " + buf.getPosition());
saveSelEnd(sel);
return found;
}