From 70685bd233d5c32b9f762a5fc0e4d7fa42a013ee Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Wed, 11 Jul 2012 23:20:48 +0900 Subject: [PATCH] Rewritten Xml2XmlParser Now it is push parser + utility function for file parsing. --- src/Xml2XmlParser.cc | 147 ++++++++++++++++++++++------------------- src/Xml2XmlParser.h | 41 ++++++++++-- src/metalink_helper.cc | 21 +++++- src/rpc_helper.cc | 2 +- 4 files changed, 136 insertions(+), 75 deletions(-) diff --git a/src/Xml2XmlParser.cc b/src/Xml2XmlParser.cc index 58c6f1abb..196eeffc1 100644 --- a/src/Xml2XmlParser.cc +++ b/src/Xml2XmlParser.cc @@ -2,7 +2,7 @@ /* * aria2 - The high speed download utility * - * Copyright (C) 2011 Tatsuhiro Tsujikawa + * Copyright (C) 2012 Tatsuhiro Tsujikawa * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,28 +36,17 @@ #include #include -#include - -#include #include "a2io.h" -#include "BinaryStream.h" #include "ParserStateMachine.h" #include "A2STR.h" #include "a2functional.h" #include "XmlAttr.h" +#include "util.h" namespace aria2 { -namespace { -struct SessionData { - std::deque charactersStack_; - ParserStateMachine* psm_; - SessionData(ParserStateMachine* psm) - : psm_(psm) - {} -}; -} // namespace +namespace xml { namespace { void mlStartElement @@ -88,13 +77,13 @@ void mlStartElement xmlAttr.valueLength = pattrs[i+4]-xmlAttr.value; xmlAttrs.push_back(xmlAttr); } - sd->psm_->beginElement + sd->psm->beginElement (reinterpret_cast(localname), reinterpret_cast(prefix), reinterpret_cast(nsUri), xmlAttrs); - if(sd->psm_->needsCharactersBuffering()) { - sd->charactersStack_.push_front(A2STR::NIL); + if(sd->psm->needsCharactersBuffering()) { + sd->charactersStack.push_front(A2STR::NIL); } } } // namespace @@ -108,11 +97,11 @@ void mlEndElement { SessionData* sd = reinterpret_cast(userData); std::string characters; - if(sd->psm_->needsCharactersBuffering()) { - characters = sd->charactersStack_.front(); - sd->charactersStack_.pop_front(); + if(sd->psm->needsCharactersBuffering()) { + characters = sd->charactersStack.front(); + sd->charactersStack.pop_front(); } - sd->psm_->endElement + sd->psm->endElement (reinterpret_cast(localname), reinterpret_cast(prefix), reinterpret_cast(nsUri), @@ -124,8 +113,8 @@ namespace { void mlCharacters(void* userData, const xmlChar* ch, int len) { SessionData* sd = reinterpret_cast(userData); - if(sd->psm_->needsCharactersBuffering()) { - sd->charactersStack_.front().append(&ch[0], &ch[len]); + if(sd->psm->needsCharactersBuffering()) { + sd->charactersStack.front().append(&ch[0], &ch[len]); } } } // namespace @@ -169,61 +158,85 @@ xmlSAXHandler mySAXHandler = } // namespace XmlParser::XmlParser(ParserStateMachine* psm) - : psm_(psm) + : psm_(psm), + sessionData_(psm), + ctx_(xmlCreatePushParserCtxt(&mySAXHandler, &sessionData_, 0, 0, 0)), + lastError_(0) {} -XmlParser::~XmlParser() {} - -bool XmlParser::parseFile(const char* filename) +XmlParser::~XmlParser() { - SessionData sessionData(psm_); - // Old libxml2(at least 2.7.6, Ubuntu 10.04LTS) does not read stdin - // when "/dev/stdin" is passed as filename while 2.7.7 does. So we - // convert DEV_STDIN to "-" for compatibility. - const char* nfilename; - if(strcmp(filename, DEV_STDIN) == 0) { - nfilename = "-"; - } else { - nfilename = filename; - } - int r = xmlSAXUserParseFile(&mySAXHandler, &sessionData, nfilename); - return r == 0 && psm_->finished(); + xmlFreeParserCtxt(ctx_); } -bool XmlParser::parseBinaryStream(BinaryStream* bs) +ssize_t XmlParser::parseUpdate(const char* data, size_t size) { - const size_t bufSize = 4096; - unsigned char buf[bufSize]; - ssize_t res = bs->readData(buf, 4, 0); - if(res != 4) { - return false; + if(lastError_ != 0) { + return lastError_; } - SessionData sessionData(psm_); - xmlParserCtxtPtr ctx = xmlCreatePushParserCtxt - (&mySAXHandler, &sessionData, - reinterpret_cast(buf), res, 0); - auto_delete deleter(ctx, xmlFreeParserCtxt); - off_t readOffset = res; - while(1) { - ssize_t res = bs->readData(buf, bufSize, readOffset); - if(res == 0) { - break; - } - if(xmlParseChunk(ctx, reinterpret_cast(buf), res, 0) != 0) { - // TODO we need this? Just break is not suffice? + int rv = xmlParseChunk(ctx_, data, size, 0); + if(rv != 0) { + return lastError_ = ERR_XML_PARSE; + } else { + return size; + } +} + +ssize_t XmlParser::parseFinal(const char* data, size_t size) +{ + if(lastError_ != 0) { + return lastError_; + } + int rv = xmlParseChunk(ctx_, data, size, 1); + if(rv != 0) { + return lastError_ = ERR_XML_PARSE; + } else { + return size; + } +} + +int XmlParser::reset() +{ + // TODO psm must be reset + sessionData_.reset(); + int rv = xmlCtxtResetPush(ctx_, 0, 0, 0, 0); + if(rv != 0) { + return lastError_ = ERR_RESET; + } else { + return 0; + } +} + +bool parseFile(const std::string& filename, ParserStateMachine* psm) +{ + int fd; + if(filename == DEV_STDIN) { + fd = STDIN_FILENO; + } else { + while((fd = a2open(utf8ToWChar(filename).c_str(), + O_BINARY | O_RDONLY, OPEN_MODE)) == -1 && fd != EINTR); + if(fd == -1) { return false; } - readOffset += res; } - xmlParseChunk(ctx, reinterpret_cast(buf), 0, 1); - return psm_->finished(); + XmlParser ps(psm); + char buf[4096]; + ssize_t nread; + bool retval = true; + while((nread = read(fd, buf, sizeof(buf))) > 0) { + if(ps.parseUpdate(buf, nread) < 0) { + retval = false; + break; + } + } + if(nread == 0 && retval) { + if(ps.parseFinal(0, 0) < 0) { + retval = false; + } + } + return retval; } -bool XmlParser::parseMemory(const char* xml, size_t len) -{ - SessionData sessionData(psm_); - int r = xmlSAXUserParseMemory(&mySAXHandler, &sessionData, xml, len); - return r == 0 && psm_->finished(); -} +} // namespace xml } // namespace aria2 diff --git a/src/Xml2XmlParser.h b/src/Xml2XmlParser.h index 66cc1a8be..c608b6c76 100644 --- a/src/Xml2XmlParser.h +++ b/src/Xml2XmlParser.h @@ -2,7 +2,7 @@ /* * aria2 - The high speed download utility * - * Copyright (C) 2011 Tatsuhiro Tsujikawa + * Copyright (C) 2012 Tatsuhiro Tsujikawa * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -37,25 +37,56 @@ #include "common.h" +#include + #include +#include +#include + +#include namespace aria2 { -class BinaryStream; class ParserStateMachine; +namespace xml { + +enum XmlError { + ERR_XML_PARSE = -1, + ERR_RESET = -2 +}; + +struct SessionData { + std::deque charactersStack; + ParserStateMachine* psm; + SessionData(ParserStateMachine* psm) + : psm(psm) + {} + void reset() + { + charactersStack.clear(); + } +}; + class XmlParser { public: // This object does not delete psm. XmlParser(ParserStateMachine* psm); ~XmlParser(); - bool parseFile(const char* filename); - bool parseBinaryStream(BinaryStream* binaryStream); - bool parseMemory(const char* xml, size_t size); + ssize_t parseUpdate(const char* data, size_t size); + ssize_t parseFinal(const char* data, size_t size); + int reset(); private: ParserStateMachine* psm_; + SessionData sessionData_; + xmlParserCtxtPtr ctx_; + int lastError_; }; +bool parseFile(const std::string& filename, ParserStateMachine* psm); + +} // namespace xml + } // namespace aria2 #endif // D_XML2_XML_PARSER_H diff --git a/src/metalink_helper.cc b/src/metalink_helper.cc index 8ac10d598..639d98e0d 100644 --- a/src/metalink_helper.cc +++ b/src/metalink_helper.cc @@ -125,7 +125,7 @@ SharedHandle parseFile { MetalinkParserStateMachine psm; psm.setBaseUri(baseUri); - if(!XmlParser(&psm).parseFile(filename.c_str())) { + if(!xml::parseFile(filename, &psm)) { throw DL_ABORT_EX2("Could not parse Metalink XML document.", error_code::METALINK_PARSE_ERROR); } @@ -142,7 +142,24 @@ SharedHandle parseBinaryStream { MetalinkParserStateMachine psm; psm.setBaseUri(baseUri); - if(!XmlParser(&psm).parseBinaryStream(bs)) { + xml::XmlParser ps(&psm); + unsigned char buf[4096]; + ssize_t nread; + off_t offread = 0; + bool retval = true; + while((nread = bs->readData(buf, sizeof(buf), offread)) > 0) { + if(ps.parseUpdate(reinterpret_cast(buf), nread) < 0) { + retval = false; + break; + } + offread += nread; + } + if(nread == 0 && retval) { + if(ps.parseFinal(0, 0) < 0) { + retval = false; + } + } + if(!retval) { throw DL_ABORT_EX2("Could not parse Metalink XML document.", error_code::METALINK_PARSE_ERROR); } diff --git a/src/rpc_helper.cc b/src/rpc_helper.cc index 054c7fbaa..2d5b1ef08 100644 --- a/src/rpc_helper.cc +++ b/src/rpc_helper.cc @@ -53,7 +53,7 @@ namespace rpc { RpcRequest xmlParseMemory(const char* xml, size_t size) { XmlRpcRequestParserStateMachine psm; - if(!XmlParser(&psm).parseMemory(xml, size)) { + if(xml::XmlParser(&psm).parseFinal(xml, size) < 0) { throw DL_ABORT_EX(MSG_CANNOT_PARSE_XML_RPC_REQUEST); } SharedHandle params;