/*
    CXml2Txt implementation
    Last modified: 14.12.03

    Reads a source file character by character through an internal buffer,
    skips everything outside of '<' ... '>' and writes one output line per
    tag: the tag name followed by separator-prefixed "name=value" pairs.
*/

#include "xml2txt.h"

// Builds a converter with no files attached and the parser in text state.
CXml2Txt::CXml2Txt() :
    m_fin(NULL), m_fout(NULL), m_sSrc(""), m_sDst(""), m_tags_per_part(0),
    m_pause(false), m_verbose(false), m_ch(0), m_last_ch(0), m_line_cnt(1),
    m_state(eText)
{
    m_szError[0] = '\0';
    m_buf[0] = '\0';
    // m_idx == m_max makes the first GetNext() call refill the buffer.
    m_idx = m_max = -1;
    // Give the remaining members a defined value; Convert() and
    // ChangeState() overwrite them before they are used.
    m_prev_state = eText;
    m_sep = '\t';
}

// Closes whichever of the two files is still open.
CXml2Txt::~CXml2Txt()
{
    if (m_fin)
        fclose(m_fin);
    if (m_fout)
        fclose(m_fout);
}

// Stores the "pause" option (not consulted by the code in this file).
void CXml2Txt::SetPause(bool flag /* = true */)
{
    m_pause = flag;
}

// Enables or disables the progress line printed by GetNext() on each refill.
void CXml2Txt::BeVerbose(bool flag /* = true */)
{
    m_verbose = flag;
}

// Stores the number of tags per output part (not consulted by the code in
// this file).
void CXml2Txt::SplitOutput(unsigned long n_tags /* = 0 */)
{
    m_tags_per_part = n_tags;
}

// Opens sSrc for binary reading and sDst for binary writing.
// Any files left open by a previous Open() are closed first and the parser
// state is reset, so the same object can convert several files in a row.
// Throws Error when either file cannot be opened; in that case no file is
// left open.
void CXml2Txt::Open(string sSrc, string sDst)
{
    CXml2Txt::Close();

    // Start from a clean parser state: without this a second Open()/Convert()
    // would still see the end-of-file state and stale buffer indices left
    // behind by the previous conversion.
    m_idx = m_max = -1;
    m_ch = 0;
    m_last_ch = 0;
    m_line_cnt = 1;
    m_state = eText;

    if ((m_fin = fopen(sSrc.c_str(), "rb")) == NULL)
    {
        throw Error("Can't open source file.");
    }
    if ((m_fout = fopen(sDst.c_str(), "wb")) == NULL)
    {
        // Do not leave the object half-open.
        fclose(m_fin);
        m_fin = NULL;
        throw Error("Can't create destination file.");
    }

    // Save filenames:
    m_sSrc = sSrc;
    m_sDst = sDst;
}

// Runs the conversion until the source is exhausted.
// sep is written in front of every "name=value" pair and is stripped from
// names and values. Returns m_line_cnt - 1, where m_line_cnt starts at 1 and
// is incremented for each '\n' counted by the parser.
// Throws Error when the files are not open or when the parser enters the
// error state.
unsigned long CXml2Txt::Convert(const char sep /* = '\t' */)
{
    // Reading from / checking a NULL FILE* is undefined; fail cleanly instead.
    if (m_fin == NULL || m_fout == NULL)
        throw Error("Files are not open.");

    m_sep = sep;
    while (true)
    {
        CXml2Txt::GetNext();
        switch (m_state)
        {
        case eText:
            CXml2Txt::ParseText();
            break;
        case eTag:
            CXml2Txt::ParseTag();
            break;
        case eError:
            throw(Error(m_szError));
        case eEOF:
            CXml2Txt::On_EndOfFile();
            return m_line_cnt - 1;
        }
    }
}

// Closes the source file, the destination file, or both, depending on f.
// The stored file names are kept.
void CXml2Txt::Close(f_close f /* = f_close::ALL */)
{
    if (m_fin && (f == ALL || f == SRC))
    {
        //m_sSrc = "";
        fclose(m_fin);
        m_fin = NULL;
    }
    if (m_fout && (f == ALL || f == DST))
    {
        //m_sDst = "";
        fclose(m_fout);
        m_fout = NULL;
    }
}

// Reports on stdout whether reading and writing finished without a stream
// error.
void CXml2Txt::On_EndOfFile()
{
    if (ferror(m_fin))
        printf("\nError while reading data from source file.");
    else
        printf("\nAll data was successfully read.");

    if (ferror(m_fout))
        printf("\nError while writing data to destination file.");
    else
        printf("\nAll data was successfully written.");
}

// Advances to the next source character and returns it (also kept in m_ch;
// the previous character is kept in m_last_ch).
// The buffer is refilled from m_fin whenever it has been consumed. When
// nothing more can be read, or the destination stream is in error, the state
// changes to eEOF and '\0' is returned.
inline char CXml2Txt::GetNext()
{
    m_last_ch = m_ch;
    if (m_idx == m_max)
    {
        if ((m_max = fread(m_buf, 1, STREAM_BUF_SIZE, m_fin)) < 1 || ferror(m_fout))
        {
            ChangeState(eEOF);
            // There is no valid character to hand out. Keep the buffer marked
            // as consumed so that later calls come back here instead of
            // indexing m_buf outside of the data that was actually read.
            m_idx = m_max;
            m_ch = '\0';
            return m_ch;
        }

        m_idx = 0;
        // Progress:
        if (m_verbose)
        {
            fpos_t pos;
            fgetpos(m_fin, &pos);
            printf("\rConverting: %-I64u bytes complete.", pos);
        }
    }
    m_ch = m_buf[m_idx++];
    return m_ch;
    /*
    // No buffer:
    m_last_ch = m_ch;
    if ( (m_ch = fgetc(m_fin)) == EOF) { ChangeState(eEOF); }
    return m_ch;
    */
}

// Switches the parser state, remembering the one being left.
void CXml2Txt::ChangeState(state eNewState)
{
    m_prev_state = m_state;
    m_state = eNewState;
}

// Handles one character outside of a tag: '<' enters tag state, '\n' is
// counted, everything else is ignored.
void CXml2Txt::ParseText()
{
    switch (m_ch)
    {
    case '<':
        ChangeState(eTag);
        break;
    case '\n':
        m_line_cnt++;
        break;
    default:
        break;
    }
    // Skip other chars.
}

// Parses one tag starting at the current character (the first one after
// '<'): its name, then attribute name/value pairs for as long as the parser
// stays in tag state, and finally writes the tag out.
void CXml2Txt::ParseTag()
{
    tag tTag;
    attrib pAttrib;

    ParseTagName(tTag.sName);
    while (m_state == eTag)
    {
        pAttrib.sName = "";
        ParseTagAttribName(pAttrib.sName);
        if (m_state != eTag)
            break;

        pAttrib.sValue = "";
        ParseTagAttribVal(pAttrib.sValue);
        if (m_state != eTag)
            break;

        tTag.lAttribList.push_back(pAttrib);
        ParseTagEnd();
    }
    // AnalyzeTag() consumes the attribute list, and tTag is a local, so no
    // further clean-up is needed here.
    AnalyzeTag(tTag);
}

// Appends the tag name to sTagName.
// Leading spaces are skipped; the name ends at a space or at '>' (which also
// returns the parser to text state). '\r', '\n' and the separator character
// are dropped. A name longer than MAX_TAG_NAME puts the parser into the
// error state.
void CXml2Txt::ParseTagName(string &sTagName)
{
    unsigned int chars = 0;

    // Skip spaces:
    while (m_ch == ' ' && m_state == eTag)
        CXml2Txt::GetNext();

    while (m_state == eTag)
    {
        switch (m_ch)
        {
        case '>':
            ChangeState(eText);
            // fall through: the end of the tag also ends the name
        case ' ':
            return;
        case '\n':
            m_line_cnt++;
            break;
        }

        if (m_ch != '\r' && m_ch != '\n')
        {
            if (chars < MAX_TAG_NAME)
            {
                if (m_ch == m_sep)
                {
                    //printf("\nWarning: separator in tag name. Skipped.");
                }
                else
                {
                    sTagName += m_ch;
                    chars++;
                }
            }
            else
            {
                ChangeState(eError);
                _snprintf(m_szError, ERROR_BUF_SIZE, "Increase maximum tag name buffer. [%u]", chars);
                m_szError[ERROR_BUF_SIZE - 1] = '\0';
                return;
            }
        }
        CXml2Txt::GetNext();
    }
}

// Appends the next attribute name to sTagAttribName.
// Spaces are skipped; the name ends at '=' (which is consumed) or at '>'
// (which returns the parser to text state). '\r', '\n' and the separator
// character are dropped. A name longer than MAX_TAG_ATTRIB_NAME puts the
// parser into the error state.
void CXml2Txt::ParseTagAttribName(string &sTagAttribName)
{
    unsigned int chars = 0;

    // Skip spaces:
    while (m_ch == ' ' && m_state == eTag)
        CXml2Txt::GetNext();

    while (m_state == eTag)
    {
        switch (m_ch)
        {
        case '>':
            ChangeState(eText);
            return;
        case '=':
            CXml2Txt::GetNext();
            return;
        case '\n':
            m_line_cnt++;
            break;
        case ' ':
            CXml2Txt::GetNext();
            continue;
        }

        if (m_ch != '\r' && m_ch != '\n')
        {
            if (chars < MAX_TAG_ATTRIB_NAME)
            {
                if (m_ch == m_sep)
                {
                    //printf("\nWarning: separator in attribute name. Skipped.");
                }
                else
                {
                    sTagAttribName += m_ch;
                    chars++;
                }
            }
            else
            {
                ChangeState(eError);
                _snprintf(m_szError, ERROR_BUF_SIZE, "Increase maximum tag attribute name buffer. [%u]", chars);
                m_szError[ERROR_BUF_SIZE - 1] = '\0';
                return;
            }
        }
        CXml2Txt::GetNext();
    }
}

// Appends the next attribute value to sTagAttribVal.
// The value has to be enclosed in single or double quotes, otherwise the
// parser enters the error state. The text is scanned through a three
// character window so that a doubled quote character inside the value can be
// told apart from the closing quote. '\r', '\n' and the separator character
// are dropped. Reaching MAX_TAG_ATTRIB_VAL stored characters puts the parser
// into the error state.
void CXml2Txt::ParseTagAttribVal(string &sTagAttribVal)
{
    char used_quotes;
    unsigned int chars = 0;

    // Skip spaces:
    while (m_ch == ' ' && m_state == eTag)
        CXml2Txt::GetNext();

    while (m_state == eTag)
    {
        switch (m_ch)
        {
        case '>':
            ChangeState(eText);
            return;
        case '\n':
            m_line_cnt++;
            break;
        case ' ':
            CXml2Txt::GetNext();
            continue;
        }

        if (m_ch == '\'' || m_ch == '"')
        {
            char wnd[3], i;
            wnd[0] = used_quotes = m_ch;

            // Fill the window:
            for (i = 1; i < 3; i++)
                wnd[i] = CXml2Txt::GetNext();

            // Check for empty value case:
            if (wnd[0] == used_quotes && wnd[1] == used_quotes && wnd[2] != used_quotes)
            {
                return;
            }
            else
            {
                // Assume wnd[0] = first char of value:
                for (i = 1; i < 3; i++)
                    wnd[i - 1] = wnd[i];
                wnd[2] = CXml2Txt::GetNext();
            }

            // Read the value:
            while (m_state == eTag && chars < MAX_TAG_ATTRIB_VAL)
            {
                // Skip one quote:
                if (wnd[0] == used_quotes && wnd[1] == used_quotes)
                {
                    // Move window:
                    for (i = 1; i < 3; i++)
                        wnd[i - 1] = wnd[i];
                    wnd[2] = CXml2Txt::GetNext();
                }

                // Store the char:
                if (wnd[0] == m_sep)
                {
                    //printf("\nWarning: separator in attribute value. Skipped.");
                }
                else if (wnd[0] == '\n')
                {
                    m_line_cnt++;
                }
                else
                {
                    if (wnd[0] != '\r')
                    {
                        sTagAttribVal += wnd[0];
                        chars++;
                    }
                }

                // Check for ending quote:
                if ((wnd[1] == used_quotes && wnd[2] != used_quotes) || m_state != eTag)
                    break;

                // Move window:
                for (i = 1; i < 3; i++)
                    wnd[i - 1] = wnd[i];
                wnd[2] = CXml2Txt::GetNext();
            }

            if (chars >= MAX_TAG_ATTRIB_VAL)
            {
                ChangeState(eError);
                _snprintf(m_szError, ERROR_BUF_SIZE, "Increase maximum tag attribute value buffer. [%u]", chars);
                m_szError[ERROR_BUF_SIZE - 1] = '\0';
                return;
            }
            return;
        }
        else
        {
            ChangeState(eError);
            _snprintf(m_szError, ERROR_BUF_SIZE, "Line %lu: value is not quoted.", m_line_cnt);
            m_szError[ERROR_BUF_SIZE - 1] = '\0';
            return;
        }
    }
}

// Looks at what follows an attribute value: after skipping spaces, '>'
// returns the parser to text state and '/' is consumed.
void CXml2Txt::ParseTagEnd()
{
    // Skip spaces:
    while (m_ch == ' ' && m_state == eTag)
        CXml2Txt::GetNext();

    switch (m_ch)
    {
    case '>':
        ChangeState(eText);
        break;
    case '/':
        CXml2Txt::GetNext();
        break;
    }
}

// Writes one output line for the tag: its name, then for every attribute the
// separator followed by "name=value", terminated by "\r\n".
// The attribute list of tTag is emptied in the process.
void CXml2Txt::AnalyzeTag(tag &tTag)
{
    fprintf(m_fout, "%s", tTag.sName.c_str());
    while (tTag.lAttribList.size() > 0)
    {
        fprintf(m_fout, "%c%s=%s", m_sep,
                tTag.lAttribList.front().sName.c_str(),
                tTag.lAttribList.front().sValue.c_str());
        tTag.lAttribList.pop_front();
    }
    fprintf(m_fout, "\r\n");
}
//