Skip to content

Commit

Permalink
refs #50 adds support for parsing internal dtd
Browse files Browse the repository at this point in the history
  • Loading branch information
onelson committed Mar 11, 2018
1 parent 46c2a56 commit 4dd7aea
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 2 deletions.
55 changes: 53 additions & 2 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ enum SpecificError {
ExpectedAttributeValue,

ExpectedCData,

ExpectedCharacterData,

ExpectedComment,
Expand All @@ -58,6 +57,7 @@ enum SpecificError {
ExpectedWhitespace,

ExpectedDocumentTypeName,
ExpectedInternalDTD,
ExpectedSystemLiteral,

ExpectedClosingQuote(&'static str),
Expand Down Expand Up @@ -151,6 +151,7 @@ impl error::Error for SpecificError {
ExpectedYesNo => "expected yes or no",
ExpectedWhitespace => "expected whitespace",
ExpectedDocumentTypeName => "expected document type name",
ExpectedInternalDTD => "expected Internal DTD definition",
ExpectedSystemLiteral => "expected system literal",
ExpectedClosingQuote(_) => "expected closing quote",
ExpectedOpeningQuote(_) => "expected opening quote",
Expand Down Expand Up @@ -262,6 +263,7 @@ trait PrivateXmlParseExt<'a> {
fn consume_hex_chars(&self) -> XmlProgress<'a, &'a str>;
fn consume_char_data(&self) -> XmlProgress<'a, &'a str>;
fn consume_cdata(&self) -> XmlProgress<'a, &'a str>;
fn consume_internal_dtd(&self) -> XmlProgress<'a, &'a str>;
fn consume_comment(&self) -> XmlProgress<'a, &'a str>;
fn consume_pi_value(&self) -> XmlProgress<'a, &'a str>;
fn consume_start_tag(&self) -> XmlProgress<'a, &'a str>;
Expand Down Expand Up @@ -289,6 +291,10 @@ impl<'a> PrivateXmlParseExt<'a> for StringPoint<'a> {
self.consume_to(self.s.end_of_cdata()).map_err(|_| SpecificError::ExpectedCData)
}

/// Consumes the raw text of an internal DTD subset, stopping at the offset
/// reported by `end_of_internal_dtd`. Any failure is reported as
/// `ExpectedInternalDTD`.
fn consume_internal_dtd(&self) -> XmlProgress<'a, &'a str> {
    let subset_end = self.s.end_of_internal_dtd();
    self.consume_to(subset_end)
        .map_err(|_| SpecificError::ExpectedInternalDTD)
}

/// Consumes the body of a comment, stopping at the offset reported by
/// `end_of_comment`. Any failure is reported as `ExpectedCommentBody`.
fn consume_comment(&self) -> XmlProgress<'a, &'a str> {
    let comment_end = self.s.end_of_comment();
    self.consume_to(comment_end)
        .map_err(|_| SpecificError::ExpectedCommentBody)
}
Expand Down Expand Up @@ -489,12 +495,34 @@ fn parse_external_id<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>)
success(external_id, xml)
}

/// Parses the internal subset part of a document type declaration:
/// optional whitespace, a `[`, the raw subset text, and the closing `]`.
///
/// The XML `doctypedecl` production makes the whitespace before `[`
/// optional, so `<!DOCTYPE note[...]>` is accepted as well as
/// `<!DOCTYPE note [...]>`.
///
/// On success, returns the uninterpreted text found between the brackets
/// (after any leading whitespace) together with the position just past `]`.
/// `_pm` is unused; it is kept so the function can be used as a parser
/// alternative alongside `parse_external_id`.
fn parse_internal_dtd<'a>(_pm: &mut XmlMaster<'a>, xml: StringPoint<'a>)
    -> XmlProgress<'a, &'a str>
{
    let (xml, _) = xml.consume_space().optional(xml);
    let (xml, _) = try_parse!(xml.expect_literal("["));
    let (xml, _) = xml.consume_space().optional(xml);
    // `consume_internal_dtd` already reports `ExpectedInternalDTD` on
    // failure, so the error does not need to be mapped again here.
    let (xml, elements) = try_parse!(xml.consume_internal_dtd());
    let (xml, _) = try_parse!(xml.expect_literal("]"));

    success(elements, xml)
}

/* the optional intSubset is consumed as raw text; its declarations are not interpreted */
fn parse_document_type_declaration<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) -> XmlProgress<'a, Token<'a>> {
let (xml, _) = try_parse!(xml.expect_literal("<!DOCTYPE"));
let (xml, _) = try_parse!(xml.expect_space());
let (xml, _type_name) = try_parse!(xml.consume_name().map_err(|_| SpecificError::ExpectedDocumentTypeName));
let (xml, _external_id) = try_parse!(parse_external_id(pm, xml));


let (xml, _id) = try_parse!(
pm.alternate()
.one(|p| parse_external_id(p, xml))
.one(|p| parse_internal_dtd(p, xml))
.finish()
);

let (xml, _) = xml.consume_space().optional(xml);
let (xml, _) = try_parse!(xml.expect_literal(">"));

Expand Down Expand Up @@ -1322,6 +1350,29 @@ mod test {
assert_qname_eq!(top.name(), "hello");
}

// A prolog whose DOCTYPE carries an internal subset must not prevent the
// root element from being parsed.
#[test]
fn a_prolog_with_an_internal_document_type_declaration() {
    let parsed = quick_parse(r#"<?xml version="1.0"?>
<!DOCTYPE note [
<!ELEMENT note (to,from,heading,body)>
<!ELEMENT to (#PCDATA)>
<!ELEMENT from (#PCDATA)>
<!ELEMENT heading (#PCDATA)>
<!ELEMENT body (#PCDATA)>
]>
<note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Don't forget me this weekend</body>
</note>
"#);
    let document = parsed.as_document();
    let root = top(&document);

    assert_qname_eq!(root.name(), "note");
}

#[test]
fn a_document_with_a_single_element() {
let package = quick_parse("<hello />");
Expand Down
9 changes: 9 additions & 0 deletions src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ pub trait XmlStr {
/// Find the end of the starting tag
fn end_of_start_tag(&self) -> Option<usize>;
fn end_of_encoding(&self) -> Option<usize>;
/// Find the end of the internal doc type declaration, not including the ]
fn end_of_internal_dtd(&self) -> Option<usize>;
}

impl<'a> XmlStr for &'a str {
Expand Down Expand Up @@ -143,6 +145,8 @@ impl<'a> XmlStr for &'a str {
/// Finds the end of an encoding name by delegating to `end_of_start_rest`
/// with the encoding start-character and rest-character predicates.
fn end_of_encoding(&self) -> Option<usize> {
    self.end_of_start_rest(
        |ch| ch.is_encoding_start_char(),
        |ch| ch.is_encoding_rest_char(),
    )
}

/// Finds the byte offset of the `]` that closes the internal subset, not
/// including the `]` itself.
///
/// A `]` that appears inside a quoted literal (`"..."` or `'...'`), a
/// comment (`<!-- ... -->`) or a processing instruction (`<? ... ?>`) does
/// not end the subset and is skipped. Returns `None` when no closing `]` is
/// found or when one of those constructs is left unterminated.
fn end_of_internal_dtd(&self) -> Option<usize> {
    let s: &str = self;
    let mut pos = 0;

    while pos < s.len() {
        let rest = &s[pos..];

        if rest.starts_with("<!--") {
            // Skip the whole comment; it may contain `]` or stray quotes.
            pos += 4 + rest[4..].find("-->")? + 3;
        } else if rest.starts_with("<?") {
            // Skip the whole processing instruction for the same reason.
            pos += 2 + rest[2..].find("?>")? + 2;
        } else {
            let c = rest.chars().next()?;
            match c {
                ']' => return Some(pos),
                // Quotes are single-byte, so slicing at 1 is on a char boundary.
                '"' | '\'' => pos += 1 + rest[1..].find(c)? + 1,
                _ => pos += c.len_utf8(),
            }
        }
    }

    None
}
}

/// Predicates used when parsing characters in an XML document.
Expand Down Expand Up @@ -297,4 +301,9 @@ mod test {
fn end_of_char_data_includes_multiple_right_squares() {
assert_eq!("hello]]world".end_of_char_data(), Some("hello]]world".len()));
}

#[test]
fn end_of_internal_dtd_excludes_right_square() {
    // The reported offset stops just before the closing `]`.
    let text = "hello]>world";
    assert_eq!(text.end_of_internal_dtd(), Some("hello".len()));
}
}

0 comments on commit 4dd7aea

Please sign in to comment.