C++ ときどき ごはん、わりとてぃーぶれいく☆

USAGI.NETWORKのなかのひとのブログ。主にC++。

小さな XML のパース速度で比べる rust の XML crate たち; roxmltree vs. minidom ≈ quick-xml vs. sxd-document vs. sxd-xpath vs. amxml

結果

test benches::amxml_root        ... bench:      40,783 ns/iter (+/- 6,365)
test benches::amxml_sec         ... bench:      28,487 ns/iter (+/- 1,295)
test benches::minidom_root      ... bench:       1,942 ns/iter (+/- 73)
test benches::minidom_sec       ... bench:       2,127 ns/iter (+/- 1,183)
test benches::roxmltree_root    ... bench:         999 ns/iter (+/- 36)
test benches::roxmltree_sec     ... bench:       1,171 ns/iter (+/- 205)
test benches::sxd_document_root ... bench:       3,453 ns/iter (+/- 123)
test benches::sxd_document_sec  ... bench:       3,566 ns/iter (+/- 136)
test benches::sxd_xpath_root    ... bench:      13,206 ns/iter (+/- 2,809)
test benches::sxd_xpath_sec     ... bench:      16,499 ns/iter (+/- 1,370)
  • 速度: roxmltree >> minidom >> sxd-document >>>> sxd-xpath >>>>>>>> amxml 👉 roxmltree はやい
    • note: 但し roxmltree には XPath を扱うオプションはありません
    • note: minidom ≈ quick-xml
    • note: amxml は遅いし放棄気味だけど XPath-3.1 対応でコードも書きやすいです
  • sxd-document vs. sxd-xpath 👉 同じ操作でも XPath を噛ませると処理時間が約4倍(root: 約3.8倍、sec: 約4.6倍)に長くなってしまう
    • note: 代わりに XPath を使えるという事は特に実行時のパース定義もしやすくなるなど利点は得られます
    • note: sxd は若干DOM操作のコードに癖がある気がしますが DOM ごりごりにも XPath にも同じエンジンを共有して対応できる点は嬉しい事もありそうです

そーす

Cargo.toml/dependencies:

[dependencies]
roxmltree = "0.13.0"
sxd-xpath = "0.4.2"
sxd-document = "0.3.2"
amxml = "0.5.3"
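quick-xml = "0.18.1"
# NOTE(review): tests/test.rs uses `minidom::Element`, so a `minidom` entry is also required here
# (its version is not shown in this post); `quick-xml` alone does not provide the `minidom` crate.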

benches/bench.rs:

#![feature(test)]
extern crate test;

include!("../tests/test.rs");

/// Benchmarks for the XML crates under comparison.
///
/// Each benchmark hands the function of the same name from `crate::tests`
/// straight to `Bencher::iter`, so one measured iteration is one complete run
/// of that function.
#[cfg(test)]
mod benches
{
 use crate::tests;
 use test::Bencher;

 /// Measures `tests::minidom_root`.
 #[bench]
 fn minidom_root(b: &mut Bencher)
 {
  b.iter(tests::minidom_root);
 }

 /// Measures `tests::minidom_sec`.
 #[bench]
 fn minidom_sec(b: &mut Bencher)
 {
  b.iter(tests::minidom_sec);
 }

 /// Measures `tests::roxmltree_root`.
 #[bench]
 fn roxmltree_root(b: &mut Bencher)
 {
  b.iter(tests::roxmltree_root);
 }

 /// Measures `tests::roxmltree_sec`.
 #[bench]
 fn roxmltree_sec(b: &mut Bencher)
 {
  b.iter(tests::roxmltree_sec);
 }

 /// Measures `tests::amxml_root`.
 #[bench]
 fn amxml_root(b: &mut Bencher)
 {
  b.iter(tests::amxml_root);
 }

 /// Measures `tests::amxml_sec`.
 #[bench]
 fn amxml_sec(b: &mut Bencher)
 {
  b.iter(tests::amxml_sec);
 }

 /// Measures `tests::sxd_document_root`.
 #[bench]
 fn sxd_document_root(b: &mut Bencher)
 {
  b.iter(tests::sxd_document_root);
 }

 /// Measures `tests::sxd_document_sec`.
 #[bench]
 fn sxd_document_sec(b: &mut Bencher)
 {
  b.iter(tests::sxd_document_sec);
 }

 /// Measures `tests::sxd_xpath_root`.
 #[bench]
 fn sxd_xpath_root(b: &mut Bencher)
 {
  b.iter(tests::sxd_xpath_root);
 }

 /// Measures `tests::sxd_xpath_sec`.
 #[bench]
 fn sxd_xpath_sec(b: &mut Bencher)
 {
  b.iter(tests::sxd_xpath_sec);
 }
}

tests/test.rs:

/// One `*_root` and one `*_sec` case per XML crate.
///
/// Every case parses `INPUT` from scratch. A `*_root` case then checks the
/// name of the root element, and a `*_sec` case checks the names of the
/// root's children (the "second level"). The functions are `pub` so that code
/// outside this module can call them directly.
#[cfg(test)]
mod tests
{
 /// The document shared by every case: a root element `abc` that contains the
 /// three empty elements `x`, `y` and `z`.
 const INPUT: &str = "<abc><x/><y/><z/></abc>";

 /// minidom: parse `INPUT` and check the root element's name.
 #[test]
 pub fn minidom_root()
 {
  let root: minidom::Element = INPUT.parse().unwrap();
  assert_eq!(root.name(), "abc");
 }

 /// minidom: parse `INPUT` and check the names of the root's children, in order.
 #[test]
 pub fn minidom_sec()
 {
  let root: minidom::Element = INPUT.parse().unwrap();
  // `children()` is mapped directly; an extra `into_iter()` would be a no-op.
  let sec_names: Vec<&str> = root.children().map(|element| element.name()).collect();
  assert_eq!(sec_names, ["x", "y", "z"]);
 }

 /// roxmltree: parse `INPUT` and check the root element's tag name.
 #[test]
 pub fn roxmltree_root()
 {
  let document = roxmltree::Document::parse(INPUT).unwrap();
  let root_name = document.root_element().tag_name().name();
  assert_eq!(root_name, "abc");
 }

 /// roxmltree: parse `INPUT` and check the tag names of the root's children, in order.
 #[test]
 pub fn roxmltree_sec()
 {
  let document = roxmltree::Document::parse(INPUT).unwrap();
  // `children()` is mapped directly; an extra `into_iter()` would be a no-op.
  let sec_names: Vec<&str> = document
   .root_element()
   .children()
   .map(|node| node.tag_name().name())
   .collect();
  assert_eq!(sec_names, ["x", "y", "z"]);
 }

 /// amxml: parse `INPUT` and read the root element's name with the XPath `/*/name()`.
 #[test]
 pub fn amxml_root()
 {
  let document = amxml::dom::new_document(INPUT).unwrap();
  // Alternative kept for reference: select the node itself and ask it for its name.
  //   let xpath_result = document.eval_xpath("/*");
  //   let root_name = xpath_result.unwrap().get_item(0).as_nodeptr().unwrap().name();
  let xpath_result = document.eval_xpath("/*/name()");
  let root_name = xpath_result.unwrap().get_item(0).to_string();
  // The stringified item includes its surrounding double quotes.
  assert_eq!(root_name, "\"abc\"");
 }

 /// amxml: parse `INPUT` and collect the name of every node matched by `/*/*`.
 #[test]
 pub fn amxml_sec()
 {
  let document = amxml::dom::new_document(INPUT).unwrap();
  let mut sec_names: Vec<String> = Vec::new();
  document
   // `name()` is pushed as-is; cloning the returned value would only add a copy.
   .each_node("/*/*", |node| sec_names.push(node.name()))
   .unwrap();
  assert_eq!(sec_names, ["x", "y", "z"]);
 }

 /// sxd-document: parse `INPUT` and check the local name of the document
 /// root's first child, which must be an element.
 #[test]
 pub fn sxd_document_root()
 {
  let package = sxd_document::parser::parse(INPUT).unwrap();
  let document = package.as_document();
  let root_element = document.root().children().first().unwrap().element().unwrap();
  assert_eq!(root_element.name().local_part(), "abc");
 }

 /// sxd-document: parse `INPUT` and check the local names of the root
 /// element's children, in order. Every child must be an element.
 #[test]
 pub fn sxd_document_sec()
 {
  let package = sxd_document::parser::parse(INPUT).unwrap();
  let document = package.as_document();
  let root_element = document.root().children().first().unwrap().element().unwrap();
  let sec_names: Vec<&str> = root_element
   .children()
   .iter()
   .map(|child_of_element| child_of_element.element().unwrap().name().local_part())
   .collect();
  assert_eq!(sec_names, ["x", "y", "z"]);
 }

 /// sxd-xpath: parse `INPUT`, evaluate `/*` and check the local name of the
 /// first node in the resulting node set.
 ///
 /// Panics if the XPath result is not a node set.
 #[test]
 pub fn sxd_xpath_root()
 {
  let package = sxd_document::parser::parse(INPUT).unwrap();
  let document = package.as_document();
  let value = sxd_xpath::evaluate_xpath(&document, "/*").unwrap();
  let root_name = match value
  {
   sxd_xpath::Value::Nodeset(nodes) =>
   {
    let node = nodes.iter().next().unwrap();
    let qname = node.expanded_name().unwrap();
    qname.local_part()
   },
   _ => panic!("Failed: `Value` -> `Nodeset`")
  };
  assert_eq!(root_name, "abc");
 }

 /// sxd-xpath: parse `INPUT`, evaluate `/*/*` and check that the node set
 /// holds exactly the names `x`, `y` and `z`, in any order.
 ///
 /// Panics if the XPath result is not a node set.
 #[test]
 pub fn sxd_xpath_sec()
 {
  let package = sxd_document::parser::parse(INPUT).unwrap();
  let document = package.as_document();
  let value = sxd_xpath::evaluate_xpath(&document, "/*/*").unwrap();

  // The names are borrowed rather than copied into `String`s.
  let mut sec_names: Vec<&str> = match value
  {
   sxd_xpath::Value::Nodeset(nodes) =>
   {
    nodes
     .iter()
     .map(|node| node.expanded_name().unwrap().local_part())
     .collect()
   },
   _ => panic!("Failed: `Value` -> `Nodeset`")
  };

  // Sort before comparing so the assertion does not depend on the order in
  // which the node set is iterated.
  // NOTE(review): presumably the node set has no guaranteed iteration order;
  // confirm against the sxd-xpath `Nodeset` documentation.
  sec_names.sort_unstable();
  assert_eq!(sec_names, ["x", "y", "z"]);
 }
}