Class: SAXXMLReader
- Inherits:
-
Object
- Object
- SAXXMLReader
- Defined in:
- backend/app/converters/lib/sax_xml_reader.rb
Defined Under Namespace
Classes: InnerReaderWithNodeClearing
Instance Method Summary collapse
-
#each(&block) ⇒ Object
-
#initialize(source_xml) ⇒ SAXXMLReader
constructor
A new instance of SAXXMLReader.
Constructor Details
#initialize(source_xml) ⇒ SAXXMLReader
Returns a new instance of SAXXMLReader.
5 6 7 |
# File 'backend/app/converters/lib/sax_xml_reader.rb', line 5
#
# Builds a reader around the given XML source. The source is only stored
# here; nothing is read or parsed by the constructor itself.
#
# @param source_xml [Object] the XML input, kept as-is in `@source_xml`
def initialize(source_xml)
  @source_xml = source_xml
end
Instance Method Details
#each(&block) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'backend/app/converters/lib/sax_xml_reader.rb', line 9
#
# Walks the nodes produced by `inner_reader` and hands each one to the block
# together with a flag saying whether that node is an empty element.
#
# An element counts as empty when it has no children, or when all of its
# children are comments, whitespace, or blank text/CDATA nodes.
#
# The nodes are traversed twice (two separate `inner_reader.each_with_index`
# calls): the first pass works out which node indexes are empty elements, the
# second pass yields every node with its flag.
#
# @yieldparam node [Object] a node as produced by `inner_reader`
# @yieldparam empty [Boolean] true when the node is an empty element
# @return [Object] the result of the second `inner_reader.each_with_index` call
def each(&block)
  # Node types that never make their containing element non-empty.
  always_ignorable = [
    Nokogiri::XML::Reader::TYPE_COMMENT,
    Nokogiri::XML::Reader::TYPE_WHITESPACE,
    Nokogiri::XML::Reader::TYPE_SIGNIFICANT_WHITESPACE
  ]
  # Node types that are ignorable only when their value has no non-space char.
  ignorable_when_blank = [
    Nokogiri::XML::Reader::TYPE_TEXT,
    Nokogiri::XML::Reader::TYPE_CDATA
  ]

  empty_node_indexes = Set.new

  # First pass: the most recent element whose emptiness is still undecided.
  # The very next significant node settles the question, so one slot is enough.
  pending_index = nil
  pending_depth = nil

  inner_reader.each_with_index do |node, i|
    type = node.node_type

    # These nodes don't count towards making their container non-empty.
    next if always_ignorable.include?(type)
    next if ignorable_when_blank.include?(type) && node.value !~ /\S/

    if pending_index
      # A deeper node is content of the pending element, so it is not empty.
      # A node at the same depth is its closer or a following sibling; a
      # shallower node means an ancestor is closing (the pending element was
      # presumably self-closing and is the last child). Either way nothing
      # significant appeared inside it, so it is empty.
      empty_node_indexes << pending_index if node.depth <= pending_depth
      pending_index = nil
      pending_depth = nil
    end

    # Every opening element starts out as "maybe empty".
    if type == Nokogiri::XML::Reader::TYPE_ELEMENT
      pending_index = i
      pending_depth = node.depth
    end
  end

  # An element still pending when the input ends had nothing after it at all.
  empty_node_indexes << pending_index if pending_index

  # Second pass: iterate the same nodes and report which ones are empty.
  inner_reader.each_with_index do |node, i|
    block.call(node, empty_node_indexes.include?(i))
  end
end