Class: SAXXMLReader

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
backend/app/converters/lib/sax_xml_reader.rb

Defined Under Namespace

Classes: InnerReaderWithNodeClearing

Instance Method Summary collapse

Constructor Details

#initialize(source_xml) ⇒ SAXXMLReader

Returns a new instance of SAXXMLReader.



5
6
7
# File 'backend/app/converters/lib/sax_xml_reader.rb', line 5

# Builds a reader around the given XML source.
#
# The argument is stored untouched in @source_xml; nothing is parsed or
# validated at construction time.
#
# @param source_xml the XML source to read from
def initialize(source_xml)
  @source_xml = source_xml
end

Instance Method Details

#each(&block) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'backend/app/converters/lib/sax_xml_reader.rb', line 9

# Iterates over every node produced by `inner_reader`, yielding the node along
# with a flag telling the caller whether that node is an empty element.
#
# An element is considered empty if it has no children, or if all of its
# children are comments, whitespace, or blank text/CDATA nodes.
#
# The nodes are walked twice: once to work out which element indexes are
# empty, and once more to hand each node and its flag to the caller.
#
# @yield [node, empty] each node, in the order `inner_reader` produces them
# @yieldparam node the node produced by `inner_reader`
# @yieldparam empty [Boolean] true when the node is an empty element
# @return [Enumerator] when called without a block
def each(&block)
  return enum_for(:each) unless block

  empty_node_indexes = Set.new

  # First pass: calculate our empty node indexes.
  #
  # Only the most recently seen element can still be undecided: the very next
  # significant (non-ignorable) node settles it one way or the other, so a
  # single pending slot is all the state we need.
  pending = nil

  inner_reader.each_with_index do |node, i|
    type = node.node_type

    ignorable = (
      (type == Nokogiri::XML::Reader::TYPE_COMMENT) ||
      (type == Nokogiri::XML::Reader::TYPE_WHITESPACE) ||
      (type == Nokogiri::XML::Reader::TYPE_SIGNIFICANT_WHITESPACE) ||
      (type == Nokogiri::XML::Reader::TYPE_TEXT && node.value !~ /\S/) ||
      (type == Nokogiri::XML::Reader::TYPE_CDATA && node.value !~ /\S/)
    )

    # This node doesn't count towards making its containing element non-empty
    next if ignorable

    # A deeper node is content of the pending element, so that element is not
    # empty.  A node at the same depth is either the pending element's closer
    # or a sibling following a self-closing element.  A shallower node is an
    # ancestor's closer, which can only directly follow a self-closing
    # element.  In the last two cases nothing significant appeared inside the
    # pending element, so it is empty.
    if pending && pending[:depth] >= node.depth
      empty_node_indexes << pending[:index]
    end

    # We'll need to keep checking to work out if this one is empty.
    pending = if type == Nokogiri::XML::Reader::TYPE_ELEMENT
                {index: i, depth: node.depth}
              end
  end

  # An element still pending at the end of the stream (e.g. a self-closing
  # root element) was never followed by any content, so it is empty too.
  empty_node_indexes << pending[:index] if pending

  # Second pass: iterate the same nodes and indicate which ones are empty to the
  # caller.
  inner_reader.each_with_index do |node, i|
    block.call(node, empty_node_indexes.include?(i))
  end
end