The remove
function detaches an element from the tree and therefore removes the XML node (Element, PI or Comment), its content (the descendant items) and the tail
text. Here, preserving the tail
text is superfluous because it contains only whitespace and a newline, which can be treated as ignorable whitespace.
To remove an element (and its content) while preserving its tail,
you can use the following function:
def _append_text(parent, anchor, text):
    """
    Append ``text`` at the position that directly follows ``anchor``.

    :param parent: element whose ``text`` receives the value when there is
        no preceding sibling.
    :param anchor: preceding sibling whose ``tail`` receives the value, or
        ``None`` when the text belongs at the very start of ``parent``.
    :param text: string to append (may be empty).
    """
    if anchor is None:
        merged = (parent.text or u"") + text
        # An empty result is stored as None, i.e. "no text at all".
        parent.text = merged or None
    else:
        merged = (anchor.tail or u"") + text
        anchor.tail = merged or None


def remove_node(child, keep_content=False):
    """
    Remove an XML node from its parent, preserving its tail text.

    The tail text of ``child`` is appended to the tail of its previous
    sibling, or to the text of its parent when there is no previous
    sibling, before the node itself is detached.

    :param child: XML node to remove; it must be attached to a parent.
    :param keep_content: ``True`` to keep child text and sub-elements, which
        are then spliced into the parent at the position of ``child``.
    :raises ValueError: if ``child`` has no parent (root or detached node).
    """
    parent = child.getparent()
    if parent is None:
        # Without a parent there is nowhere to move the tail text to and
        # nothing to detach from; fail with an explicit message.
        raise ValueError("cannot remove a node that has no parent")

    if keep_content:
        # The text of the child goes where the child used to start.
        _append_text(parent, child.getprevious(), child.text or u"")
        # Move the sub-elements in front of the child, keeping their order.
        position = parent.index(child)
        parent[position:position] = child[:]

    # The previous sibling is looked up again here: after the splice above
    # it may be the last sub-element that was just moved out of the child.
    _append_text(parent, child.getprevious(), child.tail or u"")
    parent.remove(child)
Here is a demo:
from lxml import etree
# Drop the outer <bad> element together with everything inside it; only its
# tail text (" tail") survives and is appended to the text of <root>.
tree = etree.XML(u"<root>text <bad>before <bad>inner</bad> after</bad> tail</root>")
matches = tree.xpath("//bad[1]")
bad1 = matches[0]  # first match in document order: the outer <bad>
remove_node(bad1)
etree.dump(tree)
# Output: "text " followed by " tail", hence two spaces in between.
# <root>text  tail</root>
If you want to preserve the content, you can do:
# Same document, but this time the content of the outer <bad> element (its
# text and its nested <bad> child) is kept in place of the removed element.
tree = etree.XML(u"<root>text <bad>before <bad>inner</bad> after</bad> tail</root>")
matches = tree.xpath("//bad[1]")
bad1 = matches[0]  # first match in document order: the outer <bad>
remove_node(bad1, keep_content=True)
etree.dump(tree)
# <root>text before <bad>inner</bad> after tail</root>