PK!lAxyyxml4h-develop/genindex.html Index — xml4h 0.2.0 documentation

Index

_ | A | B | C | D | E | F | G | H | I | K | L | N | P | R | S | T | U | V | W | X

_

__call__() (xml4h.nodes.NodeList method)
__getattr__() (xml4h.nodes.NodeAttrAndChildElementLookupsMixin method)
__getitem__() (xml4h.nodes.NodeAttrAndChildElementLookupsMixin method)
__init__() (xml4h.nodes.Node method)
__weakref__ (xml4h.nodes.AttributeDict attribute)
(xml4h.nodes.Node attribute)
(xml4h.nodes.NodeAttrAndChildElementLookupsMixin attribute)
(xml4h.nodes.NodeList attribute)
(xml4h.nodes.XPathMixin attribute)
_convert_nodelist() (xml4h.nodes.Node method)

A

a() (xml4h.builder.Builder method)
adapter (xml4h.nodes.Node attribute)
adapter_class (xml4h.nodes.Node attribute)
add_cdata() (xml4h.nodes.Element method)
add_comment() (xml4h.nodes.Element method)
add_element() (xml4h.nodes.Element method)
add_instruction() (xml4h.nodes.Element method)
add_text() (xml4h.nodes.Element method)
ancestors (xml4h.nodes.Node attribute)
attrib() (xml4h.nodes.Element method)
Attribute (class in xml4h.nodes)
attribute_node() (xml4h.nodes.Element method)
attribute_nodes (xml4h.nodes.Element attribute)
AttributeDict (class in xml4h.nodes)
attributes (xml4h.nodes.Element attribute)
attributes() (xml4h.builder.Builder method)
attrs() (xml4h.builder.Builder method)
(xml4h.nodes.Element method)

B

best_adapter (in module xml4h)
build() (in module xml4h)
Builder (class in xml4h.builder)
builder (xml4h.nodes.Element attribute)

C

c() (xml4h.builder.Builder method)
CDATA (class in xml4h.nodes)
cdata() (xml4h.builder.Builder method)
child() (xml4h.nodes.Node method)
children (xml4h.nodes.Node attribute)
clone() (xml4h.builder.Builder method)
clone_node() (xml4h.nodes.Node method)
Comment (class in xml4h.nodes)
comment() (xml4h.builder.Builder method)

D

d() (xml4h.builder.Builder method)
data (xml4h.nodes.ProcessingInstruction attribute)
data() (xml4h.builder.Builder method)
delete() (xml4h.nodes.Node method)
Document (class in xml4h.nodes)
document (xml4h.builder.Builder attribute)
(xml4h.nodes.Node attribute)
DocumentFragment (class in xml4h.nodes)
DocumentType (class in xml4h.nodes)
dom_element (xml4h.builder.Builder attribute)

E

e() (xml4h.builder.Builder method)
elem() (xml4h.builder.Builder method)
Element (class in xml4h.nodes)
element (xml4h.nodes.AttributeDict attribute)
element() (xml4h.builder.Builder method)
Entity (class in xml4h.nodes)
EntityReference (class in xml4h.nodes)

F

FeatureUnavailableException
filter() (xml4h.nodes.NodeList method)
find() (xml4h.builder.Builder method)
(xml4h.nodes.Node method)
find_doc() (xml4h.builder.Builder method)
(xml4h.nodes.Node method)
find_first() (xml4h.nodes.Node method)
find_node_elements() (xml4h.impls.interface.XmlImplAdapter method)
(xml4h.impls.lxml_etree.LXMLAdapter method)
first (xml4h.nodes.NodeList attribute)

G

get_node_text() (xml4h.impls.xml_dom_minidom.XmlDomImplAdapter method)
get_ns_info_from_node_name() (xml4h.impls.interface.XmlImplAdapter method)

H

has_feature() (xml4h.impls.interface.XmlImplAdapter class method)
(xml4h.nodes.Node method)

I

i() (xml4h.builder.Builder method)
ignore_whitespace_text_nodes() (xml4h.impls.interface.XmlImplAdapter class method)
impl_attributes (xml4h.nodes.AttributeDict attribute)
impl_document (xml4h.nodes.Node attribute)
impl_node (xml4h.nodes.Node attribute)
IncorrectArgumentTypeException
instruction() (xml4h.builder.Builder method)
is_attribute (xml4h.nodes.Node attribute)
is_available() (xml4h.impls.interface.XmlImplAdapter class method)
is_cdata (xml4h.nodes.Node attribute)
is_comment (xml4h.nodes.Node attribute)
is_document (xml4h.nodes.Node attribute)
is_document_fragment (xml4h.nodes.Node attribute)
is_document_type (xml4h.nodes.Node attribute)
is_element (xml4h.nodes.Node attribute)
is_entity (xml4h.nodes.Node attribute)
is_entity_reference (xml4h.nodes.Node attribute)
is_notation (xml4h.nodes.Node attribute)
is_processing_instruction (xml4h.nodes.Node attribute)
is_root (xml4h.nodes.Node attribute)
is_text (xml4h.nodes.Node attribute)
is_type() (xml4h.nodes.Node method)
items() (xml4h.nodes.AttributeDict method)

K

keys() (xml4h.nodes.AttributeDict method)

L

local_name (xml4h.nodes.NameValueNodeMixin attribute)
LXMLAdapter (class in xml4h.impls.lxml_etree)

N

name (xml4h.nodes.NameValueNodeMixin attribute)
namespace_uri (xml4h.nodes.Node attribute)
namespace_uri() (xml4h.nodes.AttributeDict method)
NameValueNodeMixin (class in xml4h.nodes)
Node (class in xml4h.nodes)
node_type (xml4h.nodes.Node attribute)
NodeAttrAndChildElementLookupsMixin (class in xml4h.nodes)
NodeList (class in xml4h.nodes)
Notation (class in xml4h.nodes)
ns_prefix() (xml4h.builder.Builder method)
ns_uri (xml4h.nodes.Node attribute)

P

parent (xml4h.nodes.Node attribute)
parse() (in module xml4h)
prefix (xml4h.nodes.NameValueNodeMixin attribute)
prefix() (xml4h.nodes.AttributeDict method)
processing_instruction() (xml4h.builder.Builder method)
ProcessingInstruction (class in xml4h.nodes)

R

root (xml4h.builder.Builder attribute)
(xml4h.nodes.Node attribute)

S

set_attributes() (xml4h.nodes.Element method)
set_node_text() (xml4h.impls.xml_dom_minidom.XmlDomImplAdapter method)
set_ns_prefix() (xml4h.nodes.Element method)
siblings (xml4h.nodes.Node attribute)
siblings_after (xml4h.nodes.Node attribute)
siblings_before (xml4h.nodes.Node attribute)

T

t() (xml4h.builder.Builder method)
target (xml4h.nodes.ProcessingInstruction attribute)
Text (class in xml4h.nodes)
text (xml4h.nodes.Element attribute)
text() (xml4h.builder.Builder method)
to_dict (xml4h.nodes.AttributeDict attribute)
transplant() (xml4h.builder.Builder method)
transplant_node() (xml4h.nodes.Node method)

U

up() (xml4h.builder.Builder method)

V

value (xml4h.nodes.NameValueNodeMixin attribute)
values() (xml4h.nodes.AttributeDict method)

W

write() (xml4h.builder.Builder method)
(xml4h.nodes.Node method)
write_doc() (xml4h.builder.Builder method)
(xml4h.nodes.Node method)
write_node() (in module xml4h.writer)

X

xml() (xml4h.nodes.Node method)
xml4h (module)
xml4h.builder (module)
xml4h.exceptions (module)
xml4h.impls.interface (module)
xml4h.impls.lxml_etree (module)
xml4h.impls.xml_dom_minidom (module)
xml4h.nodes (module)
xml4h.writer (module)
Xml4hException
xml_doc() (xml4h.nodes.Node method)
XmlDomImplAdapter (class in xml4h.impls.xml_dom_minidom)
XmlImplAdapter (class in xml4h.impls.interface)
XMLNS_URI (xml4h.nodes.Node attribute)
xpath() (xml4h.nodes.XPathMixin method)
xpath_on_node() (xml4h.impls.lxml_etree.LXMLAdapter method)
XPathMixin (class in xml4h.nodes)

Project Versions

PK!lAl>]))xml4h-develop/builder.html Builder — xml4h 0.2.0 documentation

Builder

xml4h includes a document builder tool that makes it easy to create valid, well-formed XML documents using relatively sparse Python code. It makes it so easy to create XML that you will no longer be tempted to cobble together documents with error-prone methods like manual string concatenation or a templating library.

Internally, the builder uses the DOM-building features of an underlying XML library which means it is (almost) impossible to construct an invalid document.

Here is some example code to build a document about Monty Python films:

>>> import xml4h
>>> xmlb = (xml4h.build('MontyPythonFilms')
...     .attributes({'source': 'http://en.wikipedia.org/wiki/Monty_Python'})
...     .element('Film')
...         .attributes({'year': 1971})
...         .element('Title')
...             .text('And Now for Something Completely Different')
...             .up()
...         .elem('Description').t(
...             "A collection of sketches from the first and second TV"
...             " series of Monty Python's Flying Circus purposely"
...             " re-enacted and shot for film.")
...             .up()
...         .up()
...     .elem('Film')
...         .attrs(year=1974)
...         .e('Title')
...             .t('Monty Python and the Holy Grail')
...             .up()
...         .e('Description').t(
...             "King Arthur and his knights embark on a low-budget search"
...             " for the Holy Grail, encountering humorous obstacles along"
...             " the way. Some of these turned into standalone sketches."
...             ).up()
...     )

The code above produces the following XML document (abbreviated):

>>> xmlb.write_doc(indent=True)  
<?xml version="1.0" encoding="utf-8"?>
<MontyPythonFilms source="http://en.wikipedia.org/wiki/Monty_Python">
    <Film year="1971">
        <Title>And Now for Something Completely Different</Title>
        <Description>A collection of sketches from the first and second...
    </Film>
    <Film year="1974">
        <Title>Monty Python and the Holy Grail</Title>
        <Description>King Arthur and his knights embark on a low-budget...
    </Film>
</MontyPythonFilms>

Getting Started

You typically create a new XML document builder by calling the xml4h.build() function with the name of the root element:

>>> root_b = xml4h.build('RootElement')

The function returns a Builder object that represents the RootElement and allows you to manipulate this element’s attributes or to add child elements.

Once you have the first builder instance, every action you perform to add content to the XML document will return another instance of the Builder class:

>>> # Add attributes to the root element's Builder
>>> root_b = root_b.attributes({'a': 1, 'b': 2}, c=3)

>>> root_b  
<xml4h.builder.Builder object ...

The Builder class always represents an underlying element in the DOM. The dom_element attribute returns the element node:

>>> root_b.dom_element
<xml4h.nodes.Element: "RootElement">

>>> root_b.dom_element.attributes
<xml4h.nodes.AttributeDict: [('a', '1'), ('b', '2'), ('c', '3')]>

When you add a new child element, the result is a builder instance representing that child element, not the original element:

>>> child1_b = root_b.element('ChildElement1')
>>> child2_b = root_b.element('ChildElement2')

>>> # The element method returns a Builder wrapping the new child element
>>> child2_b.dom_element
<xml4h.nodes.Element: "ChildElement2">
>>> child2_b.dom_element.parent
<xml4h.nodes.Element: "RootElement">

This feature of the builder can be a little confusing, but it allows for the very convenient method-chaining feature that gives the builder its power.

Method Chaining

Because every builder method that adds content to the XML document returns a builder instance representing the nearest (or newest) element, you can chain together many method calls to construct your document without any need for intermediate variables.

For example, the example code in the previous section used the variables root_b, child1_b and child2_b to represent builder instances but this is not necessary. Here is how you can use method-chaining to build the same document with less code:

>>> b = (xml4h
...     .build('RootElement').attributes({'a': 1, 'b': 2}, c=3)
...         .element('ChildElement1').up()  # NOTE the up() method
...         .element('ChildElement2')
...     )

>>> b.write_doc(indent=4)
<?xml version="1.0" encoding="utf-8"?>
<RootElement a="1" b="2" c="3">
    <ChildElement1/>
    <ChildElement2/>
</RootElement>

Notice how you can use chained method calls to write code with a structure that mirrors that of the XML document you want to produce? This makes it much easier to spot errors in your code than it would be if you were to concatenate strings.

Note

It is a good idea to wrap the build() function call and all following chained methods in parentheses, so you don’t need to put backslash (\) characters at the end of every line.

The code above introduces a very important builder method: up(). This method returns a builder instance representing the current element’s parent, or indeed any ancestor.

Without the up() method, every time you created a child element with the builder you would end up deeper in the document structure with no way to return to prior elements to add sibling nodes or hierarchies.

To help reduce the number of up() method calls you need to include in your code, this method can also jump up multiple levels or to a named ancestor element:

>>> # A builder that references a deeply-nested element:
>>> deep_b = (xml4h.build('Root')
...     .element('Deep')
...         .element('AndDeeper')
...             .element('AndDeeperStill')
...                 .element('UntilWeGetThere')
...     )
>>> deep_b.dom_element
<xml4h.nodes.Element: "UntilWeGetThere">

>>> # Jump up 4 levels, back to the root element
>>> deep_b.up(4).dom_element
<xml4h.nodes.Element: "Root">

>>> # Jump up to a named ancestor element
>>> deep_b.up('Root').dom_element
<xml4h.nodes.Element: "Root">

Shorthand Methods

To make your XML-producing code even less verbose and quicker to type, the builder has shorthand “alias” methods corresponding to the full names.

For example, instead of calling element() to create a new child element, you can instead use the equivalent elem() or e() methods. Similarly, instead of typing attributes() you can use attrs() or a().

Here are the methods and method aliases for adding content to an XML document:

XML Node Created Builder method Aliases
Element element elem, e
Attribute attributes attrs, a
Text text t
CDATA cdata data, d
Comment comment c
Processing Instruction processing_instruction instruction, i

These shorthand method aliases are convenient and lead to even less cruft around the actual XML content you are interested in. But on the other hand they are much less explicit than the longer versions, so use them judiciously.

Access the DOM

The XML builder is merely a layer of convenience methods that sits on the xml4h.nodes DOM API. This means you can quickly access the underlying nodes from a builder if you need to inspect them or manipulate them in a way the builder doesn’t allow:

  • The dom_element attribute returns a builder’s underlying Element
  • The root attribute returns the document’s root element.
  • The document attribute returns a builder’s underlying Document.

See the DOM Nodes API documentation to find out how to work with DOM element nodes once you get them.

Building on an Existing DOM

When you are building an XML document from scratch you will generally use the build() function described in Getting Started. However, what if you want to add content to a parsed XML document DOM that you already have?

To wrap an Element DOM node with a builder you simply provide the element node to the same build() function used previously and it will do the right thing.

Here is an example of parsing an existing XML document, locating an element of interest, constructing a builder from that element, and adding some new content. Luckily, the code is simpler than that description...

>>> # Parse an XML document
>>> doc = xml4h.parse('tests/data/monty_python_films.xml')

>>> # Find an Element node of interest
>>> lob_film_elem = doc.MontyPythonFilms.Film[2]
>>> lob_film_elem.Title.text
"Monty Python's Life of Brian"

>>> # Construct a builder from the element
>>> lob_builder = xml4h.build(lob_film_elem)

>>> # Add content
>>> b = (lob_builder.attrs(stars=5)
...     .elem('Review').t('One of my favourite films!').up())

>>> # See the results
>>> lob_builder.write(indent=True)  
<Film stars="5" year="1979">
    <Title>Monty Python's Life of Brian</Title>
    <Description>Brian is born on the first Christmas, in the stable...
    <Review>One of my favourite films!</Review>
</Film>

Hydra-Builder

Because each builder class instance is independent, an advanced technique for constructing complex documents is to use multiple builders anchored at different places in the DOM. In some situations, the ability to add content to different places in the same document can be very handy.

Here is a trivial example of this technique:

>>> # Create two Elements in a doc to store even or odd numbers
>>> odd_b = xml4h.build('EvenAndOdd').elem('Odd')
>>> even_b = odd_b.up().elem('Even')

>>> # Populate the numbers from a loop
>>> for i in range(1, 11):  
...     if i % 2 == 0:
...         even_b.elem('Number').text(i)
...     else:
...         odd_b.elem('Number').text(i)
<...

>>> # Check the final document
>>> odd_b.write_doc(indent=True)
<?xml version="1.0" encoding="utf-8"?>
<EvenAndOdd>
    <Odd>
        <Number>1</Number>
        <Number>3</Number>
        <Number>5</Number>
        <Number>7</Number>
        <Number>9</Number>
    </Odd>
    <Even>
        <Number>2</Number>
        <Number>4</Number>
        <Number>6</Number>
        <Number>8</Number>
        <Number>10</Number>
    </Even>
</EvenAndOdd>

Project Versions

Table Of Contents

Previous topic

Parser

Next topic

Writer

This Page

PK!lA66xml4h-develop/objects.inv# Sphinx inventory version 2 # Project: xml4h # Version: 0.2.0 # The remainder of this file is compressed using zlib. xڭZMo8 Wݹ4L4bۛr"T Ine9vLsJbERTLrY}blU IWf;[?Tqfh*:MEtZ1SE(wSostdݎMnq2EE<"`iCXԿ3Qf$nG8y" èwc0nقּ]Ҍǻq &Y>JoRZںĊ7*sVDT$nbÌEo7QӜ[F8wr0~>J%i&-5Fg5nP#"ʴ?>%"ja2"kσ“Kdt)@)Ym(6 48:eg ʬf w֝ĬPyS1i8VMcY2:#dާج"j`B(H. k/w5Kf%^]bh{ 23%rn!.^VgaNv̊gF8l2Porz*x ,nw.;<(f\"nLag\ 0ZX͝hW8l -|"\%ME,5 L%?j6cpinr@''S04 N"ل5ǟ׏Ռc\(ɰBp џkXk_̺Sxs}gԓ_Q(`񥟥y} ~LW޾vGay~G )9k\IW*r og.lЄ ='юL:֮̕Zmsb83.wjoK<1"P1nCVxVy"Nr lv#ė!gmCsհC1YY JmqC!̓/lXŮx,:Nn?fW>`@MȢ՝\UL<2V]{xD2wwV  208hzp )VGt洧"E@n8?o} A>;+/Xҡi)D,O m1p6 g%()B .R x A v]Մ*v:65 .߇Tw .9<Аiav '9yD5gT]5.~7T)|r4cg^9(@rX_e<9X,?Xp&:THr = pVg cFVڕrM.pˆ@ Python Module Index — xml4h 0.2.0 documentation

Project Versions

PK!lAC5ssxml4h-develop/index.html xml4h: XML for Humans in Python — xml4h 0.2.0 documentation

xml4h: XML for Humans in Python

xml4h is an ISC licensed library for Python to make working with XML a human-friendly activity.

This library exists because Python is awesome, XML is everywhere, and combining the two should be a pleasure. With xml4h, it can be.

Features

xml4h is a simplification layer over existing Python XML processing libraries such as lxml and the minidom. It provides:

  • a rich pythonic API to traverse and manipulate the XML DOM.
  • a document builder to simply and safely construct complex documents with minimal code.
  • a writer that serialises XML documents with the structure and format that you expect, unlike the machine- but not human-friendly output you tend to get from other libraries.

The xml4h abstraction layer also offers some other benefits, beyond a nice API and tool set:

  • A common interface to different underlying XML libraries, so code written against xml4h need not be rewritten if you switch implementations.
  • You can easily move between xml4h and the underlying implementation: parse your document using the fastest implementation, manipulate the DOM with human-friendly code using xml4h, then get back to the underlying implementation if you need to.

Installation

Install xml4h with pip:

$ pip install xml4h

Introduction

Here is an example of parsing and reading data from an XML document using “magic” element and attribute lookups:

>>> import xml4h
>>> doc = xml4h.parse('tests/data/monty_python_films.xml')

>>> for film in doc.MontyPythonFilms.Film[:3]:
...     print film['year'], ':', film.Title.text
1971 : And Now for Something Completely Different
1974 : Monty Python and the Holy Grail
1979 : Monty Python's Life of Brian

You can also use a more traditional approach to traverse the DOM:

>>> for film in doc.child('MontyPythonFilms').children('Film')[:3]:
...     print film.attributes['year'], ':', film.children.first.text
1971 : And Now for Something Completely Different
1974 : Monty Python and the Holy Grail
1979 : Monty Python's Life of Brian

The xml4h builder makes programmatic document creation simple, with a method-chaining feature that allows for expressive but sparse code that mirrors the document itself:

>>> b = (xml4h.build('MontyPythonFilms')
...     .attributes({'source': 'http://en.wikipedia.org/wiki/Monty_Python'})
...     .element('Film')
...         .attributes({'year': 1971})
...         .element('Title')
...             .text('And Now for Something Completely Different')
...             .up()
...         .elem('Description').t(
...             "A collection of sketches from the first and second TV"
...             " series of Monty Python's Flying Circus purposely"
...             " re-enacted and shot for film.").up()
...         .up()
...     )

>>> # A builder object can be re-used
>>> b = (b.e('Film')
...     .attrs(year=1974)
...     .e('Title').t('Monty Python and the Holy Grail').up()
...     .e('Description').t(
...         "King Arthur and his knights embark on a low-budget search"
...         " for the Holy Grail, encountering humorous obstacles along"
...         " the way. Some of these turned into standalone sketches."
...         ).up()
...     .up()
... )

Pretty-print your XML document with the flexible write() and xml() methods:

>>> b.write_doc(indent=4, newline=True) 
<?xml version="1.0" encoding="utf-8"?>
<MontyPythonFilms source="http://en.wikipedia.org/wiki/Monty_Python">
    <Film year="1971">
        <Title>And Now for Something Completely Different</Title>
        <Description>A collection of sketches from ...</Description>
    </Film>
    <Film year="1974">
        <Title>Monty Python and the Holy Grail</Title>
        <Description>King Arthur and his knights embark ...</Description>
    </Film>
</MontyPythonFilms>

Why?

Python has three popular libraries for working with XML, none of which are particularly easy to use:

  • xml.dom.minidom is a light-weight, moderately-featured implementation of the W3C DOM that is included in the standard library. Unfortunately the W3C DOM API is terrible – the very opposite of pythonic – and the minidom does not support XPath expressions.
  • xml.etree.ElementTree is a fast hierarchical data container that is included in the standard library and can be used to represent XML, mostly. The API is fairly pythonic and supports XPath, but it lacks some DOM traversal niceties you might expect (e.g. to get an element’s parent) and when using it you often feel like you’re working with something subtly different from XML, because you are.
  • lxml is a fast, full-featured XML library with an API based on ElementTree but extended. It is your best choice for doing serious work with XML in Python but it is not included in the standard library, it can be difficult to install, and it gives you the same it’s-XML-but-not-quite feeling as its ElementTree forebear.

Given these three options it can be difficult to choose which library to use, especially if you’re new to XML processing in Python and haven’t already used (struggled with) any of them.

In the past your best bet would have been to go with lxml for the most flexibility, even though it might be overkill, because at least then you wouldn’t have to rewrite your code if you later find you need XPath support or powerful DOM traversal methods.

This is where xml4h comes in. It provides an abstraction layer over the existing XML libraries, taking advantage of their power while offering an improved API and tool set.

This project is heavily inspired by the work of Kenneth Reitz such as the excellent Requests HTTP library.

Development Status: αlphα

Currently xml4h includes two adapter implementations that support key XML processing tasks, using either the minidom or lxml’s ElementTree libraries.

The project is still at the alpha stage, where I am playing with ideas and tweaking the APIs to try and get them right before I build out the feature set.

This project is likely to be in flux for a while yet, so be aware that individual APIs and even broad approaches may change.

Indices and tables

Project Versions

Table Of Contents

Next topic

Parser

This Page

PK!lAL fFfFxml4h-develop/searchindex.jsSearch.setIndex({objects:{"":{xml4h:[4,0,1,""]},"xml4h.impls.interface":{XmlImplAdapter:[4,3,1,""]},"xml4h.impls.xml_dom_minidom.XmlDomImplAdapter":{set_node_text:[4,2,1,""],get_node_text:[4,2,1,""]},"xml4h.nodes.NodeAttrAndChildElementLookupsMixin":{"__weakref__":[4,1,1,""],"__getattr__":[4,2,1,""],"__getitem__":[4,2,1,""]},"xml4h.builder.Builder":{comment:[4,2,1,""],text:[4,2,1,""],elem:[4,2,1,""],attrs:[4,2,1,""],find:[4,2,1,""],transplant:[4,2,1,""],ns_prefix:[4,2,1,""],write:[4,2,1,""],document:[4,1,1,""],dom_element:[4,1,1,""],clone:[4,2,1,""],cdata:[4,2,1,""],data:[4,2,1,""],find_doc:[4,2,1,""],a:[4,2,1,""],c:[4,2,1,""],write_doc:[4,2,1,""],e:[4,2,1,""],d:[4,2,1,""],i:[4,2,1,""],instruction:[4,2,1,""],up:[4,2,1,""],element:[4,2,1,""],processing_instruction:[4,2,1,""],t:[4,2,1,""],attributes:[4,2,1,""],root:[4,1,1,""]},"xml4h.nodes.AttributeDict":{keys:[4,2,1,""],items:[4,2,1,""],element:[4,1,1,""],prefix:[4,2,1,""],namespace_uri:[4,2,1,""],values:[4,2,1,""],impl_attributes:[4,1,1,""],"__weakref__":[4,1,1,""],to_dict:[4,1,1,""]},"xml4h.nodes.NodeList":{filter:[4,2,1,""],"__call__":[4,2,1,""],"__weakref__":[4,1,1,""],first:[4,1,1,""]},"xml4h.impls":{"interface":[4,0,1,""],lxml_etree:[4,0,1,""],xml_dom_minidom:[4,0,1,""]},"xml4h.impls.lxml_etree.LXMLAdapter":{find_node_elements:[4,2,1,""],xpath_on_node:[4,2,1,""]},"xml4h.nodes.XPathMixin":{xpath:[4,2,1,""],"__weakref__":[4,1,1,""]},"xml4h.impls.xml_dom_minidom":{XmlDomImplAdapter:[4,3,1,""]},"xml4h.nodes.NameValueNodeMixin":{prefix:[4,1,1,""],name:[4,1,1,""],value:[4,1,1,""],local_name:[4,1,1,""]},"xml4h.nodes":{Node:[4,3,1,""],Comment:[4,3,1,""],DocumentFragment:[4,3,1,""],NodeList:[4,3,1,""],XPathMixin:[4,3,1,""],Notation:[4,3,1,""],NodeAttrAndChildElementLookupsMixin:[4,3,1,""],Text:[4,3,1,""],Element:[4,3,1,""],NameValueNodeMixin:[4,3,1,""],Entity:[4,3,1,""],ProcessingInstruction:[4,3,1,""],EntityReference:[4,3,1,""],CDATA:[4,3,1,""],DocumentType:[4,3,1,""],Attribute:[4
,3,1,""],Document:[4,3,1,""],AttributeDict:[4,3,1,""]},"xml4h.exceptions":{FeatureUnavailableException:[4,4,1,""],Xml4hException:[4,4,1,""],IncorrectArgumentTypeException:[4,4,1,""]},"xml4h.nodes.ProcessingInstruction":{data:[4,1,1,""],target:[4,1,1,""]},"xml4h.writer":{write_node:[4,5,1,""]},"xml4h.impls.lxml_etree":{LXMLAdapter:[4,3,1,""]},"xml4h.nodes.Node":{is_entity_reference:[4,1,1,""],siblings_before:[4,1,1,""],is_entity:[4,1,1,""],is_attribute:[4,1,1,""],child:[4,2,1,""],XMLNS_URI:[4,1,1,""],node_type:[4,1,1,""],is_document_fragment:[4,1,1,""],"__weakref__":[4,1,1,""],children:[4,1,1,""],"__init__":[4,2,1,""],xml:[4,2,1,""],write:[4,2,1,""],is_processing_instruction:[4,1,1,""],ancestors:[4,1,1,""],find:[4,2,1,""],is_comment:[4,1,1,""],namespace_uri:[4,1,1,""],has_feature:[4,2,1,""],ns_uri:[4,1,1,""],document:[4,1,1,""],is_notation:[4,1,1,""],parent:[4,1,1,""],is_element:[4,1,1,""],find_first:[4,2,1,""],is_text:[4,1,1,""],is_root:[4,1,1,""],xml_doc:[4,2,1,""],siblings:[4,1,1,""],is_type:[4,2,1,""],find_doc:[4,2,1,""],write_doc:[4,2,1,""],siblings_after:[4,1,1,""],impl_node:[4,1,1,""],root:[4,1,1,""],adapter:[4,1,1,""],transplant_node:[4,2,1,""],is_cdata:[4,1,1,""],is_document_type:[4,1,1,""],is_document:[4,1,1,""],"_convert_nodelist":[4,2,1,""],"delete":[4,2,1,""],clone_node:[4,2,1,""],adapter_class:[4,1,1,""],impl_document:[4,1,1,""]},"xml4h.nodes.Element":{add_comment:[4,2,1,""],text:[4,1,1,""],set_attributes:[4,2,1,""],add_text:[4,2,1,""],add_cdata:[4,2,1,""],set_ns_prefix:[4,2,1,""],attrs:[4,2,1,""],attribute_node:[4,2,1,""],attributes:[4,1,1,""],attrib:[4,2,1,""],add_instruction:[4,2,1,""],attribute_nodes:[4,1,1,""],builder:[4,1,1,""],add_element:[4,2,1,""]},"xml4h.builder":{Builder:[4,3,1,""]},xml4h:{builder:[4,0,1,""],writer:[4,0,1,""],parse:[4,5,1,""],best_adapter:[4,1,1,""],build:[4,5,1,""],exceptions:[4,0,1,""],nodes:[4,0,1,""]},"xml4h.impls.interface.XmlImplAdapter":{get_ns_info_from_node_name:[4,2,1,""],has_feature:[4,6,1,""],is_available:[4,6,1,"
"],find_node_elements:[4,2,1,""],ignore_whitespace_text_nodes:[4,6,1,""]}},terms:{represent:4,all:[2,4,5,6],code:[0,2,3,4,5,6],liar:5,forget:6,chain:[0,2],queri:[0,5,4,6],lack:0,xmldomimpladapt:[4,6],four:[4,3],prefix:[0,5,4,6],has_featur:[4,6],subclass:4,abil:2,follow:[2,4],faster:6,children:[4,0,5,1,6],wrongnam:5,hierarch:0,whose:4,decid:6,middl:5,depend:[5,4],wish:6,xml:[0,1,2,3,4,5,6],proxim:4,intermedi:2,friendli:[0,4],send:3,present:1,new_film_elem:5,scratch:2,leav:[5,4],introduc:[2,4],star:2,sourc:[4,0,2,3,1],everi:2,string:[0,1,2,3,4,5],documenttyp:4,far:[5,6],devolv:6,util:4,set_node_text:4,attr2:6,monti:[0,2,3,5],desc_elem:5,attr1:6,veri:[0,2,6],affect:[4,6],tagnam:6,monty_python_film:[0,1,2,3,5,6],relev:6,tri:6,is_document_frag:4,magic:[0,5,4],doesn:[2,4,6],level:[2,4,5,6],did:5,knight:[0,2,3],list:[4,0,5,1,6],fewer:6,readabl:1,"try":[0,5,6],item:[5,4,6],unsupport:4,first_film_elem:3,plain:[5,6],readthedoc:0,quicker:[2,5],slower:6,request:0,qname:4,jump:[2,6],second:[0,2,3,5,6],featureunavailableexcept:[4,6],lawyer:5,pass:[4,5,1],minimis:5,odd:2,append:4,attr_impl_nod:4,even:[0,2],index:0,what:[2,4,5,6],section:[4,2,3,5,6],find:[0,1,2,3,4,5,6],current:[0,2,3,4,5,6],delet:[0,5,4],abbrevi:2,version:[0,2,3,5,6],child1_b:2,"new":[0,2,4,5,6],method:[0,1,2,3,4,5,6],anddeep:2,fals:[4,5,1,6],impact:6,full:[0,2,3,5,6],oopswrongnam:5,absolut:6,gener:[4,2,3],never:[5,4],onli:[5,4,6],len:[5,1,6],behaviour:[5,6],accur:[5,1],let:[5,6],excess:[4,1],path:[4,1],along:[0,2],becom:4,sinc:[4,5,1,6],valu:[4,0,5,1,6],search:[0,2,4,5],produc:[2,3],transplant_nod:[5,4],add_el:[5,4,6],prior:2,base:[0,4,6],doctest:[0,2,3,5,6],action:2,opinion:6,implement:[0,1,3,4,5,6],chanc:5,cobbl:2,via:[5,4],shorthand:[0,2,4,6],although:6,extra:[1,6],appli:[4,5,3],modul:0,xmlns_uri:4,childelement1:2,ask:6,attitud:5,api:[0,2,4,5,6],siblings_befor:[5,4],immut:6,evenandodd:2,add_com:4,text_nod:[5,4],unnecessari:6,xmlimpladapt:[4,6],node_typ:4,from:[0,1,2,3,4,5,6],describ:[2,4,6],would:[0,2,3,5,6],p
rove:6,two:[0,2,4,5,6],cruft:2,implic:6,hydra:[0,2],live:5,call:[4,2,3,5,6],scope:5,simpler:[2,5],type:[4,2,3,5],tell:[5,6],minor:5,more:[4,0,5,1,6],sort:4,write_nod:4,yuck:5,jmurti:0,notic:[2,5],impl_attribut:4,flag:1,parse_fil:4,indic:[0,4],holi:[0,2,3,5],particular:6,known:[5,4],tagname_or_el:4,effort:6,must:[5,4,6],fly:[0,2,5],best_adapt:[4,6],none:[0,5,4,6],word:4,str_writer:3,attr_nam:4,alia:[2,4,5,6],expected_typ:4,work:[0,1,2,3,4,5,6],omit_declar:[4,3],uniqu:6,ignore_whitespace_text_nod:[4,1],remain:6,itself:[0,6],del:5,can:[0,1,2,3,4,5,6],xml4h_elem:5,purpos:[0,2,5],root:[1,2,3,4,5,6],piec:6,control:[3,6],nearest:[2,4],favourit:2,trump:4,stream:4,give:[0,2],process:[4,0,2,1,6],share:6,is_document_typ:4,accept:[4,5,3],tag:[4,6],to_nam:4,want:[2,3,1],serial:[4,3,1],keep:1,occur:4,everywher:0,alwai:[2,4,6],lxml:[4,0,5,3,6],end:[2,4,5],newlin:[4,0,3],quot:[4,3],rather:[5,4],anoth:[2,4,5],get:[0,2,3,4,5,6],write:[4,0,2,3,6],brian:[0,2,5],namespace_uri:[5,4,6],ns_uri:[5,4,6],"__init__":4,pure:4,instead:[2,4,5,6],ancestor:[2,4,5,6],simpl:[0,5,1,6],updat:4,map:[4,6],transplant:[5,4],resourc:6,reduc:[2,6],overridden:4,mess:4,clone:[5,4],earlier:6,spot:2,usabl:6,reflect:4,befor:[4,0,5,3],wrong:5,inst:2,attent:5,backslash:2,multipl:[2,6],underscor:[5,4],documentfrag:4,associ:[4,6],attrib:[5,4],circumst:5,write_doc:[4,0,2,3],attempt:4,third:6,classmethod:4,light:0,wrap_nod:5,minim:[0,6],explicit:[2,6],embark:[0,2,3],element:[0,1,2,3,4,5,6],issu:0,alias:[2,5,6],"switch":[0,6],maintain:6,environ:[4,6],allow:[0,2,4,5,6],callabl:5,volum:5,first:[0,2,3,4,5,6],order:[5,4],root_b:2,help:2,demand:6,over:[0,5,6],move:0,namevaluenodemixin:4,addit:[5,4],find_node_el:4,through:[4,6],seamless:6,appar:6,hierarchi:[2,4],flexibl:0,"__weakref__":4,paramet:4,style:[4,6],xml_file:1,fit:[5,6],how:[4,2,3,5,6],is_attribut:[5,4],"__class__":3,better:6,drawback:6,curli:4,therefor:6,might:[0,5,6],easier:[2,5,6],loop:2,wouldn:[0,6],non:[4,6],good:[2,6],"return":[4,2,3,5,6],fourth:6,thei:[2,1,6]
,lob_build:2,python:[0,2,3,4,5,6],impl_el:4,spell:6,handi:2,number:[2,4,5,6],node_type_const:4,overkil:0,mydoc:6,xml_text:1,discuss:5,oper:[5,4,6],introduct:0,outweigh:6,choic:[0,6],somewher:5,name:[0,2,4,5,6],deep_b:2,focuss:[4,6],separ:4,easili:[0,5,3],compris:[5,6],each:[2,4,6],fiddli:6,fulli:6,unicod:4,needn:6,attributeerror:4,side:6,mean:[2,4,5,6],everyth:6,weight:0,processinginstruct:[5,4],lxml_doc:5,replac:[5,4],individu:0,strip:[4,0,1],idea:[0,2],realli:6,oppos:4,element_nod:4,year:[0,2,3,5],our:5,happen:[5,6],beyond:[0,5],special:[5,6],out:[4,0,2,3,6],variabl:2,impl_nod:[4,5,3,6],get_node_text:4,space:[4,3],open:[3,1],newli:4,access:[4,0,2,3,5],identifi:[4,6],your:[0,2,5,6],content:[4,2,3,1,5],rewrit:[0,6],adapt:[0,5,4,6],rel:[2,4,5,6],print:[4,0,5,3,6],xpath_on_nod:4,even_b:2,to_dict:4,qualifi:6,after:[4,5,1],advanc:[0,2,6],elem2:6,manipul:[0,2,4,5,6],situat:[2,6],given:[0,5,4,6],undo:5,standard:[0,4,6],standalon:[0,2],xml_doc:[4,3],believ:6,dictionari:[5,4,6],tempt:2,put:[2,5],org:[0,1,2,3,4,6],untilwegetther:2,care:6,elementmak:5,indent:[4,0,2,3],puzzl:6,could:5,omit:4,count:4,struggl:0,filter:[0,5,4,6],thing:[2,5],perhap:6,place:[2,4,6],isn:6,fledg:6,isc:0,xml4h:[0,1,2,3,4,5,6],confus:[2,6],assign:[4,6],lambda:5,origin:[2,5],to_pars:4,major:[5,6],directli:[4,5,3,6],feel:[0,5],onc:2,hoop:6,independ:2,parser:[0,1],scene:[5,4],montypythonfilm:[0,1,2,3,5,6],mai:[0,5,4,6],instruct:[2,4],alreadi:[0,2,4,6],done:6,wrapper:[4,1],owner:5,stabl:2,miss:3,avail:[4,5,3,6],differ:[0,2,3,4,5,6],rewritten:0,start:[0,2,3,4,5,6],data:[0,1,2,3,4,5,6],top:[3,6],sometim:6,least:[4,0,5,3],circu:[0,2,5],necessarili:5,iter:5,too:[5,6],element_nam:4,similarli:[2,6],hollywood:5,conveni:[4,2,3,5],offer:0,"final":[2,5],store:[2,6],prone:2,luckili:2,xmln:[4,6],option:[4,0,5,3,6],especi:0,namespac:[0,5,4,6],tool:[0,2,6],copi:[5,4],pretty_print:3,is_text:[5,4],specifi:[4,6],xmlb:2,arbitrari:4,is_entity_refer:4,anddeeperstil:2,pars:[0,1,2,3,4,5,6],cdata:[2,4,5,6],exactli:[5,4,6],than:[
2,4,5,6],mechan:6,past:0,liter:[4,1],target:[4,3],keyword:[5,4,6],instanc:[2,4],provid:[0,1,2,3,4,5,6],remov:[4,5,1],richer:4,structur:[0,2,5],charact:[4,2,3,1,5],project:[0,6],matter:[5,3],find_doc:[5,4],were:[2,5],consumpt:3,bet:0,seri:[0,2,5],pre:6,local_nam:[5,4,6],siblings_aft:[5,4],terribl:0,respons:6,mind:3,ani:[0,2,3,4,5,6],correspond:2,packag:[0,6],complet:[0,2,3,5],have:[0,2,3,5,6],tabl:0,need:[0,2,1,4,5,6],turn:[0,2],saw:5,odd_b:2,techniqu:[2,5],equival:2,inform:[5,4],destroi:[5,4],self:4,lob_film_elem:2,note:[1,2,3,4,5,6],adaptor:4,preced:4,read:[0,5,3,1],build:[0,2,4,5,6],which:[0,2,3,4,5,6],tupl:4,is_typ:4,combin:0,brace:[4,6],noth:[4,6],singl:[4,5,3],uppercas:[5,4],begin:4,unless:[4,6],normal:[5,3,6],add_text:[5,4],object:[0,1,2,3,4,5,6],reach:5,deleg:4,precent:4,most:[0,5],plai:0,pair:[5,4],alpha:0,judici:2,"class":[2,4,5,6],tradit:0,simplic:6,don:[2,3,1],dom:[0,2,4,5,6],doc:[0,1,2,3,4,5,6],urn:6,later:[0,6],cover:5,uri:[0,5,4,6],doe:[4,0,5,1,6],inde:2,declar:[4,3],clean:4,rootel:2,"_convert_nodelist":4,unchang:5,lxml_b:6,someth:[0,2,3,5],awesom:0,shot:[0,2,5],mostli:[0,4],correcli:6,opposit:0,sent:4,serialis:[0,3],syntax:[5,6],particularli:0,subtli:[0,4],radic:6,newest:2,trivial:2,anywai:6,setter:5,incorrectargumenttypeexcept:4,despit:4,giant:6,coerc:4,locat:2,nois:4,pretti:[4,0,3],"true":[0,2,3,4,5,6],writer:[4,0,3,6],activ:0,written:[0,5,6],should:[0,4,6],dict:[5,4],obstacl:[0,2],rich:0,"__call__":4,local:[0,5,4,6],entityrefer:4,coment:4,serv:4,nodelist:[5,4],king:[0,2,3],familiar:5,express:0,pypi:0,kind:5,becaus:[4,0,2,3,6],regularli:5,cannot:[5,6],fastest:0,deepli:2,utf:[4,0,2,3],requir:[4,6],yetanotherwaytonamespac:6,templat:2,possibl:[4,6],"default":[4,5,3,1,6],bad:6,common:[0,5,4],mediat:[4,6],contain:[4,0,5,1],grab:5,where:[0,5,4,6],black:5,wiki:[0,2,3,1],set:[4,0,5,3,6],aspect:6,quirk:[0,6],is_com:4,spars:[0,2,4],see:[2,3,5,6],filter_fn:[5,4],result:[1,2,3,4,5,6],arg:4,xml_dom_minidom:4,retriev:[5,4],dom_el:[2,4,6],awar:0,statu:0,still:0,pa
rent:[0,2,4,5],pattern:4,review:2,easi:[0,2,4,5,6],wikipedia:[0,2,3,1],tend:0,behind:[5,4],xml4hexcept:4,between:[0,1],is_avail:4,"import":[0,1,2,3,5,6],entiti:4,approach:[0,5,6],wrapped_nod:4,attribut:[0,2,4,5,6],altern:6,before_this_el:[5,4],grandpar:4,appreci:6,kei:[0,5,4],pointer:3,sole:4,weak:4,orig_titl:5,philosph:6,xml4h_doc:5,lowercas:[5,4],cut:5,here:[0,2,3,5,6],extent:6,distinguish:1,come:[0,6],popul:2,minidom:[0,4,6],both:5,delimit:4,howev:[2,1,6],against:0,etc:[5,4,6],entir:[4,5,3,6],impl:4,context:4,collect:[4,0,2,3,5],improv:[0,6],deletedfilm:5,com:0,comment:[2,4,5],is_ent:[5,4],simpli:[0,2,6],processing_instruct:[2,4],technic:6,point:6,etre:[0,3],inspir:0,form:[4,2,1],header:4,exclud:4,monty_python:[0,2,3,1],respect:[5,3],guid:0,assum:[4,3],damag:5,quit:0,ultim:6,pleasur:0,compos:4,empti:[4,6],accessor:[5,4],compon:[0,5,4,6],much:[2,3,5],treat:[5,4],untouch:4,modif:4,is_el:[5,4],lxml_etre:4,valueerror:4,quickli:[2,5],life:[0,2,5],adapter_class:4,identifii:6,deeper:2,quote_char:4,convert:[5,4],argument:[4,5,3,6],bulk:6,made:6,understand:6,togeth:2,child:[0,2,4,5],rang:[2,3],those:4,"case":[4,5,3,6],main:[0,5,4],look:[4,5,1],raw:4,tostr:3,properti:5,budget:[0,2,3],aim:[5,6],defin:[5,4,6],"while":[0,5,6],match:[5,4],abov:[2,5,6],error:2,invoc:4,anchor:2,child_nam:4,childelement2:2,seven:5,ordereddict:4,advantag:[0,5,6],stdout:[4,3],best:[0,4,6],implementat:5,forebear:0,them:[0,2,4,5],worri:3,pip:0,kwarg:4,lightweight:6,explicitli:6,feed:1,greater:4,niceti:0,develop:0,"__getitem__":4,author:[3,6],perform:[2,4,5,6],make:[0,2,1,4,5,6],belong:[5,6],bowl:5,same:[0,2,4,5],member:4,fragment:4,complex:[0,2,5,6],interact:[5,4],descend:[4,0,5,3,6],grand:5,pai:5,document:[0,1,2,3,4,5,6],difficult:0,oblivi:6,impl_docu:[5,4],http:[0,1,2,3,4,6],grail:[0,2,3,5],contaten:4,nest:2,chainabl:4,wherea:5,effect:[5,4],elem1:6,fairli:0,lxml_elem:5,rais:4,user:[0,4],mani:[2,4,5],typic:2,travers:[0,5,4],appropri:[5,6],moder:0,els:[2,6],inconsist:5,whole:3,builder:[0,2,4,5,6],sit
:[2,3],well:[2,4,5],parenthes:2,inherit:6,without:[2,5,6],nodeattrandchildelementlookupsmixin:[5,4],exampl:[0,2,3,5,6],expens:6,thi:[0,1,2,3,4,5,6],choos:[0,6],model:5,"_element":3,sibl:[2,4,5],left:5,load:5,construct:[0,2,4],set_attribut:[5,4],just:[5,6],less:2,xml4h_node:5,obtain:5,rest:6,elementtre:0,human:[4,0,3,1],heavili:0,regardless:[5,4],speed:6,yet:0,previous:2,task:[0,5],now:[0,2,3,5,6],expos:[5,6],also:[0,2,4,5,6],except:[0,4,6],littl:[2,3,6],add:[2,4,5],other:[0,2,3,4,5,6],tweak:0,feature_nam:4,unlik:[0,3],smart:6,is_processing_instruct:4,enact:[0,2,5],take:[0,5,4,6],real:[4,6],around:[2,1,6],format:[0,3],handl:[5,6],prefer:[5,3],elem4:6,intuit:5,reitz:0,usual:[4,1],world:6,characterist:6,lxmladapt:[5,4,6],licens:0,shoulder:6,like:[0,1,2,3,4,5,6],specif:[4,5,3,6],whitespac:[4,0,1],anyth:4,manual:2,integ:4,attributedict:[2,4,5],kenneth:0,benefit:[0,6],"boolean":[5,4],necessari:[2,6],either:0,popular:0,output:[4,0,3],architectur:[0,6],page:0,underli:[0,1,2,3,4,5,6],encount:[0,2,6],www:[4,6],right:[0,2,5,6],often:[0,5,3],deal:[5,6],creation:0,some:[0,2,3,4,5,6],maxim:6,add_cdata:4,autobiographi:5,intern:2,instal:0,impl_nodelist:4,intact:5,librari:[0,1,2,3,4,5,6],myelement:5,tmp:3,first_onli:[5,4],humor:[0,2],layer:[0,2,6],flux:0,xml_declar:3,avoid:[5,3,1,6],though:[0,5],definit:4,arthur:[0,2,3],"__getattr__":4,when:[0,1,2,3,4,5,6],larg:1,iso:3,film_build:5,three:[4,0,5,1,6],refer:[2,4,5],machin:[0,3],core:6,pepper:5,previou:2,run:5,truth:5,power:[0,2,5,6],imposs:2,inspect:2,find_first:[5,4],confer:5,clearer:5,is_root:4,wari:5,docroot:5,stage:0,about:[2,5,6],rare:1,xpathmixin:4,constraint:[5,4],is_not:4,memori:[5,6],unfortun:0,node_depth:4,includ:[0,2,3,4,5,6],act:4,clone_nod:[5,4],libarari:[0,4],wrap_docu:5,disabl:1,restrict:5,set_ns_prefix:[4,6],own:[0,4,6],anotherel:6,within:[5,4],encod:[4,0,2,3],sketch:[4,0,2,3,5],automat:4,due:6,ellipsi:[0,2,3,5,6],child2_b:2,been:[4,0,1],elem3:6,"\u03b1lph\u03b1":0,wrap:[0,2,4,5,6],chang:0,bool:4,parse_str:4,mere:2,ns_
prefix:[4,6],w3c:[0,6],get_ns_info_from_node_nam:4,wai:[0,2,3,4,5,6],pictur:3,quirki:3,support:[0,5,4,6],elem:[0,2,4,5,6],fast:[0,6],custom:[0,5,4],verbos:2,almost:[2,5,6],reli:6,interfac:[0,4,6],low:[0,2,3,4,5,6],lot:6,suit:6,overhead:1,attr_obj:4,"function":[4,0,2,1,5],node_to_transpl:5,properli:6,programmat:[0,5],interest:[2,1],unexpect:5,unwrap:[0,5],altogeth:6,great:[5,4],attribute_nod:[5,4],keyerror:4,back:[0,2,5],github:0,state:4,link:0,getvalu:3,gori:5,line:[2,4],ns4:6,longer:[2,3,5],extend:[0,6],concaten:2,immedi:4,flavour:4,attr:[0,2,4,5,6],tab:3,born:2,whether:[4,6],notat:4,add_instruct:4,simplif:0,anel:6,lead:2,funni:5,below:[5,3,6],unusu:5,limit:[4,6],time:[2,5,6],hand:2,film_elem:5,otherwis:4,similar:5,implementaiton:4,expect:[4,0,5,3],title_elem:5,featur:[0,1,2,3,4,5,6],constant:4,creat:[2,4,5,6],"int":4,nsmap:6,"abstract":[0,4,6],mirror:[0,2],deep:2,repres:[0,2,4,5,6],year_attr:5,exist:[0,2,4,5,6],is_cdata:4,file:[4,3,1],seriou:0,minidom_doc:6,check:[2,5,6],film:[0,2,3,1,5],probabl:[5,6],assembl:4,denot:4,xpath:[0,5,4,6],somenam:5,know:6,titl:[0,2,3,5],excel:[0,6],detail:5,invalid:2,valid:[2,4],lookup:[0,5,4],christma:2,test:[0,1,2,3,5,6],ignor:[4,5,1,6],you:[0,1,2,3,4,5,6],nice:0,node:[0,1,2,3,4,5,6],intend:3,attr_dict:4,stringio:3,actual:[2,5,6],why:[0,3],is_docu:4,lxml_root_nod:3,releas:5,consid:6,leaf:5,myelement_:5,haven:0,algorithm:6,bottom:6,descript:[0,2,3,5],rule:4,portion:[5,6],safe:0,text:[0,1,2,3,4,5,6],broad:0},objtypes:{"0":"py:module","1":"py:attribute","2":"py:method","3":"py:class","4":"py:exception","5":"py:function","6":"py:classmethod"},titles:["xml4h: XML for Humans in Python","Parser","Builder","Writer","API","DOM Nodes","Advanced"],objnames:{"0":["py","module","Python module"],"1":["py","attribute","Python attribute"],"2":["py","method","Python method"],"3":["py","class","Python class"],"4":["py","exception","Python exception"],"5":["py","function","Python function"],"6":["py","classmethod","Python class 
method"]},filenames:["index","parser","builder","writer","api","nodes","advanced"]})PK!lA^xml4h-develop/.buildinfo# Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. config: 827ecdf33157ef4e42e3e4d145e6d6a5 tags: fbb0d17656682115ca4d033fb2f83ba1 PK!lAGGxml4h-develop/nodes.html DOM Nodes — xml4h 0.2.0 documentation

DOM Nodes

xml4h provides node objects and convenience methods that make it easier to work with an in-memory XML document object model (DOM).

This section of the document covers the main features of xml4h nodes. For the full API-level documentation see DOM Nodes API.

Traversing Nodes

xml4h aims to provide a simple and intuitive API for traversing and manipulating the XML DOM. To that end it includes a number of convenience methods for performing common tasks:

  • Get the Document or root Element from any node via the document and root attributes respectively.
  • You can get the name attribute of nodes that have a name, or look up the different name components with prefix to get the namespace prefix (if any) and local_name to get the name portion without the prefix.
  • Nodes that have a value expose it via the value attribute.
  • A node’s parent attribute returns its parent, while the ancestors attribute returns a list containing its parent, grand-parent, great-grand-parent etc.
  • A node’s children attribute returns the child nodes that belong to it, while the siblings attribute returns all other nodes that belong to its parent. You can also get the siblings_before or siblings_after the current node.
  • Look up a node’s namespace URI with namespace_uri or the alias ns_uri.
  • Check what type of Node you have with Boolean attributes like is_element, is_text, is_entity etc.

Searching with Find and XPath

There are two ways to search for elements within an xml4h document: find and xpath.

The find methods provided by the library are easy to use but can only perform relatively simple searches that return Element results, whereas you need to be familiar with XPath query syntax to search effectively with the xpath method but you can perform more complex searches and get results other than just elements.

Below are some examples of both kinds of search, but first we need to load an example document to search:

>>> # Parse an example XML document about Monty Python films
>>> import xml4h
>>> doc = xml4h.parse('tests/data/monty_python_films.xml')

Find Methods

xml4h provides three different find methods:

  • find() searches descendants of the current node for elements matching the given constraints. You can search by element name, by namespace URI, or with no constraints at all:

    >>> # Find ALL elements in the document
    >>> elems = doc.find()
    >>> [e.name for e in elems]  
    [u'MontyPythonFilms', u'Film', u'Title', u'Description', u'Film', u'Title', u'Description',...
    
    >>> # Find the seven <Film> elements in the XML document
    >>> film_elems = doc.find('Film')
    >>> [e.Title.text for e in film_elems]  
    ['And Now for Something Completely Different', 'Monty Python and the Holy Grail',...
    

    Note that the find() method only finds descendants of the node you run it on:

    >>> # Find <Title> elements in a single <Film> element; there's only one
    >>> film_elem = doc.find('Film', first_only=True)
    >>> film_elem.find('Title')
    [<xml4h.nodes.Element: "Title">]
    
  • find_first() searches descendants of the current node but only returns the first result element, not a list. If there are no matching element results this method returns None:

    >>> # Find the first <Film> element in the document
    >>> doc.find_first('Film')
    <xml4h.nodes.Element: "Film">
    
    >>> # Search for an element that does not exist
    >>> print doc.find_first('OopsWrongName')
    None
    

    If you were paying attention you may have noticed in the example above that you can make the find() method do exactly the same thing as find_first() by passing the keyword argument first_only=True.

  • find_doc() is a convenience method that searches the entire document no matter which node you run it on:

    >>> # Normal find only searches descendants of the current node
    >>> len(film_elem.find('Title'))
    1
    
    >>> # find_doc searches the entire document
    >>> len(film_elem.find_doc('Title'))
    7
    

    This method is exactly like calling xml4h_node.document.find(), which is actually what happens behind the scenes.

XPath Querying

xml4h provides a single XPath search method which is available on Document and Element nodes:

  • xpath() takes an XPath query string and returns the result which may be a list of elements, a list of attributes, a list of values, or a single value. The result depends entirely on the kind of query you perform.

    XPath queries are well beyond the scope of this documentation but here are some examples like the find queries we saw above, as well as some more complex queries:

    >>> # Query for ALL elements in the document
    >>> elems = doc.xpath('//*')  
    >>> [e.name for e in elems]  
    [u'MontyPythonFilms', u'Film', u'Title', u'Description', u'Film', u'Title', u'Description',...
    
    >>> # Query for the seven <Film> elements in the XML document
    >>> film_elems = doc.xpath('//Film')
    >>> [e.Title.text for e in film_elems]  
    ['And Now for Something Completely Different', 'Monty Python and the Holy Grail',...
    
    >>> # Query for the first <Film> element in the document (returns list)
    >>> doc.xpath('//Film[1]')
    [<xml4h.nodes.Element: "Film">]
    
    >>> # Query for <Title> elements in a single <Film> element; there's only one
    >>> film_elem = doc.xpath('Film[1]')[0]
    >>> film_elem.xpath('Title')
    [<xml4h.nodes.Element: "Title">]
    
    >>> # Query for all year attributes
    >>> doc.xpath('//@year')
    ['1971', '1974', '1979', '1982', '1983', '2009', '2012']
    
    >>> # Query for the title of the film released in 1982
    >>> doc.xpath('//Film[@year="1982"]/Title/text()')
    ['Monty Python Live at the Hollywood Bowl']
    

Filtering Node Lists

Many xml4h node attributes return a list of nodes as a NodeList object which confers some special filtering powers. You get this special node list object from attributes like children, ancestors, and siblings, and from the find search method if it has element results.

Here are some examples of how you can easily filter a NodeList to get just the nodes you need:

  • Get the first child node using the filter method:

    >>> # Filter to get just the first child
    >>> doc.root.children.filter(first_only=True)
    <xml4h.nodes.Element: "Film">
    
    >>> # The document has 7 <Film> element children of the root
    >>> len(doc.root.children)
    7
    
  • Get the first child node by treating children as a callable:

    >>> doc.root.children(first_only=True)
    <xml4h.nodes.Element: "Film">
    

    When you treat the node list as a callable it calls the filter method behind the scenes, but since doing it the callable way is quicker and clearer in code we will use that approach from now on.

  • Get the first child node with the child filtering method, which accepts the same constraints as the filter method:

    >>> doc.root.child()
    <xml4h.nodes.Element: "Film">
    
    >>> # Apply filtering with child
    >>> print doc.root.child('WrongName')
    None
    
  • Get the first of a set of children with the first attribute:

    >>> doc.root.children.first
    <xml4h.nodes.Element: "Film">
    
  • Filter the node list by name:

    >>> for n in doc.root.children('Film'):
    ...     print n.Title.text
    And Now for Something Completely Different
    Monty Python and the Holy Grail
    Monty Python's Life of Brian
    Monty Python Live at the Hollywood Bowl
    Monty Python's The Meaning of Life
    Monty Python: Almost the Truth (The Lawyer's Cut)
    A Liar's Autobiography: Volume IV
    
    >>> len(doc.root.children('WrongName'))
    0
    

    Note

    Passing a node name as the first argument will match the local name of a node. You can match the full node name, which might include a prefix for example, with a call like: .children(name='SomeName').

  • Filter with a custom function:

    >>> # Filter to films released in the year 1979
    >>> for n in doc.root.children('Film',
    ...         filter_fn=lambda node: node.attributes['year'] == '1979'):
    ...     print n.Title.text
    Monty Python's Life of Brian
    

“Magical” Node Traversal

To make it easy to traverse XML documents with a known structure xml4h performs some minor magic when you look up attributes or keys on Document and Element nodes. If you like, you can take advantage of magical traversal to avoid peppering your code with find and xpath searches, or with filter constraints on children node attributes.

Depending on how you feel about magical behaviour this feature might feel like a great convenience, or black magic that makes you wary. The right attitude probably lies somewhere in the middle...

Here is an example of retrieving information from our Monty Python films document using element names as Python attributes (MontyPythonFilms, Film, Title) and XML attribute names as Python keys (year):

>>> for film in doc.MontyPythonFilms.Film:
...     print film['year'], ':', film.Title.text  
1971 : And Now for Something Completely Different
1974 : Monty Python and the Holy Grail
...

To minimise the chances of unexpected behaviour from too much black magic, xml4h has restrictions on the kind of Python attribute names it will accept when looking up child Elements. The attribute name:

  • cannot start with any underscore characters
  • must contain at least one uppercase character, or
  • if your XML element names are all lowercase (yuck!) you can tell xml4h to treat it specially by adding a single underscore character to the end of the name. For example, to traverse a child element named myelement you would use the Python attribute name myelement_.

There are more gory details in the documentation at NodeAttrAndChildElementLookupsMixin.

Note

The behaviour of namespaced XML elements and attributes is inconsistent. You can do magical traversal of elements regardless of what namespace the elements are in, but to look up XML attributes with a namespace prefix you must include that prefix in the name e.g. prefix:attribute-name.

Manipulating Nodes and Elements

xml4h provides simple methods to manipulate the structure and content of an XML DOM. The methods available depend on the kind of node you are interacting with, and by far the majority are for working with Element nodes.

Delete a Node

Any node can be removed from its owner document with delete():

>>> # Before deleting a Film element there are 7 films
>>> len(doc.MontyPythonFilms.Film)
7

>>> doc.MontyPythonFilms.children('Film')[-1].delete()
>>> len(doc.MontyPythonFilms.Film)
6

Note

By default deleting a node also destroys it, but it can optionally be left intact after removal from the document by including the destroy=False option.

Name and Value Attributes

Many nodes have low-level name and value properties that can be read from and written to. Nodes with names and values include Text, CDATA, Comment, ProcessingInstruction, Attribute, and Element nodes.

Here is an example of accessing the low-level name and value properties of a Text node:

>>> text_node = doc.MontyPythonFilms.child('Film').child('Title').child()
>>> text_node.is_text
True

>>> text_node.name
u'#text'
>>> text_node.value
u'And Now for Something Completely Different'

And here is the same for an Attribute node:

>>> # Access the name/value properties of an Attribute node
>>> year_attr = doc.MontyPythonFilms.child('Film').attribute_node('year')
>>> year_attr.is_attribute
True

>>> year_attr.name
u'year'
>>> year_attr.value
u'1971'

The name attribute of a node is not necessarily a plain string. In the case of nodes within a defined namespace, the name attribute may comprise two components: a prefix that represents the namespace, and a local_name which is the plain name of the node ignoring the namespace. For more information on namespaces see Namespaces.

Import a Node and its Descendants

In addition to manipulating nodes in a single XML document directly, you can also import a node (and all its descendants) from another document using a node clone or transplant operation.

There are two ways to import a node and its descendants:

  • Use the clone_node() Node method or clone() Builder method to copy a node into your document without removing it from its original document.
  • Use the transplant_node() Node method or transplant() Builder method to transplant a node into your document and remove it from its original document.

Here is an example of transplanting a node into a document (which also happens to undo the damage we did to our example DOM in the delete() example above):

>>> # Build a new document containing a Film element
>>> film_builder = (xml4h.build('DeletedFilm')
...     .element('Film').attrs(year='1971')
...         .element('Title')
...             .text('And Now for Something Completely Different').up()
...         .element('Description').text(
...             "A collection of sketches from the first and second TV"
...             " series of Monty Python's Flying Circus purposely"
...             " re-enacted and shot for film.")
...     )

>>> # Transplant the Film element from the new document
>>> node_to_transplant = film_builder.root.child('Film')
>>> doc.MontyPythonFilms.transplant_node(node_to_transplant)
>>> len(doc.MontyPythonFilms.Film)
7

When you transplant a node from another document it is removed from that document:

>>> # After transplanting the Film node it is no longer in the original doc
>>> len(film_builder.root.find('Film'))
0

If you need to leave the original document unchanged when importing a node use the clone methods instead.

Working with Elements

Element nodes have the most methods to access and manipulate their content, which is fitting since this is the most useful type of node and you will deal with elements regularly.

The leaf elements in XML documents often have one or more Text node children that contain the element’s data content. While you could iterate over such text nodes as child nodes, xml4h provides the more convenient text accessors you would expect:

>>> title_elem = doc.MontyPythonFilms.Film[0].Title
>>> orig_title = title_elem.text
>>> orig_title
'And Now for Something Completely Different'

>>> title_elem.text = 'A new, and wrong, title'
>>> title_elem.text
'A new, and wrong, title'

>>> # Let's put it back the way it was...
>>> title_elem.text = orig_title

Elements also have attributes that can be manipulated in a number of ways.

Look up an element’s attributes with:

  • the attributes() attribute (or aliases attrib and attrs) that returns an ordered dictionary of attribute names and values:

    >>> film_elem = doc.MontyPythonFilms.Film[0]
    >>> film_elem.attributes
    <xml4h.nodes.AttributeDict: [('year', '1971')]>
    
  • or by obtaining an element’s attributes as Attribute nodes, though that is only likely to be useful in unusual circumstances:

    >>> film_elem.attribute_nodes
    [<xml4h.nodes.Attribute: "year">]
    
    >>> # Get a specific attribute node by name or namespace URI
    >>> film_elem.attribute_node('year')
    <xml4h.nodes.Attribute: "year">
    
  • and there’s also the “magical” keyword lookup technique discussed in “Magical” Node Traversal for quickly grabbing attribute values.

Set attribute values with:

  • the set_attributes() method, which allows you to add attributes without replacing existing ones. This method also supports defining XML attributes as a dictionary, list of name/value pairs, or keyword arguments:

    >>> # Set/add attributes as a dictionary
    >>> film_elem.set_attributes({'a1': 'v1'})
    
    >>> # Set/add attributes as a list of name/value pairs
    >>> film_elem.set_attributes([('a2', 'v2')])
    
    >>> # Set/add attributes as keyword arguments
    >>> film_elem.set_attributes(a3='v3', a4=4)
    
    >>> film_elem.attributes
    <xml4h.nodes.AttributeDict: [('a1', 'v1'), ('a2', 'v2'), ('a3', 'v3'), ('a4', '4'), ('year', '1971')]>
    
  • the setter version of the attributes attribute, which replaces any existing attributes with the new set:

    >>> film_elem.attributes = {'year': '1971', 'note': 'funny'}
    >>> film_elem.attributes
    <xml4h.nodes.AttributeDict: [('note', 'funny'), ('year', '1971')]>
    

Delete attributes from an element by:

  • using Python’s delete-in-dict technique:

    >>> del(film_elem.attributes['note'])
    >>> film_elem.attributes
    <xml4h.nodes.AttributeDict: [('year', '1971')]>
    
  • or by calling the delete() method on an Attribute node.

Finally, the Element class provides a number of methods for programmatically adding child nodes, for cases where you would rather work directly with nodes instead of using a Builder.

The most complex of these methods is add_element() which allows you to add a named child element, and optionally to set the new element’s namespace, text content, and attributes all at the same time. Let’s try an example:

>>> # Add a Film element with an attribute
>>> new_film_elem = doc.MontyPythonFilms.add_element(
...     'Film', attributes={'year': 'never'})

>>> # Add a Description element with text content
>>> desc_elem = new_film_elem.add_element(
...     'Description', text='Just testing...')

>>> # Add a Title element with text *before* the description element
>>> title_elem = desc_elem.add_element(
...     'Title', text='The Film that Never Was', before_this_element=True)

>>> print doc.MontyPythonFilms.Film[-1].xml()
<Film year="never">
    <Title>The Film that Never Was</Title>
    <Description>Just testing...</Description>
</Film>

There are similar methods for handling simpler cases like adding text nodes, comments etc. Here is an example of adding text nodes:

>>> # Add a text node
>>> title_elem = doc.MontyPythonFilms.Film[-1].Title
>>> title_elem.add_text(', and Never Will Be')

>>> title_elem.text
'The Film that Never Was, and Never Will Be'

Refer to the Element documentation for more information about the other methods for adding nodes.

Wrapping and Unwrapping xml4h Nodes

You can easily convert to or from xml4h’s wrapped version of an implementation node. For example, if you prefer the lxml library’s ElementMaker document builder approach to the xml4h Builder, you can create a document in lxml...

>>> from lxml.builder import ElementMaker
>>> E = ElementMaker()
>>> lxml_doc = E.DocRoot(
...     E.Item(
...         E.Name('Item 1'),
...         E.Value('Value 1')
...     ),
...     E.Item(
...         E.Name('Item 2'),
...         E.Value('Value 2')
...     )
... )
>>> lxml_doc  
<Element DocRoot at ...

...and then convert (or, more accurately, wrap) the lxml nodes with the appropriate adapter to make them xml4h versions:

>>> # Convert lxml Document to xml4h version
>>> xml4h_doc = xml4h.LXMLAdapter.wrap_document(lxml_doc)
>>> xml4h_doc.children
[<xml4h.nodes.Element: "Item">, <xml4h.nodes.Element: "Item">]

>>> # Get an element within the lxml document
>>> lxml_elem = list(lxml_doc)[0]
>>> lxml_elem  
<Element Item at ...

>>> # Convert lxml Element to xml4h version
>>> xml4h_elem = xml4h.LXMLAdapter.wrap_node(lxml_elem, lxml_doc)
>>> xml4h_elem  
<xml4h.nodes.Element: "Item">

You can reach the underlying XML implementation document or node at any time from an xml4h node:

>>> # Get an xml4h node's underlying implementation node
>>> xml4h_elem.impl_node  
<Element Item at ...
>>> xml4h_elem.impl_node == lxml_elem
True

>>> # Get the underlying implementation document from any node
>>> xml4h_elem.impl_document  
<Element DocRoot at ...
>>> xml4h_elem.impl_document == lxml_doc
True
PK!lA,7LJJxml4h-develop/search.html Search — xml4h 0.2.0 documentation

Search

Please activate JavaScript to enable the search functionality.

From here you can search these documents. Enter your search words into the box below and click "search". Note that the search function will automatically search for all of the words. Pages containing fewer words won't appear in the result list.

Project Versions

PK!lAN&n*n*xml4h-develop/api.html API — xml4h 0.2.0 documentation

API

Main Interface

xml4h.parse(to_parse, ignore_whitespace_text_nodes=True, adapter=None)[source]

Parse an XML document into an xml4h-wrapped DOM representation using an underlying XML library implementation.

Parameters:
  • to_parse (a file-like object or string) – an XML document file, document string, or the path to an XML file. If a string value is given that contains a < character it is treated as literal XML data, otherwise a string value is treated as a file path.
  • ignore_whitespace_text_nodes (bool) – if True pure whitespace nodes are stripped from the parsed document, since these are usually noise introduced by XML docs serialized to be human-friendly.
  • adapter (adapter class or None) – the xml4h implementation adapter class used to parse the document and to interact with the resulting nodes. If None, best_adapter will be used.
Returns:

an xml4h.nodes.Document node representing the parsed document.

Delegates to an adapter’s parse_string() or parse_file() implementation.

xml4h.build(tagname_or_element, ns_uri=None, adapter=None)[source]

Return a Builder that represents an element in a new or existing XML DOM and provides “chainable” methods focussed specifically on adding XML content.

Parameters:
  • tagname_or_element (string or Element node) – a string name for the root node of a new XML document, or an Element node in an existing document.
  • ns_uri (string or None) – a namespace URI to apply to the new root node. This argument has no effect when this method is acting on an element.
  • adapter (adapter class or None) – the xml4h implementation adapter class used to interact with the document DOM nodes. If None, best_adapter will be used.
Returns:

a Builder instance that represents an Element node in an XML DOM.

xml4h.best_adapter

The best adapter available in the Python environment. This adapter is the default when parsing or creating XML documents, unless overridden by passing a specific adapter class.

alias of XmlDomImplAdapter

Builder

Builder is a utility class that makes it easy to create valid, well-formed XML documents using relatively sparse python code. The builder class works by wrapping an xml4h.nodes.Element node to provide “chainable” methods focussed specifically on adding XML content.

Each method that adds content returns a Builder instance representing the current or the newly-added element. Behind the scenes, the builder uses the xml4h.nodes node traversal and manipulation methods to add content directly to the underlying DOM.

You will not generally create Builder instances directly, but will instead call the xml4h.build() method with the name for a new root element or with an existing xml4h.nodes.Element node.

class xml4h.builder.Builder(element)[source]

Builder class that wraps an xml4h.nodes.Element node with methods for adding XML content to an underlying DOM.

a(*args, **kwargs)

Alias of attributes()

attributes(*args, **kwargs)[source]

Add one or more attributes to the xml4h.nodes.Element node represented by this Builder.

Returns:the current Builder.

Delegates to xml4h.nodes.Element.set_attributes().

attrs(*args, **kwargs)

Alias of attributes()

c(text)

Alias of comment()

cdata(text)[source]

Add a CDATA node to the xml4h.nodes.Element node represented by this Builder.

Returns:the current Builder.

Delegates to xml4h.nodes.Element.add_cdata().

clone(node)[source]

Clone a node from another document to become a child of the xml4h.nodes.Element node represented by this Builder.

Returns:a new Builder that represents the current element (not the cloned node).

Delegates to xml4h.nodes.Node.clone_node().

comment(text)[source]

Add a comment node to the xml4h.nodes.Element node represented by this Builder.

Returns:the current Builder.

Delegates to xml4h.nodes.Element.add_comment().

d(text)

Alias of cdata()

data(text)

Alias of cdata()

document[source]
Returns:the xml4h.nodes.Document node that contains the element represented by this Builder.
dom_element[source]
Returns:the xml4h.nodes.Element node represented by this Builder.
e(*args, **kwargs)

Alias of element()

elem(*args, **kwargs)

Alias of element()

element(*args, **kwargs)[source]

Add a child element to the xml4h.nodes.Element node represented by this Builder.

Returns:a new Builder that represents the child element.

Delegates to xml4h.nodes.Element.add_element().

find(**kwargs)[source]

Find descendants of the element represented by this builder that match the given constraints.

Returns:a list of xml4h.nodes.Element nodes

Delegates to xml4h.nodes.Node.find()

find_doc(**kwargs)[source]

Find nodes in this element’s owning xml4h.nodes.Document that match the given constraints.

Returns:a list of xml4h.nodes.Element nodes

Delegates to xml4h.nodes.Node.find_doc().

i(target, data)

Alias of processing_instruction()

instruction(target, data)

Alias of processing_instruction()

ns_prefix(prefix, ns_uri)[source]

Set the namespace prefix of the xml4h.nodes.Element node represented by this Builder.

Returns:the current Builder.

Delegates to xml4h.nodes.Element.set_ns_prefix().

processing_instruction(target, data)[source]

Add a processing instruction node to the xml4h.nodes.Element node represented by this Builder.

Returns:the current Builder.

Delegates to xml4h.nodes.Element.add_instruction().

root[source]
Returns:the xml4h.nodes.Element root node ancestor of the element represented by this Builder
t(text)

Alias of text()

text(text)[source]

Add a text node to the xml4h.nodes.Element node represented by this Builder.

Returns:the current Builder.

Delegates to xml4h.nodes.Element.add_text().

transplant(node)[source]

Transplant a node from another document to become a child of the xml4h.nodes.Element node represented by this Builder.

Returns:a new Builder that represents the current element (not the transplanted node).

Delegates to xml4h.nodes.Node.transplant_node().

up(count=1, to_name=None)[source]
Returns:

a builder representing an ancestor of the current element, by default the parent element.

Parameters:
  • count (integer >= 1 or None) – return the n’th ancestor element; defaults to 1 which means the immediate parent. If count is greater than the number of ancestors, return the document’s root element.
  • to_name (string or None) – return the nearest ancestor element with the matching name, or the document’s root element if there are no matching elements. This argument trumps the count argument.
write(*args, **kwargs)[source]

Write XML text for the element represented by this builder.

Delegates to xml4h.nodes.Node.write().

write_doc(*args, **kwargs)[source]

Write XML text for the Document containing the element represented by this builder.

Delegates to xml4h.nodes.Node.write_doc().

Writer

Writer to serialize XML DOM documents or sections to text.

xml4h.writer.write_node(node, writer=None, encoding='utf-8', indent=0, newline='', omit_declaration=False, node_depth=0, quote_char='"')[source]

Serialize an xml4h DOM node and its descendants to text, writing the output to a given writer or to stdout.

Parameters:
  • node (an xml4h.nodes.Node or subclass) – the DOM node whose content and descendants will be serialized.
  • writer (a file, stream, etc or None) – an object such as a file or stream to which XML text is sent. If None text is sent to sys.stdout.
  • encoding (string) – the character encoding for serialized text.
  • indent (string, int, bool, or None) –

    indentation prefix to apply to descendent nodes for pretty-printing. The value can take many forms:

    • int: the number of spaces to indent. 0 means no indent.
    • string: a literal prefix for indented nodes, such as \t.
    • bool: no indent if False, four spaces indent if True.
    • None: no indent.
  • newline (string, bool, or None) –

    the string value used to separate lines of output. The value can take a number of forms:

    • string: the literal newline value, such as \n or \r. An empty string means no newline.
    • bool: no newline if False, \n newline if True.
    • None: no newline.
  • omit_declaration (boolean) – if True the XML declaration header is omitted, otherwise it is included. Note that the declaration is only output when serializing an xml4h.nodes.Document node.
  • node_depth (int) – the indentation level to start at, such as 2 to indent output as if the given node has two ancestors. This parameter will only be useful if you need to output XML text fragments that can be assembled into a document. This parameter has no effect unless indentation is applied.
  • quote_char (string) – the character that delimits quoted content. You should never need to mess with this.

DOM Nodes API

class xml4h.nodes.Attribute(node, adapter)[source]

Node representing an attribute of a Document or Element node.

class xml4h.nodes.AttributeDict(attr_impl_nodes, impl_element, adapter)[source]

Dictionary-like object of element attributes that always reflects the state of the underlying element node, and that allows for in-place modifications that will immediately affect the element.

__weakref__

list of weak references to the object (if defined)

element[source]
Returns:the Element that contains these attributes.
impl_attributes[source]
Returns:the attribute node objects from the underlying XML implementation.
items()[source]
Returns:a list of name/value attribute pairs sorted by attribute name.
keys()[source]
Returns:a list of attribute name strings.
namespace_uri(name)[source]
Parameters:name (string) – the name of an attribute to look up.
Returns:the namespace URI associated with the named attribute, or None.
prefix(name)[source]
Parameters:name (string) – the name of an attribute to look up.
Returns:the prefix component of the named attribute’s name, or None.
to_dict[source]
Returns:an OrderedDict of attribute name/value pairs.
values()[source]
Returns:a list of attribute value strings.
class xml4h.nodes.CDATA(node, adapter)[source]

Node representing character data in an XML document.

class xml4h.nodes.Comment(node, adapter)[source]

Node representing a comment in an XML document.

class xml4h.nodes.Document(node, adapter)[source]

Node representing an entire XML document.

class xml4h.nodes.DocumentFragment(node, adapter)[source]

Node representing an XML document fragment.

class xml4h.nodes.DocumentType(node, adapter)[source]

Node representing the type of an XML document.

class xml4h.nodes.Element(node, adapter)[source]

Node representing an element in an XML document, with support for manipulating and adding content to the element.

add_cdata(data)[source]

Add a character data node to this element.

Parameters:data (string) – text content to add as character data.
add_comment(text)[source]

Add a comment node to this element.

Parameters:text (string) – text content to add as a comment.
add_element(name, ns_uri=None, attributes=None, text=None, before_this_element=False)[source]

Add a new child element to this element, with an optional namespace definition. If no namespace is provided the child will be assigned to the default namespace.

Parameters:
  • name (string) –

    a name for the child node. The name may be used to apply a namespace to the child by including:

    • a prefix component in the name of the form ns_prefix:element_name, where the prefix has already been defined for a namespace URI (such as via set_ns_prefix()).
    • a literal namespace URI value delimited by curly braces, of the form {ns_uri}element_name.
  • ns_uri (string or None) – a URI specifying the new element’s namespace. If the name parameter specifies a namespace this parameter is ignored.
  • attributes (dict, list, tuple, or None) – collection of attributes to assign to the new child.
  • text (string or None) – text value to assign to the new child.
  • before_this_element (bool) – if True the new element is added as a sibling preceding this element, instead of as a child. In other words, the new element will be a child of this element’s parent node, and will immediately precede this element in the DOM.
Returns:

the new child as an Element node.

add_instruction(target, data)[source]

Add an instruction node to this element.

Parameters:
  • target (string) – the target of the processing instruction.
  • data (string) – the data content of the processing instruction.
add_text(text)[source]

Add a text node to this element.

Adding text with this method is subtly different from assigning a new text value with the text accessor, because it “appends” to, rather than replaces, this element’s set of text nodes.

Parameters:
  • text (string or anything that can be coerced by unicode()) – text content to add to this element.
attrib(attr_obj=None, ns_uri=None, **attr_dict)

Alias of set_attributes()

attribute_node(name, ns_uri=None)[source]
Parameters:
  • name (string) – the name of the attribute to return.
  • ns_uri (string or None) – a URI defining a namespace constraint on the attribute.
Returns:

this element’s attributes that match ns_uri as Attribute nodes.

attribute_nodes[source]
Returns:a list of this element’s attributes as Attribute nodes.
attributes[source]

Get or set this element’s attributes as name/value pairs.

Note

Setting element attributes via this accessor will remove any existing attributes, as opposed to the set_attributes() method which only updates and replaces them.

attrs(attr_obj=None, ns_uri=None, **attr_dict)

Alias of set_attributes()

builder[source]
Returns:a Builder representing this element with convenience methods for adding XML content.
set_attributes(attr_obj=None, ns_uri=None, **attr_dict)[source]

Add or update this element’s attributes, where attributes can be specified in a number of ways.

Parameters:
  • attr_obj (dict, list, tuple, or None) – a dictionary or list of attribute name/value pairs.
  • ns_uri (string or None) – a URI defining a namespace for the new attributes.
  • attr_dict (dict) – attribute name and values specified as keyword arguments.
set_ns_prefix(prefix, ns_uri)[source]

Define a namespace prefix that will serve as shorthand for the given namespace URI in element names.

Parameters:
  • prefix (string) – prefix that will serve as an alias for the namespace URI.
  • ns_uri (string) – namespace URI that will be denoted by the prefix.
text[source]

Get or set the text content of this element.

class xml4h.nodes.Entity(node, adapter)[source]

Node representing an entity in an XML document.

class xml4h.nodes.EntityReference(node, adapter)[source]

Node representing an entity reference in an XML document.

class xml4h.nodes.NameValueNodeMixin(node, adapter)[source]

Provide methods to access node name and value attributes, where the node name may also be composed of “prefix” and “local” components.

local_name[source]
Returns:the local component of a node name excluding any prefix.
name[source]
Get or set the name of a node, possibly including prefix and local
components.
prefix[source]
Returns:the namespace prefix component of a node name, or None.
value[source]

Get or set the value of a node.

class xml4h.nodes.Node(node, adapter)[source]

Base class for xml4h DOM nodes that represent and interact with a node in the underlying XML implementation.

XMLNS_URI = 'http://www.w3.org/2000/xmlns/'

URI constant for XMLNS

__init__(node, adapter)[source]

Construct an object that represents and wraps a DOM node in the underlying XML implementation.

Parameters:
  • node – node object from the underlying XML implementation.
  • adapter – the xml4h.impls.XmlImplAdapter subclass implementation to mediate operations on the node in the underlying XML implementation.
__weakref__

list of weak references to the object (if defined)

_convert_nodelist(impl_nodelist)[source]

Convert a list of underlying implementation nodes into a list of xml4h wrapper nodes.

adapter[source]
Returns:the xml4h.impls.XmlImplAdapter subclass implementation that mediates operations on the node in the underlying XML implementation.
adapter_class[source]
Returns:the class of the xml4h.impls.XmlImplAdapter subclass implementation that mediates operations on the node in the underlying XML implementation.
ancestors[source]
Returns:the ancestors of this node in a list ordered by proximity to this node, that is: parent, grandparent, great-grandparent etc.
child(local_name=None, name=None, ns_uri=None, node_type=None, filter_fn=None)[source]
Returns:the first child node matching the given constraints, or None if there are no matching child nodes.

Delegates to NodeList.filter().

children[source]
Returns:a NodeList of this node’s child nodes.
clone_node(node)[source]

Clone a node from another document to become a child of this node, by copying the node’s data into this document but leaving the node untouched in the source document. The node to be cloned can be a Node based on the same underlying XML library implementation and adapter, or a “raw” node from that implementation.

Parameters:node (xml4h or implementation node) – the node in another document to clone.
delete(destroy=True)[source]

Delete this node from the owning document.

Parameters:destroy (bool) – if True the child node will be destroyed in addition to being removed from the document.
Returns:the removed child node, or None if the child was destroyed.
document[source]
Returns:the Document node that contains this node, or self if this node is the document.
find(name=None, ns_uri=None, first_only=False)[source]

Find Element node descendants of this node, with optional constraints to limit the results.

Parameters:
  • name (string or None) – limit results to elements with this name. If None or '*' all element names are matched.
  • ns_uri (string or None) – limit results to elements within this namespace URI. If None all elements are matched, regardless of namespace.
  • first_only (bool) – if True only return the first result node or None if there is no matching node.
Returns:

a list of Element nodes matching any given constraints, or a single node if first_only=True.

find_doc(name=None, ns_uri=None, first_only=False)[source]

Find Element node descendants of the document containing this node, with optional constraints to limit the results.

Delegates to find() applied to this node’s owning document.

find_first(name=None, ns_uri=None)[source]

Find the first Element node descendant of this node that matches any optional constraints, or None if there are no matching elements.

Delegates to find() with first_only=True.

has_feature(feature_name)[source]
Returns:True if a named feature is supported by the adapter implementation underlying this node.
impl_document[source]
Returns:the document object from the underlying XML implementation that contains the node represented by this xml4h node.
impl_node[source]
Returns:the node object from the underlying XML implementation that is represented by this xml4h node.
is_attribute[source]
Returns:True if this is an Attribute node.
is_cdata[source]
Returns:True if this is a CDATA node.
is_comment[source]
Returns:True if this is a Comment node.
is_document[source]
Returns:True if this is a Document node.
is_document_fragment[source]
Returns:True if this is a DocumentFragment node.
is_document_type[source]
Returns:True if this is a DocumentType node.
is_element[source]
Returns:True if this is an Element node.
is_entity[source]
Returns:True if this is an Entity node.
is_entity_reference[source]
Returns:True if this is an EntityReference node.
is_notation[source]
Returns:True if this is a Notation node.
is_processing_instruction[source]
Returns:True if this is a ProcessingInstruction node.
is_root[source]
Returns:True if this node is the document’s root element
is_text[source]
Returns:True if this is a Text node.
is_type(node_type_constant)[source]
Returns:True if this node’s int type matches the given value.
namespace_uri[source]
Returns:this node’s namespace URI or None.
node_type[source]
Returns:an int constant value that identifies the type of this node, such as ELEMENT_NODE or TEXT_NODE.
ns_uri

Alias for namespace_uri()

parent[source]
Returns:the parent of this node, or None if the node has no parent.
root[source]
Returns:the root Element node of the document that contains this node, or self if this node is the root element.
siblings[source]
Returns:a list of this node’s sibling nodes.
Return type:NodeList
siblings_after[source]
Returns:a list of this node’s siblings that occur after this node in the DOM.
siblings_before[source]
Returns:a list of this node’s siblings that occur before this node in the DOM.
transplant_node(node, copy=False)[source]

Transplant a node from another document to become a child of this node, removing it from the source document. The node to be transplanted can be a Node based on the same underlying XML library implementation and adapter, or a “raw” node from that implementation.

Parameters:node (xml4h or implementation node) – the node in another document to transplant.
write(writer=None, encoding='utf-8', indent=0, newline='', omit_declaration=False, node_depth=0, quote_char='"')[source]

Serialize this node and its descendants to text, writing the output to a given writer or to stdout.

Parameters:
  • writer (a file, stream, etc or None) – an object such as a file or stream to which XML text is sent. If None text is sent to sys.stdout.
  • encoding (string) – the character encoding for serialized text.
  • indent (string, int, bool, or None) –

    indentation prefix to apply to descendent nodes for pretty-printing. The value can take many forms:

    • int: the number of spaces to indent. 0 means no indent.
    • string: a literal prefix for indented nodes, such as \t.
    • bool: no indent if False, four spaces indent if True.
    • None: no indent
  • newline (string, bool, or None) –

    the string value used to separate lines of output. The value can take a number of forms:

    • string: the literal newline value, such as \n or \r. An empty string means no newline.
    • bool: no newline if False, \n newline if True.
    • None: no newline.
  • omit_declaration (boolean) – if True the XML declaration header is omitted, otherwise it is included. Note that the declaration is only output when serializing an xml4h.nodes.Document node.
  • node_depth (int) – the indentation level to start at, such as 2 to indent output as if the given node has two ancestors. This parameter will only be useful if you need to output XML text fragments that can be assembled into a document. This parameter has no effect unless indentation is applied.
  • quote_char (string) – the character that delimits quoted content. You should never need to mess with this.

Delegates to xml4h.writer.write_node() applied to this node.

write_doc(*args, **kwargs)[source]

Serialize to text the document containing this node, writing the output to a given writer or stdout.

Delegates to write()

xml(indent=4, **kwargs)[source]
Returns:this node as XML text.

Delegates to write()

xml_doc(**kwargs)[source]
Returns:the document containing this node as XML text.

Delegates to xml()

class xml4h.nodes.NodeAttrAndChildElementLookupsMixin[source]

Perform “magical” lookup of a node’s attributes via dict-style keyword reference, and child elements via class attribute reference.

__getattr__(child_name)[source]

Retrieve this node’s child element by tag name regardless of the element’s namespace, assuming the name given doesn’t match an existing attribute of this class.

Parameters:child_name (string) –

tag name of the child element. The name must match the following pattern rules for xml4h to attempt a child element lookup, otherwise an AttributeError will be raised immediately:

  • name contains one or more uppercase characters, or
  • name is all lowercase but ends with a single underscore character
  • in all cases the name does not begin with an underscore character.
Returns:the type of the return value depends on how many child elements match the name:
  • a single Element node if only one child element matches
  • a list of Element nodes if there is more than 1 match.
Raise :AttributeError if the node has no child element with the given name, or if the given name does not match the required pattern.
__getitem__(attr_name)[source]

Retrieve this node’s attribute value by name using dict-style keyword lookup.

Parameters:attr_name (string) – name of the attribute. If the attribute has a namespace prefix that must be included, in other words the name must be a qname not local name.
Raise :KeyError if the node has no such attribute.
__weakref__

list of weak references to the object (if defined)

class xml4h.nodes.NodeList[source]

Custom implementation for Node lists that provides additional functionality, such as node filtering.

__call__(local_name=None, name=None, ns_uri=None, node_type=None, filter_fn=None, first_only=False)

Alias for filter().

__weakref__

list of weak references to the object (if defined)

filter(local_name=None, name=None, ns_uri=None, node_type=None, filter_fn=None, first_only=False)[source]

Apply filters to the set of nodes in this list.

Parameters:
  • local_name (string or None) – a local name used to filter the nodes.
  • name (string or None) – a name used to filter the nodes.
  • ns_uri (string or None) – a namespace URI used to filter the nodes. If None all nodes are returned regardless of namespace.
  • node_type (int node type constant, class, or None) – a node type definition used to filter the nodes.
  • filter_fn (function or None) –

    an arbitrary function to filter nodes in this list. This function must accept a single Node argument and return a bool indicating whether to include the node in the filtered results.

    Note

    if filter_fn is provided all other filter arguments are ignored.

Returns:

the type of the return value depends on the value of the first_only parameter and how many nodes match the filter:

  • if first_only=False return a NodeList of filtered nodes, which will be empty if there are no matching nodes.
  • if first_only=True and at least one node matches, return the first matching Node
  • if first_only=True and there are no matching nodes, return None

first[source]
Returns:the first of the available children nodes, or None if there are no children.
class xml4h.nodes.Notation(node, adapter)[source]

Node representing a notation in an XML document.

class xml4h.nodes.ProcessingInstruction(node, adapter)[source]

Node representing a processing instruction in an XML document.

data

Get or set the value of a node.

target
Get or set the name of a node, possibly including prefix and local
components.
class xml4h.nodes.Text(node, adapter)[source]

Node representing text content in an XML document.

class xml4h.nodes.XPathMixin[source]

Provide xpath() method to nodes that support XPath searching.

__weakref__

list of weak references to the object (if defined)

xpath(xpath, **kwargs)[source]

Perform an XPath query on the current node.

Parameters:
  • xpath (string) – XPath query.
  • kwargs (dict) – Optional keyword arguments that are passed through to the underlying XML library implementation.
Returns:

results of the query as a list of Node objects, or a list of base type objects if the XPath query does not reference node objects.

XML Library Adapters

class xml4h.impls.interface.XmlImplAdapter(document)[source]

Base class that defines how xml4h interacts with an underlying XML library that the adapter “wraps” to provide additional (or at least different) functionality.

This class should be treated as an abstract class. It provides some common implementation code used by all xml4h adapter implementations, but mostly it sketches out the methods the real implementation subclasses must provide.

find_node_elements(node, name='*', ns_uri='*')[source]
Returns:

element node descendents of the given node that match the search constraints.

Parameters:
  • node – a node object from the underlying XML library.
  • name (string) – only elements with a matching name will be returned. If the value is * all names will match.
  • ns_uri (string) – only elements with a matching namespace URI will be returned. If the value is * all namespaces will match.
get_ns_info_from_node_name(name, impl_node)[source]

Return a three-element tuple with the prefix, local name, and namespace URI for the given element/attribute name (in the context of the given node’s hierarchy). If the name has no associated prefix or namespace information, None is returned for those tuple members.

classmethod has_feature(feature_name)[source]
Returns:True if a named feature is supported by this adapter.
classmethod ignore_whitespace_text_nodes(wrapped_node)[source]

Find and delete any text nodes containing nothing but whitespace in the given node and its descendents.

This is useful for cleaning up excess low-value text nodes in a document DOM after parsing a pretty-printed XML document.

classmethod is_available()[source]
Returns:True if this adapter’s underlying XML library is available in the Python environment.
class xml4h.impls.lxml_etree.LXMLAdapter(document)[source]

Adapter to the lxml XML library implementation.

find_node_elements(node, name='*', ns_uri='*')[source]
Returns:

element node descendents of the given node that match the search constraints.

Parameters:
  • node – a node object from the underlying XML library.
  • name (string) – only elements with a matching name will be returned. If the value is * all names will match.
  • ns_uri (string) – only elements with a matching namespace URI will be returned. If the value is * all namespaces will match.
xpath_on_node(node, xpath, **kwargs)[source]

Return result of performing the given XPath query on the given node.

All known namespace prefix-to-URI mappings in the document are automatically included in the XPath invocation.

If an empty/default namespace (i.e. None) is defined, this is converted to the prefix name ‘_’ so it can be used despite empty namespace prefixes being unsupported by XPath.

class xml4h.impls.xml_dom_minidom.XmlDomImplAdapter(document)[source]

Adapter to the minidom XML library implementation.

get_node_text(node)[source]

Return concatenated value of all text node children of this element

set_node_text(node, text)[source]

Set text value as sole Text child node of element; any existing Text nodes are removed

Custom Exceptions

Custom xml4h exceptions.

exception xml4h.exceptions.FeatureUnavailableException[source]

User has attempted to use a feature that is available in some xml4h implementations/adapters, but is not available in the current one.

exception xml4h.exceptions.IncorrectArgumentTypeException(arg, expected_types)[source]

Richer flavour of a ValueError that describes exactly what argument types are expected.

exception xml4h.exceptions.Xml4hException[source]

Base exception class for all non-standard exceptions raised by xml4h.

Project Versions

Table Of Contents

Previous topic

Advanced

This Page

PK!lAG@@xml4h-develop/parser.html Parser — xml4h 0.2.0 documentation

Parser

The xml4h parser is a simple wrapper around the parser provided by an underlying XML library implementation.

Parse function

To parse XML documents with xml4h you feed the xml4h.parse() function an XML text document in one of three forms:

  • A file-like object:

    >>> import xml4h
    
    >>> xml_file = open('tests/data/monty_python_films.xml', 'rb')
    >>> doc = xml4h.parse(xml_file)
    
    >>> doc.MontyPythonFilms
    <xml4h.nodes.Element: "MontyPythonFilms">
    
  • A file path string:

    >>> doc = xml4h.parse('tests/data/monty_python_films.xml')
    
    >>> doc.root['source']
    'http://en.wikipedia.org/wiki/Monty_Python'
    
  • A string containing literal XML content:

    >>> xml_file = open('tests/data/monty_python_films.xml', 'rb')
    >>> xml_text = xml_file.read()
    >>> doc = xml4h.parse(xml_text)
    
    >>> len(doc.find('Film'))
    7
    

Note

The parse() method distinguishes between a file path string and an XML text string by looking for a < character in the value.

Stripping of Whitespace Nodes

By default the parse method ignores whitespace nodes in the XML document – or more accurately, it does extra work to remove these nodes after the document has been parsed by the underlying XML library.

Whitespace nodes are rarely interesting, since they are usually the result of XML content that has been serialized with extra whitespace to make it more readable to humans.

However if you need to keep these nodes, or if you want to avoid the extra processing overhead when parsing large documents, you can disable this feature by passing in the ignore_whitespace_text_nodes=False flag:

>>> # Strip whitespace nodes from document
>>> doc = xml4h.parse('tests/data/monty_python_films.xml')

>>> # No excess text nodes (XML doc lists 7 films)
>>> len(doc.MontyPythonFilms.children)
7
>>> doc.MontyPythonFilms.children[0]
<xml4h.nodes.Element: "Film">


>>> # Don't strip whitespace nodes
>>> doc = xml4h.parse('tests/data/monty_python_films.xml',
...                   ignore_whitespace_text_nodes=False)

>>> # An extra text node is present
>>> len(doc.MontyPythonFilms.children)
8
>>> doc.MontyPythonFilms.children[0]
<xml4h.nodes.Text: "#text">

Project Versions

Table Of Contents

Previous topic

xml4h: XML for Humans in Python

Next topic

Builder

This Page

PK!lAمXXxml4h-develop/writer.html Writer — xml4h 0.2.0 documentation

Writer

The xml4h writer produces serialized XML text documents much as you would expect, and in that respect it is a little unlike the writer methods in some of the other Python XML libraries.

Write methods

To write out an XML document with xml4h you will generally use the write() or write_doc() methods available on any xml4h node.

The write() method outputs the current node and any descendants:

>>> import xml4h
>>> doc = xml4h.parse('tests/data/monty_python_films.xml')

>>> first_film_elem = doc.find('Film')[0]
>>> first_film_elem.write(indent=True)  
<Film year="1971">
    <Title>And Now for Something Completely Different</Title>
    <Description>A collection of sketches from the first and second...
</Film>

The write_doc() method outputs the entire document no matter which node you call it on:

>>> first_film_elem.write_doc(indent=True)  
<?xml version="1.0" encoding="utf-8"?>
<MontyPythonFilms source="http://en.wikipedia.org/wiki/Monty_Python">
    <Film year="1971">
        <Title>And Now for Something Completely Different</Title>
        <Description>A collection of sketches from the first and second...
    </Film>
 ...

The write methods send output to sys.stdout by default. To send output to a file, or any other writer-like object, provide the target writer as an argument:

>>> # Write to a file
>>> with open('/tmp/example.xml', 'wb') as f:
...     first_film_elem.write_doc(f)

>>> # Write to a string (BUT SEE SECTION BELOW...)
>>> from StringIO import StringIO
>>> str_writer = StringIO()
>>> first_film_elem.write_doc(str_writer)
>>> str_writer.getvalue()  
'<?xml version="1.0" encoding="utf-8"?><MontyPythonFilms source...

Write to a String

Because you will often want to generate a string of XML content directly, xml4h includes the convenience methods xml() and xml_doc() to do this easily.

The xml() method works like the write method and will return a string of XML content including the current node and its descendants:

>>> print first_film_elem.xml()  
<Film year="1971">
    <Title>And Now for Something Completely...

The xml_doc() method works like the write_doc method and returns a string for the whole document:

>>> print first_film_elem.xml_doc()  
<?xml version="1.0" encoding="utf-8"?>
<MontyPythonFilms source="http://en.wikipedia.org/wiki/Monty_Python">
    <Film year="1971">
        <Title>And Now for Something Completely Different</Title>
        <Description>A collection of sketches from the first and second...
    </Film>
    ...

Note

xml4h assumes that when you directly generate an XML string in this way it is intended for human consumption, so it applies pretty-print formatting by default.

Format Output

The write and xml methods accept a range of formatting options to control how XML content is serialized. These are useful if you expect a human to read the resulting data.

For the full range of formatting options see the code documentation for write() and xml() et al. but here are some pointers to get you started:

  • Set indent=True to write a pretty-printed XML document with four space characters for indentation and \n for newlines.
  • To use a tab character for indenting and \r\n for newlines: indent='\t', newline='\r\n'.
  • xml4h writes utf-8-encoded documents by default, to write with a different encoding: encoding='iso-8859-1'.
  • To avoid outputting the XML declaration when writing a document: omit_declaration=True.

Write using the underlying implementation

Because xml4h sits on top of an underlying XML library implementation you can use that library’s serialization methods if you prefer, and if you don’t mind having some implementation-specific code.

For example, if you are using lxml as the underlying library you can use its serialisation methods by accessing the implementation node:

>>> # Get the implementation root node, in this case an lxml node
>>> lxml_root_node = first_film_elem.root.impl_node
>>> lxml_root_node.__class__
<type 'lxml.etree._Element'>

>>> # Use lxml features as normal; xml4h is no longer in the picture
>>> from lxml import etree
>>> print etree.tostring(lxml_root_node, encoding='utf-8',
...                      xml_declaration=True, pretty_print=True)  
<?xml version='1.0' encoding='utf-8'?>
<MontyPythonFilms source="http://en.wikipedia.org/wiki/Monty_Python"><Film year="1971"><Title>And Now for Something Completely Different</Title>
        <Description>A collection of sketches from the first and second...
    </Film>
    <Film year="1974"><Title>Monty Python and the Holy Grail</Title>
        <Description>King Arthur and his knights embark on a low-budget...
    </Film>
    ...

Note

The output from lxml is a little quirky, at least on the author’s machine. Note for example the single-quote characters in the XML declaration, and the missing newline and indent before the first <Film> element. But don’t worry, that’s why you have xml4h ;)

Project Versions

Table Of Contents

Previous topic

Builder

Next topic

DOM Nodes

This Page

PK!lA0 pxml4h-develop/advanced.html Advanced — xml4h 0.2.0 documentation

Advanced

Namespaces

xml4h supports using XML namespaces in a number of ways, and tries to make this sometimes complex and fiddly aspect of XML a little easier to deal with.

Namespace URIs

XML document nodes can be associated with a namespace URI which uniquely identifies the namespace. At bottom a URI is really just a name to identify the namespace, which may or may not point at an actual resource.

Namespace URIs are the core piece of the namespacing puzzle, everything else is extras.

Namespace URI values are assigned to a node in one of three ways:

  • an xmlns attribute on an element assigns a namespace URI to that element, and may also define a shorthand prefix for the namespace:

    <AnElement xmlns:my-prefix="urn:example-uri">

    Note

    Technically the xmlns attribute must itself also be in the special XML namespacing namespace http://www.w3.org/2000/xmlns/. You needn’t care about this.

  • a tag or attribute name includes a prefix alias portion that specifies the namespace the item belongs to:

    <my-prefix:AnotherElement attr1="x" my-prefix:attr2="i am namespaced">

    A prefix alias can be defined using an “xmlns” attribute as described above, or by using the Builder ns_prefix() or Node set_ns_prefix() methods.

  • in an apparent effort to reduce confusion around namespace URIs and prefixes, some XML libraries avoid prefix aliases altogether and instead require you to specify the full namespace URI as a prefix to tag and attribute names using a special syntax with braces:

    >>> tagname = '{urn:example-uri}YetAnotherWayToNamespace'
    

    Note

    In the author’s opinion, using a non-standard way to define namespaces does not reduce confusion. xml4h supports this approach technically but not philosophically.

xml4h allows you to assign namespace URIs to document nodes when using the Builder:

>>> # Assign a default namespace with ns_uri
>>> import xml4h
>>> b = xml4h.build('Doc', ns_uri='ns-uri')
>>> root = b.root

>>> # Descendants without a namespace inherit their ancestor's default one
>>> elem1 = b.elem('Elem1').dom_element
>>> elem1.namespace_uri
'ns-uri'

>>> # Define a prefix alias to assign a new or existing namespace URI
>>> elem2 = b.ns_prefix('my-ns', 'second-ns-uri') \
...     .elem('my-ns:Elem2').dom_element
>>> print root.xml()
<Doc xmlns="ns-uri" xmlns:my-ns="second-ns-uri">
    <Elem1/>
    <my-ns:Elem2/>
</Doc>

>>> # Or use the explicit URI prefix approach, if you must
>>> elem3 = b.elem('{third-ns-uri}Elem3').dom_element
>>> elem3.namespace_uri
'third-ns-uri'

And when adding nodes with the API:

>>> # Define the ns_uri argument when creating a new element
>>> elem4 = root.add_element('Elem4', ns_uri='fourth-ns-uri')

>>> # Attributes can be namespaced too
>>> elem4.attrs({'my-ns:attr1': 'value'})

>>> print elem4.xml()
<Elem4 my-ns:attr1="value" xmlns="fourth-ns-uri"/>

Filtering by Namespace

xml4h allows you to find and filter nodes based on their namespace.

The find() method takes a ns_uri keyword argument to return only elements in that namespace:

>>> # By default, find ignores namespaces...
>>> [n.local_name for n in root.find()]
[u'Elem1', u'Elem2', u'Elem3', u'Elem4']
>>> # ...but will filter by namespace URI if you wish
>>> [n.local_name for n in root.find(ns_uri='fourth-ns-uri')]
[u'Elem4']

Similarly, a node’s children listing can be filtered:

>>> len(root.children)
4
>>> root.children(ns_uri='ns-uri')
[<xml4h.nodes.Element: "Elem1">]

XPath queries can also filter by namespace, but the xpath() method needs to be given a dictionary mapping of prefix aliases to URIs:

>>> root.xpath('//ns4:*', namespaces={'ns4': 'fourth-ns-uri'})
[<xml4h.nodes.Element: "Elem4">]

Note

Normally, because XPath queries rely on namespace prefix aliases, they cannot find namespaced nodes in the default namespace which has an “empty” prefix name. xml4h works around this limitation by providing the special empty/default prefix alias ‘_’.

Element Names: Local and Prefix Components

When you use a namespace prefix alias to define the namespace an element or attribute belongs to, the name of that node will be made up of two components:

  • prefix - the namespace alias.
  • local - the real name of the node, without the namespace alias.

xml4h makes the full (qualified) name, and the two components, available as node attributes:

>>> # Elem2's namespace was defined earlier using a prefix alias
>>> elem2
<xml4h.nodes.Element: "my-ns:Elem2">

>>> # The full node name...
>>> elem2.name
u'my-ns:Elem2'
>>> # ...comprises a prefix...
>>> elem2.prefix
u'my-ns'
>>> # ...and a local name component
>>> elem2.local_name
u'Elem2'

>>> # Here is an element without a prefix alias
>>> elem1.name
u'Elem1'
>>> elem1.prefix == None
True
>>> elem1.local_name
u'Elem1'

xml4h Architecture

To best understand the xml4h library and to use it appropriately in demanding situations, you should appreciate what the library is not.

xml4h is not a full-fledged XML library in its own right, far from it. Instead of implementing low-level document parsing and manipulation tools, it operates as an abstraction layer on top of the pre-existing XML processing libraries you already know.

This means the improved API and tool suite provided by xml4h work by mediating operations you perform, asking the underlying XML library to do the work, and packaging up the results of this work as wrapped xml4h objects.

This approach has a number of implications, good and bad.

On the good side:

  • you can start using and benefiting from xml4h in existing projects that already use a supported XML library without any impact; it can fit right in.
  • xml4h can take advantage of the existing powerful and fast XML libraries to do its work.
  • by providing an abstraction layer over multiple libraries, xml4h can make it (relatively) easy to switch the underlying library without you needing to rewrite your own XML handling code.
  • by building on the shoulders of giants, xml4h itself can remain relatively lightweight and focussed on simplicity and usability.
  • the author of xml4h does not have to write XML-handling code in C...

On the bad side:

  • if the underlying XML libraries available in the Python environment do not support a feature (like XPath querying) then that feature will not be available in xml4h.
  • xml4h cannot provide radical new XML processing features, since the bulk of its work must be done by the underlying library.
  • the abstraction layer xml4h uses to do its work requires more resources than using the underlying library directly would, so if you absolutely need maximal speed or minimal memory use, the library might prove too expensive.
  • xml4h sometimes needs to jump through some hoops to maintain the shared abstraction interface over multiple libraries, which means extra work is done in Python instead of by the underlying library code in C.

The author believes the benefits of using xml4h outweigh the drawbacks in the majority of real-world situations, or he wouldn’t have created the library in the first place, but ultimately it is up to you to decide where you should or should not use it.

Library Adapters

To provide an abstraction layer over multiple underlying XML libraries, xml4h uses an “adapter” mechanism to mediate operations on documents. There is an adapter implementation for each library xml4h can work with, each of which extends the XmlImplAdapter class. This base class includes some standard behaviour, and defines the interface for adapter implementations (to the extent you can define such interfaces in Python).

The current version of xml4h includes two adapter implementations:

  • LXMLAdapter works with the excellent lxml library which is very full-featured and fast, but which is not included in the standard library.
  • XmlDomImplAdapter works with the minidom W3C-style XML library included with the standard library. This library is always available but is slower and has fewer features than alternative libraries (e.g. no support for XPath)

The adapter layer allows the rest of the xml4h library code to remain almost entirely oblivious to the underlying XML library that happens to be available at the time. The xml4h Builder, Node objects, writer etc. call adapter methods to perform document operations, and the adapter is responsible for doing the necessary work with the underlying library.

“Best” Adapter

While xml4h can work with multiple underlying XML libraries, some of these libraries are better (faster, more fully-featured) than others so it would be smart to use the best of the libraries available.

xml4h does exactly that: unless you explicitly choose an adapter (see below) xml4h will find the supported libraries in the Python environment and choose the “best” adapter for you.

With only two adapter implementations in xml4h right now the algorithm for making this choice isn’t exactly complex, so let’s spell it out explicitly:

  • use lxml if it is available.
  • use the minidom if nothing else is available.

The xml4h.best_adapter attribute stores the adapter class that xml4h considers to be the best.

Choose Your Own Adapter

By default, xml4h will choose an adapter and underlying XML library implementation that it considers the best available. However, in some cases you may need to have full control over which underlying implementation xml4h uses, perhaps because you will use features of the underlying XML implementation later on, or because you need the performance characteristics only available in a particular library.

For these situations it is possible to tell xml4h which adapter implementation, and therefore which underlying XML library, it should use.

To use a specific adapter implementation when parsing a document, or when creating a new document using the builder, simply provide the optional adapter keyword argument to the relevant method:

  • Parsing:

    >>> # Explicitly use the minidom adapter to parse a document
    >>> minidom_doc = xml4h.parse('tests/data/monty_python_films.xml',
    ...                           adapter=xml4h.XmlDomImplAdapter)
    >>> minidom_doc.root.impl_node  
    <DOM Element: MontyPythonFilms at ...
    
  • Building:

    >>> # Explicitly use the lxml adapter to build a document
    >>> lxml_b = xml4h.build('MyDoc', adapter=xml4h.LXMLAdapter)
    >>> lxml_b.root.impl_node  
    <Element {http://www.w3.org/2000/xmlns/}MyDoc at ...
    

Check Feature Support

Because not all underlying XML libraries support all the features exposed by xml4h, the library includes a simple mechanism to check whether a given feature is available in the current Python environment or with the current adapter.

To check for feature support call the has_feature() method on a document node, or has_feature() on an adapter class.

List of features that are not available in all adapters:

  • xpath - Can perform XPath queries using the xpath() method.
  • More to come later, probably...

For example, here is how you would test for XPath support in the minidom adapter, which doesn’t include it:

>>> minidom_doc.root.has_feature('xpath')
False

If you forget to check for a feature and use it anyway, you will get a FeatureUnavailableException:

>>> try:
...     minidom_doc.root.xpath('//*')
... except Exception, e:
...     e
FeatureUnavailableException('xpath',)

Adapter & Implementation Quirks

Although xml4h aims to provide a seamless abstraction over underlying XML library implementations this isn’t always possible, or is only possible by performing lots of extra work that affects performance. This section describes some implementation-specific quirks or differences you may encounter.

LXMLAdapter - lxml

  • lxml does not have full support for CDATA nodes, which devolve into plain text node values when written (by xml4h or by lxml’s writer).
  • Namespaces defined by adding xmlns element attributes are not properly represented in the underlying implementation due to the lxml library’s immutable nsmap namespace map. Such namespaces are written correctly by the xml4h writer, but to avoid quirks it is best to specify the namespace when creating nodes by setting the ns_uri keyword argument.
  • When xml4h writes lxml-based documents with namespaces, some node tag names may have unnecessary namespace prefix aliases.

XmlImplAdapter - minidom

  • No support for performing XPath queries.
  • Slower than alternative C-based implementations.
PK!lAu (xml4h-develop/_static/comment-bright.pngPNG  IHDRa OiCCPPhotoshop ICC profilexڝSgTS=BKKoR RB&*! J!QEEȠQ, !{kּ> H3Q5 B.@ $pd!s#~<<+"x M0B\t8K@zB@F&S`cbP-`'{[! eDh;VEX0fK9-0IWfH  0Q){`##xFW<+*x<$9E[-qWW.(I+6aa@.y24x6_-"bbϫp@t~,/;m%h^ uf@Wp~<5j>{-]cK'Xto(hw?G%fIq^D$.Tʳ?D*A, `6B$BB dr`)B(Ͱ*`/@4Qhp.U=pa( Aa!ڈbX#!H$ ɈQ"K5H1RT UH=r9\F;2G1Q= C7F dt1r=6Ыhڏ>C03l0.B8, c˱" VcϱwE 6wB aAHXLXNH $4 7 Q'"K&b21XH,#/{C7$C2'ITFnR#,4H#dk9, +ȅ3![ b@qS(RjJ4e2AURݨT5ZBRQ4u9̓IKhhitݕNWGw Ljg(gwLӋT071oUX**| J&*/Tު UUT^S}FU3S ԖUPSSg;goT?~YYLOCQ_ cx,!k u5&|v*=9C3J3WRf?qtN (~))4L1e\kXHQG6EYAJ'\'GgSSݧ M=:.kDwn^Loy}/TmG X $ <5qo</QC]@Caaᄑ.ȽJtq]zۯ6iܟ4)Y3sCQ? 0k߬~OCOg#/c/Wװwa>>r><72Y_7ȷOo_C#dz%gA[z|!?:eAAA!h쐭!ΑiP~aa~ 'W?pX15wCsDDDޛg1O9-J5*>.j<74?.fYXXIlK9.*6nl {/]py.,:@LN8A*%w% yg"/6шC\*NH*Mz쑼5y$3,幄'L Lݛ:v m2=:1qB!Mggfvˬen/kY- BTZ(*geWf͉9+̳ې7ᒶKW-X潬j9(xoʿܔĹdff-[n ڴ VE/(ۻCɾUUMfeI?m]Nmq#׹=TR+Gw- 6 U#pDy  :v{vg/jBFS[b[O>zG499?rCd&ˮ/~јѡ򗓿m|x31^VwwO| (hSЧc3-bKGD pHYs  tIME 6 B\<IDAT8˅Kh]es1mA`jh[-E(FEaA!bIȐ*BX"؁4)NURZ!Mhjssm؋^-\gg ]o|Ҭ[346>zd ]#8Oݺt{5uIXN!I=@Vf=v1}e>;fvnvxaHrʪJF`D¹WZ]S%S)WAb |0K=So7D~\~q-˟\aMZ,S'*} F`Nnz674U H3Q5 B.@ $pd!s#~<<+"x M0B\t8K@zB@F&S`cbP-`'{[! eDh;VEX0fK9-0IWfH  0Q){`##xFW<+*x<$9E[-qWW.(I+6aa@.y24x6_-"bbϫp@t~,/;m%h^ uf@Wp~<5j>{-]cK'Xto(hw?G%fIq^D$.Tʳ?D*A, `6B$BB dr`)B(Ͱ*`/@4Qhp.U=pa( Aa!ڈbX#!H$ ɈQ"K5H1RT UH=r9\F;2G1Q= C7F dt1r=6Ыhڏ>C03l0.B8, c˱" VcϱwE 6wB aAHXLXNH $4 7 Q'"K&b21XH,#/{C7$C2'ITFnR#,4H#dk9, +ȅ3![ b@qS(RjJ4e2AURݨT5ZBRQ4u9̓IKhhitݕNWGw Ljg(gwLӋT071oUX**| J&*/Tު UUT^S}FU3S ԖUPSSg;goT?~YYLOCQ_ cx,!k u5&|v*=9C3J3WRf?qtN (~))4L1e\kXHQG6EYAJ'\'GgSSݧ M=:.kDwn^Loy}/TmG X $ <5qo</QC]@Caaᄑ.ȽJtq]zۯ6iܟ4)Y3sCQ? 
0k߬~OCOg#/c/Wװwa>>r><72Y_7ȷOo_C#dz%gA[z|!?:eAAA!h쐭!ΑiP~aa~ 'W?pX15wCsDDDޛg1O9-J5*>.j<74?.fYXXIlK9.*6nl {/]py.,:@LN8A*%w% yg"/6шC\*NH*Mz쑼5y$3,幄'L Lݛ:v m2=:1qB!Mggfvˬen/kY- BTZ(*geWf͉9+̳ې7ᒶKW-X潬j9(xoʿܔĹdff-[n ڴ VE/(ۻCɾUUMfeI?m]Nmq#׹=TR+Gw- 6 U#pDy  :v{vg/jBFS[b[O>zG499?rCd&ˮ/~јѡ򗓿m|x31^VwwO| (hSЧc3-bKGD pHYs  tIME 1;VIDAT8ukU?sg4h`G1 RQܸp%Bn"bЍXJ .4V iZ##T;m!4bP~7r>ιbwc;m;oӍAΆ ζZ^/|s{;yR=9(rtVoG1w#_ө{*E&!(LVuoᲵ‘D PG4 :&~*ݳreu: S-,U^E&JY[P!RB ŖޞʖR@_ȐdBfNvHf"2T]R j'B1ddAak/DIJD D2H&L`&L $Ex,6|~_\P $MH`I=@Z||ttvgcЕWTZ'3rje"ܵx9W> mb|byfFRx{w%DZC$wdցHmWnta(M<~;9]C/_;Տ#}o`zSڷ_>:;x컓?yݩ|}~wam-/7=0S5RP"*֯ IENDB`PK!lA[{gtt$xml4h-develop/_static/up-pressed.pngPNG  IHDRasRGBbKGDC pHYs B(xtIME ,ZeIDAT8͓jA*WKk-,By@- و/`cXYh!6jf GrOlXvvfk2!p!GOOԲ &zf 6|M~%`]* ΛM]K ZĆ1Er%ȶcm1`%xml4h-develop/_static/ajax-loader.gifGIF89aU|NU|l!Created with ajaxload.info! ! NETSCAPE2.0,30Ikc:Nf E1º.`q-[9ݦ9 JkH! ,4N!  DqBQT`1 `LE[|ua C%$*! ,62#+AȐ̔V/cNIBap ̳ƨ+Y2d! ,3b%+2V_ ! 1DaFbR]=08,Ȥr9L! ,2r'+JdL &v`\bThYB)@<&,ȤR! ,3 9tڞ0!.BW1  sa50 m)J! ,2 ٜU]qp`a4AF0` @1Α! ,20IeBԜ) q10ʰPaVڥ ub[;PK!lA:>>>$xml4h-develop/_static/searchtools.js/* * searchtools.js_t * ~~~~~~~~~~~~~~~~ * * Sphinx JavaScript utilties for the full-text search. * * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ /** * helper function to return a node containing the * search summary for a given text. keywords is a list * of stemmed words, hlwords is the list of normal, unstemmed * words. the first one is used to find the occurance, the * latter for highlighting it. */ jQuery.makeSearchSummary = function(text, keywords, hlwords) { var textLower = text.toLowerCase(); var start = 0; $.each(keywords, function() { var i = textLower.indexOf(this.toLowerCase()); if (i > -1) start = i; }); start = Math.max(start - 120, 0); var excerpt = ((start > 0) ? '...' : '') + $.trim(text.substr(start, 240)) + ((start + 240 - text.length) ? '...' : ''); var rv = $('
').text(excerpt); $.each(hlwords, function() { rv = rv.highlightText(this, 'highlighted'); }); return rv; } /** * Porter Stemmer */ var Stemmer = function() { var step2list = { ational: 'ate', tional: 'tion', enci: 'ence', anci: 'ance', izer: 'ize', bli: 'ble', alli: 'al', entli: 'ent', eli: 'e', ousli: 'ous', ization: 'ize', ation: 'ate', ator: 'ate', alism: 'al', iveness: 'ive', fulness: 'ful', ousness: 'ous', aliti: 'al', iviti: 'ive', biliti: 'ble', logi: 'log' }; var step3list = { icate: 'ic', ative: '', alize: 'al', iciti: 'ic', ical: 'ic', ful: '', ness: '' }; var c = "[^aeiou]"; // consonant var v = "[aeiouy]"; // vowel var C = c + "[^aeiouy]*"; // consonant sequence var V = v + "[aeiou]*"; // vowel sequence var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 var s_v = "^(" + C + ")?" + v; // vowel in stem this.stemWord = function (w) { var stem; var suffix; var firstch; var origword = w; if (w.length < 3) return w; var re; var re2; var re3; var re4; firstch = w.substr(0,1); if (firstch == "y") w = firstch.toUpperCase() + w.substr(1); // Step 1a re = /^(.+?)(ss|i)es$/; re2 = /^(.+?)([^s])s$/; if (re.test(w)) w = w.replace(re,"$1$2"); else if (re2.test(w)) w = w.replace(re2,"$1$2"); // Step 1b re = /^(.+?)eed$/; re2 = /^(.+?)(ed|ing)$/; if (re.test(w)) { var fp = re.exec(w); re = new RegExp(mgr0); if (re.test(fp[1])) { re = /.$/; w = w.replace(re,""); } } else if (re2.test(w)) { var fp = re2.exec(w); stem = fp[1]; re2 = new RegExp(s_v); if (re2.test(stem)) { w = stem; re2 = /(at|bl|iz)$/; re3 = new RegExp("([^aeiouylsz])\\1$"); re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); if (re2.test(w)) w = w + "e"; else if (re3.test(w)) { re = /.$/; w = w.replace(re,""); } else if (re4.test(w)) w = w + "e"; } } // Step 1c re = /^(.+?)y$/; if (re.test(w)) { var fp = re.exec(w); stem = fp[1]; re = new RegExp(s_v); if (re.test(stem)) 
w = stem + "i"; } // Step 2 re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; if (re.test(w)) { var fp = re.exec(w); stem = fp[1]; suffix = fp[2]; re = new RegExp(mgr0); if (re.test(stem)) w = stem + step2list[suffix]; } // Step 3 re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; if (re.test(w)) { var fp = re.exec(w); stem = fp[1]; suffix = fp[2]; re = new RegExp(mgr0); if (re.test(stem)) w = stem + step3list[suffix]; } // Step 4 re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; re2 = /^(.+?)(s|t)(ion)$/; if (re.test(w)) { var fp = re.exec(w); stem = fp[1]; re = new RegExp(mgr1); if (re.test(stem)) w = stem; } else if (re2.test(w)) { var fp = re2.exec(w); stem = fp[1] + fp[2]; re2 = new RegExp(mgr1); if (re2.test(stem)) w = stem; } // Step 5 re = /^(.+?)e$/; if (re.test(w)) { var fp = re.exec(w); stem = fp[1]; re = new RegExp(mgr1); re2 = new RegExp(meq1); re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) w = stem; } re = /ll$/; re2 = new RegExp(mgr1); if (re.test(w) && re2.test(w)) { re = /.$/; w = w.replace(re,""); } // and turn initial Y back to y if (firstch == "y") w = firstch.toLowerCase() + w.substr(1); return w; } } /** * Search Module */ var Search = { _index : null, _queued_query : null, _pulse_status : -1, init : function() { var params = $.getQueryParameters(); if (params.q) { var query = params.q[0]; $('input[name="q"]')[0].value = query; this.performSearch(query); } }, loadIndex : function(url) { $.ajax({type: "GET", url: url, data: null, success: null, dataType: "script", cache: true}); }, setIndex : function(index) { var q; this._index = index; if ((q = this._queued_query) !== null) { this._queued_query = null; Search.query(q); } }, hasIndex : function() { return this._index !== null; }, deferQuery : function(query) { this._queued_query = query; }, 
stopPulse : function() { this._pulse_status = 0; }, startPulse : function() { if (this._pulse_status >= 0) return; function pulse() { Search._pulse_status = (Search._pulse_status + 1) % 4; var dotString = ''; for (var i = 0; i < Search._pulse_status; i++) dotString += '.'; Search.dots.text(dotString); if (Search._pulse_status > -1) window.setTimeout(pulse, 500); }; pulse(); }, /** * perform a search for something */ performSearch : function(query) { // create the required interface elements this.out = $('#search-results'); this.title = $('

' + _('Searching') + '

').appendTo(this.out); this.dots = $('').appendTo(this.title); this.status = $('

').appendTo(this.out); this.output = $('