XML DOM Parsing — DOM Tree, Nodes, Traversal, Modification, Java/JavaScript Examples
The Document Object Model (DOM) represents an XML document as a tree of nodes in memory, enabling full read/write access. This guide covers DOM parsing fundamentals, traversal patterns, modification techniques, and performance considerations in Java and JavaScript.
What You’ll Learn
You’ll understand the DOM tree structure and node types, traverse documents with recursive and iterative patterns, modify XML (add/remove/update nodes), parse and manipulate XML in Java and JavaScript, and optimize DOM performance for large documents.
Learning Path
flowchart LR
A[XML Basics] --> B[DOM Parsing<br/>You are here]
B --> C[SAX Parsing]
C --> D[XPath Integration]
style B fill:#f90,color:#fff
DOM Tree Structure
The DOM represents XML as an in-memory tree:
<bookstore>
<book category="fiction">
<title lang="en">The DOM Guide</title>
<author>Alice Smith</author>
<price>29.99</price>
</book>
</bookstore>
This becomes a tree with these node types:
| Node Type | Example |
|---|---|
| Document | Root of the tree |
| Element | <book>, <title> |
| Text | “The DOM Guide” |
| Attribute | category="fiction", lang="en" |
| Comment | <!-- comment --> |
| Processing Instruction | <?xml-stylesheet type="text/xsl" href="style.xsl"?> |
Java DOM Parsing
Loading and Parsing
import javax.xml.parsers.*;
import org.w3c.dom.*;
import java.io.File;
public class DomParserExample {
/**
 * Parses {@code bookstore.xml} into a DOM tree and prints the name of its root element.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the parser cannot be created or the file cannot be read or parsed
 */
public static void main(String[] args) throws Exception {
    // Factory -> builder -> in-memory Document, in one chain.
    Document document = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .parse(new File("bookstore.xml"));

    // Merge adjacent text nodes beneath the root before reading from the tree.
    Element root = document.getDocumentElement();
    root.normalize();

    System.out.println("Root: " + root.getNodeName());
}
}
Expected output:
Root: bookstore
Traversing the DOM Tree
public class DomTraversal {
/**
 * Prints {@code node} and all of its descendants, one line per node, in document order.
 *
 * <p>Each line is indented by one space per level of {@code depth} and shows the node's
 * type label and name. Text nodes additionally show their trimmed content when that
 * content is not empty; whitespace-only text nodes print without a value.
 *
 * @param node  the node to print; its children are visited recursively
 * @param depth nesting level of {@code node}, used only for indentation
 */
public static void traverse(Node node, int depth) {
    StringBuilder line = new StringBuilder();
    for (int level = 0; level < depth; level++) {
        line.append(" ");
    }
    line.append(getNodeTypeName(node.getNodeType()))
            .append(": ")
            .append(node.getNodeName());

    if (node.getNodeType() == Node.TEXT_NODE) {
        String trimmed = node.getTextContent().trim();
        if (!trimmed.isEmpty()) {
            line.append(" = \"").append(trimmed).append("\"");
        }
    }
    System.out.println(line);

    // Walk the sibling chain instead of indexing into the child NodeList.
    for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
        traverse(child, depth + 1);
    }
}
/**
 * Maps a DOM node-type code (as returned by {@link Node#getNodeType()}) to a short label.
 *
 * <p>Covers the node types a parsed XML document commonly contains, including CDATA
 * sections, processing instructions and the document type declaration.
 *
 * @param type one of the {@code Node.*_NODE} constants
 * @return the label for {@code type}, or {@code "UNKNOWN"} for any other code
 */
private static String getNodeTypeName(short type) {
    return switch (type) {
        case Node.DOCUMENT_NODE -> "DOCUMENT";
        case Node.DOCUMENT_TYPE_NODE -> "DOCTYPE";
        case Node.ELEMENT_NODE -> "ELEMENT";
        case Node.TEXT_NODE -> "TEXT";
        case Node.CDATA_SECTION_NODE -> "CDATA";
        case Node.ATTRIBUTE_NODE -> "ATTR";
        case Node.COMMENT_NODE -> "COMMENT";
        case Node.PROCESSING_INSTRUCTION_NODE -> "PI";
        default -> "UNKNOWN";
    };
}
/**
 * Parses {@code bookstore.xml} and prints its complete node tree.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the parser cannot be created or the file cannot be read or parsed
 */
public static void main(String[] args) throws Exception {
    File input = new File("bookstore.xml");
    DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

    // Start at the Document node itself (depth 0) so it appears in the output.
    traverse(parser.parse(input), 0);
}
}
Expected output:
DOCUMENT: #document
 ELEMENT: bookstore
  TEXT: #text
  ELEMENT: book
   TEXT: #text
   ELEMENT: title
    TEXT: #text = "The DOM Guide"
   TEXT: #text
   ELEMENT: author
    TEXT: #text = "Alice Smith"
   TEXT: #text
   ELEMENT: price
    TEXT: #text = "29.99"
   TEXT: #text
  TEXT: #text
Finding Specific Elements
public class DomSearch {
/**
 * Demonstrates tag-name lookups on {@code bookstore.xml}: lists every {@code <title>}
 * with its {@code lang} attribute, then prints the title of each book whose
 * {@code category} attribute is {@code "fiction"}.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the parser cannot be created or the file cannot be read or parsed
 */
public static void main(String[] args) throws Exception {
    Document document = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .parse(new File("bookstore.xml"));

    // Every <title> anywhere in the document.
    NodeList titleNodes = document.getElementsByTagName("title");
    System.out.println("Found " + titleNodes.getLength() + " titles:");
    for (int idx = 0; idx < titleNodes.getLength(); idx++) {
        Element titleEl = (Element) titleNodes.item(idx);
        System.out.println(" Title: " + titleEl.getTextContent());
        System.out.println(" Language: " + titleEl.getAttribute("lang"));
    }

    // Books filtered by their category attribute.
    NodeList bookNodes = document.getElementsByTagName("book");
    for (int idx = 0; idx < bookNodes.getLength(); idx++) {
        Element bookEl = (Element) bookNodes.item(idx);
        if (!"fiction".equals(bookEl.getAttribute("category"))) {
            continue;
        }
        Node firstTitle = bookEl.getElementsByTagName("title").item(0);
        System.out.println("Fiction book: " + firstTitle.getTextContent());
    }
}
}
Expected output:
Found 1 titles:
Title: The DOM Guide
Language: en
Fiction book: The DOM Guide
Modifying the DOM
public class DomModification {
/**
 * Loads {@code bookstore.xml}, appends a new book, changes the first book's price,
 * removes the first book's author, and writes the result to
 * {@code bookstore_updated.xml}.
 *
 * <p>The price update and author removal are skipped when the first book has no such
 * element, instead of failing with a {@code NullPointerException}.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if parsing, transformation, or file access fails
 */
public static void main(String[] args) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    DocumentBuilder builder = factory.newDocumentBuilder();
    Document document = builder.parse(new File("bookstore.xml"));
    Element root = document.getDocumentElement();

    // Add a new book
    Element newBook = document.createElement("book");
    newBook.setAttribute("category", "non-fiction");

    Element newTitle = document.createElement("title");
    newTitle.setAttribute("lang", "en");
    newTitle.setTextContent("XML for Beginners");
    newBook.appendChild(newTitle);

    Element newAuthor = document.createElement("author");
    newAuthor.setTextContent("Bob Jones");
    newBook.appendChild(newAuthor);

    Element newPrice = document.createElement("price");
    newPrice.setTextContent("39.99");
    newBook.appendChild(newPrice);

    root.appendChild(newBook);

    // The document now holds at least the book appended above, so item(0) is non-null.
    Element firstBook = (Element) document.getElementsByTagName("book").item(0);

    // Update existing book price (item(0) is null when the book has no <price>)
    Node price = firstBook.getElementsByTagName("price").item(0);
    if (price != null) {
        price.setTextContent("24.99");
    }

    // Remove the author element. getElementsByTagName searches the whole subtree, so
    // detach the match from its actual parent: firstBook.removeChild(...) would throw
    // NOT_FOUND_ERR if <author> were nested below a direct child.
    Node author = firstBook.getElementsByTagName("author").item(0);
    if (author != null) {
        author.getParentNode().removeChild(author);
    }

    // Write to file
    TransformerFactory transformerFactory = TransformerFactory.newInstance();
    Transformer transformer = transformerFactory.newTransformer();
    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
    DOMSource source = new DOMSource(document);
    StreamResult result = new StreamResult(new File("bookstore_updated.xml"));
    transformer.transform(source, result);
    System.out.println("XML file updated successfully");
}
}
JavaScript DOM Parsing (Browser)
/**
 * Fetches an XML resource in the browser and parses it into a DOM Document.
 *
 * @param {string} url - Address of the XML resource.
 * @returns {Promise<Document|null>} The parsed document, or null when the server
 *   answers with a non-2xx status or the body is not well-formed XML.
 */
async function parseXML(url) {
  const response = await fetch(url);
  // fetch() only rejects on network failure; a 404/500 still resolves, and its
  // error page must not be handed to the XML parser.
  if (!response.ok) {
    console.error('Fetch error:', response.status, response.statusText);
    return null;
  }
  const xmlText = await response.text();
  const parser = new DOMParser();
  const xmlDoc = parser.parseFromString(xmlText, 'text/xml');
  // DOMParser reports malformed XML as a <parsererror> element instead of throwing
  const parseError = xmlDoc.querySelector('parsererror');
  if (parseError) {
    console.error('Parse error:', parseError.textContent);
    return null;
  }
  return xmlDoc;
}
/**
 * Logs a node, its attributes, and its subtree to the console.
 *
 * Each nesting level is indented by one more space. Attributes are logged as
 * @name="value"; non-blank text children are logged as quoted, trimmed strings.
 *
 * @param {Node} node - Node to print.
 * @param {number} [depth=0] - Nesting level, used only for indentation.
 */
function traverseXML(node, depth = 0) {
  const pad = ' '.repeat(depth);
  console.log(`${pad}${node.nodeName}`);

  // Only elements carry an attribute map; other node types yield nothing here.
  for (const { name, value } of node.attributes || []) {
    console.log(`${pad} @${name}="${value}"`);
  }

  for (const child of node.childNodes) {
    if (child.nodeType !== Node.TEXT_NODE) {
      traverseXML(child, depth + 1);
      continue;
    }
    // Whitespace-only text (indentation between elements) is not printed.
    const text = child.textContent.trim();
    if (text) {
      console.log(`${pad} "${text}"`);
    }
  }
}
/**
 * Collects the <book> elements whose category attribute equals the given value.
 *
 * @param {Document} xmlDoc - Parsed XML document to search.
 * @param {string} category - Exact category value to match.
 * @returns {Element[]} Matching book elements in document order (a plain array).
 */
function findBooksByCategory(xmlDoc, category) {
  const matches = [];
  for (const book of Array.from(xmlDoc.getElementsByTagName('book'))) {
    if (book.getAttribute('category') === category) {
      matches.push(book);
    }
  }
  return matches;
}
// Usage
const xmlDoc = await parseXML('books.xml');
// parseXML() resolves to null when the XML cannot be parsed; stop with a clear
// message rather than a TypeError on xmlDoc.documentElement.
if (!xmlDoc) {
  throw new Error('Could not load or parse books.xml');
}
traverseXML(xmlDoc.documentElement);
const fictionBooks = findBooksByCategory(xmlDoc, 'fiction');
console.log(`Found ${fictionBooks.length} fiction books`);
JavaScript DOM Modification
function addBook(xmlDoc, title, author, price, category) {
const root = xmlDoc.documentElement;
const book = xmlDoc.createElement('book');
book.setAttribute('category', category);
const titleEl = xmlDoc.createElement('title');
titleEl.setAttribute('lang', 'en');
titleEl.textContent = title;
book.appendChild(titleEl);
const authorEl = xmlDoc.createElement('author');
authorEl.textContent = author;
book.appendChild(authorEl);
const priceEl = xmlDoc.createElement('price');
priceEl.textContent = price;
book.appendChild(priceEl);
root.appendChild(book);
// Serialize back to string
const serializer = new XMLSerializer();
return serializer.serializeToString(xmlDoc);
}Performance Optimization
DOM vs SAX Decision
| Factor | DOM | SAX |
|---|---|---|
| Memory | Entire tree in memory | Stream-based, low memory |
| Speed | Slower for large files | Fast parsing |
| Modification | Full read/write | Read-only |
| Random access | Yes | No |
| Best for | < 10MB files, need modifications | > 10MB files, read-only |
Memory Estimation
public class DomMemoryEstimate {
/**
 * Gives a rough estimate of the heap a DOM tree needs for an XML file of the given size.
 *
 * @param fileSizeBytes size of the XML file on disk, in bytes
 * @return the estimated in-memory size in bytes (seven times the file size)
 */
public static long estimateMemoryUsage(long fileSizeBytes) {
    // Rule of thumb: a DOM tree takes 5-10x the file size; 7x is used as a middle value.
    final long overheadFactor = 7;
    return overheadFactor * fileSizeBytes;
}
/**
 * Decides whether a file is small enough to load with DOM.
 *
 * @param fileSizeBytes size of the XML file on disk, in bytes
 * @return {@code true} when the estimated DOM footprint is below half of the JVM's
 *         maximum heap, {@code false} otherwise
 */
public static boolean shouldUseDom(long fileSizeBytes) {
    // Budget: at most 50% of the maximum heap may go to the DOM tree.
    long heapBudget = Runtime.getRuntime().maxMemory() / 2;
    return estimateMemoryUsage(fileSizeBytes) < heapBudget;
}
}
Common DOM Parsing Mistakes
1. Not Normalizing the Document
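Without document.getDocumentElement().normalize(), a single run of text can be split across several adjacent text nodes, and empty text nodes can remain in the tree. Always normalize after parsing. Note that normalize() does not remove the whitespace-only text nodes that indentation and line breaks create between elements — when iterating over getChildNodes(), check getNodeType() and skip the nodes you don't need.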
2. Assuming getElementsByTagName Returns Direct Children
getElementsByTagName is recursive — it searches the entire subtree. Use a direct traversal of getChildNodes() and check node type for direct children.
3. Forgetting to Handle Namespaces
Without namespace awareness, elements with prefixes (<xsd:schema>) may not match queries. Enable namespace awareness:
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
4. Modifying NodeList During Iteration
NodeList is live — modifications during iteration change the list and cause skipped or repeated elements. Collect items first, then modify:
// BAD
NodeList list = doc.getElementsByTagName("book");
for (int i = 0; i < list.getLength(); i++) {
Element book = (Element) list.item(i);
book.getParentNode().removeChild(book); // Skips elements!
}
// GOOD
List<Element> toRemove = new ArrayList<>();
for (int i = 0; i < list.getLength(); i++) {
toRemove.add((Element) list.item(i));
}
for (Element book : toRemove) {
book.getParentNode().removeChild(book);
}
5. Not Handling Parse Errors
Malformed XML throws SAXException. Always wrap parsing in try-catch:
try {
Document doc = builder.parse(file);
} catch (SAXException e) {
System.err.println("XML parse error: " + e.getMessage());
} catch (IOException e) {
System.err.println("File error: " + e.getMessage());
}
6. Memory Leaks with Large Documents
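DOM holds the entire document in memory. For files > 10MB, use SAX or StAX. Don't keep the Document — or any node taken from it — in long-lived fields, caches, or collections once you're done with it: every node references its owner document, so a single retained node keeps the whole tree from being garbage-collected.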
7. Ignoring Encoding
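The encoding named in the XML declaration must match the file's actual encoding. If the declaration says <?xml version="1.0" encoding="UTF-8"?> but the file is actually saved as ISO-8859-1, non-ASCII characters will be corrupted or the parser will reject the file as malformed.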
Practice Questions
1. What is the root node of a DOM document?
The Document object is the root. It contains exactly one Element child (the document element, e.g., <bookstore>), plus optional comments and processing instructions.
2. How does getElementsByTagName differ from getChildNodes?
getElementsByTagName is recursive — it returns all descendants matching the tag name. getChildNodes returns only direct children of the current node.
3. Why should you normalize the document after parsing? Normalization merges adjacent text nodes and removes empty text nodes, so each run of character data is a single Text node. It does not remove the whitespace-only text nodes between elements — traversal code still has to skip those.
4. When should you choose DOM over SAX? Choose DOM when you need to: (1) modify the document, (2) access nodes in any order, (3) perform complex queries, (4) work with documents under 10MB.
5. Challenge: You need to parse a 500MB XML file containing 10 years of transaction data, find all transactions over $10,000, and generate a summary report. DOM would use 3.5GB+ of memory. Design a better approach. Answer: Use SAX or StAX streaming parser. Process each transaction as it’s read, filter > $10,000, accumulate in-memory (just the summary, not the full document). Output report when the stream ends. Memory usage: < 100MB.
Mini Project: XML Document Editor
Create a Java class that reads, modifies, and saves XML:
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.*;
import java.io.File;
public class XmlEditor {
private Document document;
private final File file;
/**
 * Opens an editor on the given XML file by parsing it into a normalized,
 * namespace-aware DOM tree.
 *
 * @param file the XML file to load; {@link #save()} later writes back to this same file
 * @throws Exception if the parser cannot be created or the file cannot be read or parsed
 */
public XmlEditor(File file) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);

    Document parsed = factory.newDocumentBuilder().parse(file);
    // Merge adjacent text nodes before any lookups run against the tree.
    parsed.getDocumentElement().normalize();

    this.file = file;
    this.document = parsed;
}
/**
 * Prints one numbered line per {@code <book>} element in the form
 * {@code N. [category] title by author — $price}.
 *
 * <p>A book that lacks a title, author or price element is still listed; the missing
 * value is shown as {@code n/a} instead of aborting with a {@code NullPointerException}.
 */
public void listBooks() {
    NodeList books = document.getElementsByTagName("book");
    for (int i = 0; i < books.getLength(); i++) {
        Element book = (Element) books.item(i);
        System.out.printf("%d. [%s] %s by %s — $%s%n",
                i + 1,
                book.getAttribute("category"),
                firstDescendantText(book, "title"),
                firstDescendantText(book, "author"),
                firstDescendantText(book, "price"));
    }
}

/**
 * Returns the text of the first {@code tagName} descendant of {@code parent},
 * or {@code "n/a"} when {@code parent} has no such descendant.
 */
private static String firstDescendantText(Element parent, String tagName) {
    // NodeList.item() returns null for an out-of-range index, i.e. when nothing matched.
    Node match = parent.getElementsByTagName(tagName).item(0);
    return match == null ? "n/a" : match.getTextContent();
}
/**
 * Appends a new {@code <book>} element to the end of the document's root element.
 *
 * <p>The change is made in memory only; call {@link #save()} to persist it.
 *
 * @param title    text of the {@code <title>} child (created with {@code lang="en"})
 * @param author   text of the {@code <author>} child
 * @param price    text of the {@code <price>} child
 * @param category value of the book's {@code category} attribute
 */
public void addBook(String title, String author, String price, String category) {
    // Build the three children first...
    Element titleEl = document.createElement("title");
    titleEl.setTextContent(title);
    titleEl.setAttribute("lang", "en");

    Element authorEl = document.createElement("author");
    authorEl.setTextContent(author);

    Element priceEl = document.createElement("price");
    priceEl.setTextContent(price);

    // ...then assemble the book in title/author/price order and attach it to the root.
    Element book = document.createElement("book");
    book.setAttribute("category", category);
    book.appendChild(titleEl);
    book.appendChild(authorEl);
    book.appendChild(priceEl);

    document.getDocumentElement().appendChild(book);
}
/**
 * Sets a new price on every book whose title contains the given text, and prints a
 * confirmation line for each book changed.
 *
 * <p>The change is made in memory only; call {@link #save()} to persist it.
 *
 * @param titleMatch substring to look for in each {@code <title>} (case-sensitive)
 * @param newPrice   replacement text for the matching book's first {@code <price>}
 * @throws Exception declared for callers; nothing in this method throws a checked exception
 */
public void updatePrice(String titleMatch, String newPrice)
        throws Exception {
    NodeList titleNodes = document.getElementsByTagName("title");
    for (int idx = 0; idx < titleNodes.getLength(); idx++) {
        Element titleEl = (Element) titleNodes.item(idx);
        if (!titleEl.getTextContent().contains(titleMatch)) {
            continue;
        }
        // The title's parent is the element whose price gets replaced.
        Element owner = (Element) titleEl.getParentNode();
        Element priceEl = (Element) owner.getElementsByTagName("price").item(0);
        priceEl.setTextContent(newPrice);
        System.out.println("Updated price for: " + titleEl.getTextContent());
    }
}
/**
 * Serializes the in-memory document back to the file it was loaded from, with
 * indentation enabled, and prints the absolute path that was written.
 *
 * @throws Exception if the transformer cannot be created or the file cannot be written
 */
public void save() throws Exception {
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    // Implementation-specific output property requesting two spaces per indent level.
    serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");

    serializer.transform(new DOMSource(document), new StreamResult(file));
    System.out.println("Saved to: " + file.getAbsolutePath());
}
/**
 * Demo run against {@code books.xml}: lists the books, adds one, updates a price,
 * and saves the file.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if loading, updating, or saving the file fails
 */
public static void main(String[] args) throws Exception {
    File source = new File("books.xml");
    XmlEditor xmlEditor = new XmlEditor(source);

    System.out.println("=== Current Books ===");
    xmlEditor.listBooks();

    System.out.println("\n=== Adding New Book ===");
    xmlEditor.addBook("XML Patterns", "Carol Davis", "34.99", "non-fiction");

    System.out.println("\n=== Updating Price ===");
    xmlEditor.updatePrice("DOM Guide", "27.99");

    // Both in-memory changes are written in a single pass.
    xmlEditor.save();
}
}
FAQ
What’s Next
Built by the developers of Doda Browser, DodaZIP, and Durga Antivirus Pro. Updated 2026-06-20.
Built by the developers of DodaTech
Doda Browser, DodaZIP & Durga Antivirus Pro