| /* |
| * Copyright (C) 2007 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.harmony.xml.parsers; |
| |
| import java.io.IOException; |
| import java.net.URL; |
| import java.net.URLConnection; |
| import javax.xml.parsers.DocumentBuilder; |
| import libcore.io.IoUtils; |
| import org.apache.harmony.xml.dom.CDATASectionImpl; |
| import org.apache.harmony.xml.dom.DOMImplementationImpl; |
| import org.apache.harmony.xml.dom.DocumentImpl; |
| import org.apache.harmony.xml.dom.DocumentTypeImpl; |
| import org.apache.harmony.xml.dom.TextImpl; |
| import org.kxml2.io.KXmlParser; |
| import org.w3c.dom.Attr; |
| import org.w3c.dom.DOMImplementation; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.DocumentType; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.Text; |
| import org.xml.sax.EntityResolver; |
| import org.xml.sax.ErrorHandler; |
| import org.xml.sax.InputSource; |
| import org.xml.sax.SAXException; |
| import org.xml.sax.SAXParseException; |
| import org.xml.sax.helpers.LocatorImpl; |
| import org.xmlpull.v1.XmlPullParser; |
| import org.xmlpull.v1.XmlPullParserException; |
| |
| /** |
| * Builds a DOM using KXmlParser. |
| */ |
| class DocumentBuilderImpl extends DocumentBuilder { |
| |
| private static DOMImplementationImpl dom = DOMImplementationImpl.getInstance(); |
| |
| private boolean coalescing; |
| private EntityResolver entityResolver; |
| private ErrorHandler errorHandler; |
| private boolean ignoreComments; |
| private boolean ignoreElementContentWhitespace; |
| private boolean namespaceAware; |
| // adding a new field? don't forget to update reset(). |
| |
| @Override public void reset() { |
| coalescing = false; |
| entityResolver = null; |
| errorHandler = null; |
| ignoreComments = false; |
| ignoreElementContentWhitespace = false; |
| namespaceAware = false; |
| } |
| |
| @Override |
| public DOMImplementation getDOMImplementation() { |
| return dom; |
| } |
| |
| @Override |
| public boolean isNamespaceAware() { |
| return namespaceAware; |
| } |
| |
| @Override |
| public boolean isValidating() { |
| return false; |
| } |
| |
| @Override |
| public Document newDocument() { |
| return dom.createDocument(null, null, null); |
| } |
| |
| @Override |
| public Document parse(InputSource source) throws SAXException, IOException { |
| if (source == null) { |
| throw new IllegalArgumentException("source == null"); |
| } |
| |
| String namespaceURI = null; |
| String qualifiedName = null; |
| DocumentType doctype = null; |
| String inputEncoding = source.getEncoding(); |
| String systemId = source.getSystemId(); |
| DocumentImpl document = new DocumentImpl( |
| dom, namespaceURI, qualifiedName, doctype, inputEncoding); |
| document.setDocumentURI(systemId); |
| |
| KXmlParser parser = new KXmlParser(); |
| try { |
| parser.keepNamespaceAttributes(); |
| parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, namespaceAware); |
| |
| if (source.getByteStream() != null) { |
| parser.setInput(source.getByteStream(), inputEncoding); |
| } else if (source.getCharacterStream() != null) { |
| parser.setInput(source.getCharacterStream()); |
| } else if (systemId != null) { |
| URL url = new URL(systemId); |
| URLConnection urlConnection = url.openConnection(); |
| urlConnection.connect(); |
| // TODO: if null, extract the inputEncoding from the Content-Type header? |
| parser.setInput(urlConnection.getInputStream(), inputEncoding); |
| } else { |
| throw new SAXParseException("InputSource needs a stream, reader or URI", null); |
| } |
| |
| if (parser.nextToken() == XmlPullParser.END_DOCUMENT) { |
| throw new SAXParseException("Unexpected end of document", null); |
| } |
| |
| parse(parser, document, document, XmlPullParser.END_DOCUMENT); |
| |
| parser.require(XmlPullParser.END_DOCUMENT, null, null); |
| } catch (XmlPullParserException ex) { |
| if (ex.getDetail() instanceof IOException) { |
| throw (IOException) ex.getDetail(); |
| } |
| if (ex.getDetail() instanceof RuntimeException) { |
| throw (RuntimeException) ex.getDetail(); |
| } |
| |
| LocatorImpl locator = new LocatorImpl(); |
| |
| locator.setPublicId(source.getPublicId()); |
| locator.setSystemId(systemId); |
| locator.setLineNumber(ex.getLineNumber()); |
| locator.setColumnNumber(ex.getColumnNumber()); |
| |
| SAXParseException newEx = new SAXParseException(ex.getMessage(), locator); |
| |
| if (errorHandler != null) { |
| errorHandler.error(newEx); |
| } |
| |
| throw newEx; |
| } finally { |
| IoUtils.closeQuietly(parser); |
| } |
| |
| return document; |
| } |
| |
| /** |
| * Implements the whole parsing of the XML document. The XML pull parser is |
| * actually more of a tokenizer, and we are doing a classical recursive |
| * descent parsing (the method invokes itself for XML elements). Our |
| * approach to parsing does accept some illegal documents (more than one |
| * root element, for example). The assumption is that the DOM implementation |
| * throws the proper exceptions in these cases. |
| * |
| * @param parser The XML pull parser we're reading from. |
| * @param document The document we're building. |
| * @param node The node we're currently on (initially the document itself). |
| * @param endToken The token that will end this recursive call. Either |
| * XmlPullParser.END_DOCUMENT or XmlPullParser.END_TAG. |
| * |
| * @throws XmlPullParserException If a parsing error occurs. |
| * @throws IOException If a general IO error occurs. |
| */ |
| private void parse(KXmlParser parser, DocumentImpl document, Node node, |
| int endToken) throws XmlPullParserException, IOException { |
| |
| int token = parser.getEventType(); |
| |
| /* |
| * The main parsing loop. The precondition is that we are already on the |
| * token to be processed. This holds for each iteration of the loop, so |
| * the inner statements have to ensure that (in particular the recursive |
| * call). |
| */ |
| while (token != endToken && token != XmlPullParser.END_DOCUMENT) { |
| if (token == XmlPullParser.PROCESSING_INSTRUCTION) { |
| /* |
| * Found a processing instructions. We need to split the token |
| * text at the first whitespace character. |
| */ |
| String text = parser.getText(); |
| |
| int dot = text.indexOf(' '); |
| |
| String target = (dot != -1 ? text.substring(0, dot) : text); |
| String data = (dot != -1 ? text.substring(dot + 1) : ""); |
| |
| node.appendChild(document.createProcessingInstruction(target, |
| data)); |
| } else if (token == XmlPullParser.DOCDECL) { |
| String name = parser.getRootElementName(); |
| String publicId = parser.getPublicId(); |
| String systemId = parser.getSystemId(); |
| document.appendChild(new DocumentTypeImpl(document, name, publicId, systemId)); |
| |
| } else if (token == XmlPullParser.COMMENT) { |
| /* |
| * Found a comment. We simply take the token text, but we only |
| * create a node if the client wants to see comments at all. |
| */ |
| if (!ignoreComments) { |
| node.appendChild(document.createComment(parser.getText())); |
| } |
| } else if (token == XmlPullParser.IGNORABLE_WHITESPACE) { |
| /* |
| * Found some ignorable whitespace. We only add it if the client |
| * wants to see whitespace. Whitespace before and after the |
| * document element is always ignored. |
| */ |
| if (!ignoreElementContentWhitespace && document != node) { |
| appendText(document, node, token, parser.getText()); |
| } |
| } else if (token == XmlPullParser.TEXT || token == XmlPullParser.CDSECT) { |
| /* |
| * Found a piece of text (possibly encoded as a CDATA section). |
| * That's the easiest case. We simply take it and create a new text node, |
| * or merge with an adjacent text node. |
| */ |
| appendText(document, node, token, parser.getText()); |
| } else if (token == XmlPullParser.ENTITY_REF) { |
| /* |
| * Found an entity reference. If an entity resolver is |
| * installed, we replace it by text (if possible). Otherwise we |
| * add an entity reference node. |
| */ |
| String entity = parser.getName(); |
| |
| if (entityResolver != null) { |
| // TODO Implement this... |
| } |
| |
| String resolved = resolvePredefinedOrCharacterEntity(entity); |
| if (resolved != null) { |
| appendText(document, node, token, resolved); |
| } else { |
| node.appendChild(document.createEntityReference(entity)); |
| } |
| } else if (token == XmlPullParser.START_TAG) { |
| /* |
| * Found an element start tag. We create an element node with |
| * the proper info and attributes. We then invoke parse() |
| * recursively to handle the next level of nesting. When we |
| * return from this call, we check that we are on the proper |
| * element end tag. The whole handling differs somewhat |
| * depending on whether the parser is namespace-aware or not. |
| */ |
| if (namespaceAware) { |
| // Collect info for element node |
| String namespace = parser.getNamespace(); |
| String name = parser.getName(); |
| String prefix = parser.getPrefix(); |
| |
| if ("".equals(namespace)) { |
| namespace = null; |
| } |
| |
| // Create element node and wire it correctly |
| Element element = document.createElementNS(namespace, name); |
| element.setPrefix(prefix); |
| node.appendChild(element); |
| |
| for (int i = 0; i < parser.getAttributeCount(); i++) { |
| // Collect info for a single attribute node |
| String attrNamespace = parser.getAttributeNamespace(i); |
| String attrPrefix = parser.getAttributePrefix(i); |
| String attrName = parser.getAttributeName(i); |
| String attrValue = parser.getAttributeValue(i); |
| |
| if ("".equals(attrNamespace)) { |
| attrNamespace = null; |
| } |
| |
| // Create attribute node and wire it correctly |
| Attr attr = document.createAttributeNS(attrNamespace, attrName); |
| attr.setPrefix(attrPrefix); |
| attr.setValue(attrValue); |
| element.setAttributeNodeNS(attr); |
| } |
| |
| // Recursive descent |
| token = parser.nextToken(); |
| parse(parser, document, element, XmlPullParser.END_TAG); |
| |
| // Expect the element's end tag here |
| parser.require(XmlPullParser.END_TAG, namespace, name); |
| |
| } else { |
| // Collect info for element node |
| String name = parser.getName(); |
| |
| // Create element node and wire it correctly |
| Element element = document.createElement(name); |
| node.appendChild(element); |
| |
| for (int i = 0; i < parser.getAttributeCount(); i++) { |
| // Collect info for a single attribute node |
| String attrName = parser.getAttributeName(i); |
| String attrValue = parser.getAttributeValue(i); |
| |
| // Create attribute node and wire it correctly |
| Attr attr = document.createAttribute(attrName); |
| attr.setValue(attrValue); |
| element.setAttributeNode(attr); |
| } |
| |
| // Recursive descent |
| token = parser.nextToken(); |
| parse(parser, document, element, XmlPullParser.END_TAG); |
| |
| // Expect the element's end tag here |
| parser.require(XmlPullParser.END_TAG, "", name); |
| } |
| } |
| |
| token = parser.nextToken(); |
| } |
| } |
| |
| /** |
| * @param token the XML pull parser token type, such as XmlPullParser.CDSECT |
| * or XmlPullParser.ENTITY_REF. |
| */ |
| private void appendText(DocumentImpl document, Node parent, int token, String text) { |
| // Ignore empty runs. |
| if (text.isEmpty()) { |
| return; |
| } |
| // Merge with any previous text node if possible. |
| if (coalescing || token != XmlPullParser.CDSECT) { |
| Node lastChild = parent.getLastChild(); |
| if (lastChild != null && lastChild.getNodeType() == Node.TEXT_NODE) { |
| Text textNode = (Text) lastChild; |
| textNode.appendData(text); |
| return; |
| } |
| } |
| // Okay, we really do need a new text node |
| parent.appendChild(token == XmlPullParser.CDSECT |
| ? new CDATASectionImpl(document, text) |
| : new TextImpl(document, text)); |
| } |
| |
| @Override |
| public void setEntityResolver(EntityResolver resolver) { |
| entityResolver = resolver; |
| } |
| |
| @Override |
| public void setErrorHandler(ErrorHandler handler) { |
| errorHandler = handler; |
| } |
| |
| /** |
| * Controls whether this DocumentBuilder ignores comments. |
| */ |
| public void setIgnoreComments(boolean value) { |
| ignoreComments = value; |
| } |
| |
| public void setCoalescing(boolean value) { |
| coalescing = value; |
| } |
| |
| /** |
| * Controls whether this DocumentBuilder ignores element content whitespace. |
| */ |
| public void setIgnoreElementContentWhitespace(boolean value) { |
| ignoreElementContentWhitespace = value; |
| } |
| |
| /** |
| * Controls whether this DocumentBuilder is namespace-aware. |
| */ |
| public void setNamespaceAware(boolean value) { |
| namespaceAware = value; |
| } |
| |
| /** |
| * Returns the replacement text or null if {@code entity} isn't predefined. |
| */ |
| private String resolvePredefinedOrCharacterEntity(String entityName) { |
| // Character references, section 4.1 of the XML specification. |
| if (entityName.startsWith("#x")) { |
| return resolveCharacterReference(entityName.substring(2), 16); |
| } else if (entityName.startsWith("#")) { |
| return resolveCharacterReference(entityName.substring(1), 10); |
| } |
| // Predefined entities, section 4.6 of the XML specification. |
| if ("lt".equals(entityName)) { |
| return "<"; |
| } else if ("gt".equals(entityName)) { |
| return ">"; |
| } else if ("amp".equals(entityName)) { |
| return "&"; |
| } else if ("apos".equals(entityName)) { |
| return "'"; |
| } else if ("quot".equals(entityName)) { |
| return "\""; |
| } else { |
| return null; |
| } |
| } |
| |
| private String resolveCharacterReference(String value, int base) { |
| try { |
| int ch = Integer.parseInt(value, base); |
| if (ch < 0 || ch > Character.MAX_VALUE) { |
| return null; |
| } |
| return String.valueOf((char) ch); |
| } catch (NumberFormatException ex) { |
| return null; |
| } |
| } |
| } |