Class MXParser

java.lang.Object
org.codehaus.plexus.util.xml.pull.MXParser
All Implemented Interfaces:
XmlPullParser

public class MXParser extends Object implements XmlPullParser
Absolutely minimal implementation of XMLPULL V1 API. Encoding handling done with XmlReader
See Also:
  • Field Details

    • XML_URI

      private static final String XML_URI
      See Also:
    • XMLNS_URI

      private static final String XMLNS_URI
      See Also:
    • FEATURE_XML_ROUNDTRIP

      private static final String FEATURE_XML_ROUNDTRIP
      See Also:
    • FEATURE_NAMES_INTERNED

      private static final String FEATURE_NAMES_INTERNED
      See Also:
    • PROPERTY_XMLDECL_VERSION

      private static final String PROPERTY_XMLDECL_VERSION
      See Also:
    • PROPERTY_XMLDECL_STANDALONE

      private static final String PROPERTY_XMLDECL_STANDALONE
      See Also:
    • PROPERTY_XMLDECL_CONTENT

      private static final String PROPERTY_XMLDECL_CONTENT
      See Also:
    • PROPERTY_LOCATION

      private static final String PROPERTY_LOCATION
      See Also:
    • allStringsInterned

      private boolean allStringsInterned
      Implementation notice: this is the instance variable that controls whether newString() is interning.

      NOTE: newStringIntern always returns interned strings and newString MAY return interned String depending on this variable.

      NOTE: by default in this minimal implementation it is false!

    • TRACE_SIZING

      private static final boolean TRACE_SIZING
      See Also:
    • processNamespaces

      private boolean processNamespaces
    • roundtripSupported

      private boolean roundtripSupported
    • location

      private String location
    • lineNumber

      private int lineNumber
    • columnNumber

      private int columnNumber
    • seenRoot

      private boolean seenRoot
    • reachedEnd

      private boolean reachedEnd
    • eventType

      private int eventType
    • emptyElementTag

      private boolean emptyElementTag
    • depth

      private int depth
    • elRawName

      private char[][] elRawName
    • elRawNameEnd

      private int[] elRawNameEnd
    • elRawNameLine

      private int[] elRawNameLine
    • elName

      private String[] elName
    • elPrefix

      private String[] elPrefix
    • elUri

      private String[] elUri
    • elNamespaceCount

      private int[] elNamespaceCount
    • fileEncoding

      private String fileEncoding
    • attributeCount

      private int attributeCount
    • attributeName

      private String[] attributeName
    • attributeNameHash

      private int[] attributeNameHash
    • attributePrefix

      private String[] attributePrefix
    • attributeUri

      private String[] attributeUri
    • attributeValue

      private String[] attributeValue
    • namespaceEnd

      private int namespaceEnd
    • namespacePrefix

      private String[] namespacePrefix
    • namespacePrefixHash

      private int[] namespacePrefixHash
    • namespaceUri

      private String[] namespaceUri
    • entityEnd

      private int entityEnd
    • entityName

      private String[] entityName
    • entityNameBuf

      private char[][] entityNameBuf
    • entityReplacement

      private String[] entityReplacement
    • entityReplacementBuf

      private char[][] entityReplacementBuf
    • entityNameHash

      private int[] entityNameHash
    • replacementMapTemplate

      private final EntityReplacementMap replacementMapTemplate
    • READ_CHUNK_SIZE

      private static final int READ_CHUNK_SIZE
      See Also:
    • reader

      private Reader reader
    • inputEncoding

      private String inputEncoding
    • bufLoadFactor

      private int bufLoadFactor
    • bufferLoadFactor

      private float bufferLoadFactor
    • buf

      private char[] buf
    • bufSoftLimit

      private int bufSoftLimit
    • preventBufferCompaction

      private boolean preventBufferCompaction
    • bufAbsoluteStart

      private int bufAbsoluteStart
    • bufStart

      private int bufStart
    • bufEnd

      private int bufEnd
    • pos

      private int pos
    • posStart

      private int posStart
    • posEnd

      private int posEnd
    • pc

      private char[] pc
    • pcStart

      private int pcStart
    • pcEnd

      private int pcEnd
    • usePC

      private boolean usePC
    • seenStartTag

      private boolean seenStartTag
    • seenEndTag

      private boolean seenEndTag
    • pastEndTag

      private boolean pastEndTag
    • seenAmpersand

      private boolean seenAmpersand
    • seenMarkup

      private boolean seenMarkup
    • seenDocdecl

      private boolean seenDocdecl
    • tokenize

      private boolean tokenize
    • text

      private String text
    • entityRefName

      private String entityRefName
    • xmlDeclVersion

      private String xmlDeclVersion
    • xmlDeclStandalone

      private Boolean xmlDeclStandalone
    • xmlDeclContent

      private String xmlDeclContent
    • BUF_NOT_RESOLVED

      private static final char[] BUF_NOT_RESOLVED
    • BUF_LT

      private static final char[] BUF_LT
    • BUF_AMP

      private static final char[] BUF_AMP
    • BUF_GT

      private static final char[] BUF_GT
    • BUF_APO

      private static final char[] BUF_APO
    • BUF_QUOT

      private static final char[] BUF_QUOT
    • resolvedEntityRefCharBuf

      private char[] resolvedEntityRefCharBuf
    • VERSION

      private static final char[] VERSION
    • NCODING

      private static final char[] NCODING
    • TANDALONE

      private static final char[] TANDALONE
    • YES

      private static final char[] YES
    • NO

      private static final char[] NO
    • LOOKUP_MAX

      private static final int LOOKUP_MAX
      See Also:
    • LOOKUP_MAX_CHAR

      private static final char LOOKUP_MAX_CHAR
      See Also:
    • lookupNameStartChar

      private static final boolean[] lookupNameStartChar
    • lookupNameChar

      private static final boolean[] lookupNameChar
  • Constructor Details

  • Method Details

    • resetStringCache

      private void resetStringCache()
    • newString

      private String newString(char[] cbuf, int off, int len)
    • newStringIntern

      private String newStringIntern(char[] cbuf, int off, int len)
    • ensureElementsCapacity

      private void ensureElementsCapacity()
      Make sure that we have enough space to keep the element stack. It will always create one additional slot beyond the current depth.
    • ensureAttributesCapacity

      private void ensureAttributesCapacity(int size)
    • ensureNamespacesCapacity

      private void ensureNamespacesCapacity(int size)
    • fastHash

      private static final int fastHash(char[] ch, int off, int len)
    • ensureEntityCapacity

      private void ensureEntityCapacity()
    • reset

      private void reset()
    • setupFromTemplate

      public void setupFromTemplate()
    • setFeature

      public void setFeature(String name, boolean state) throws XmlPullParserException
      Method setFeature
      Specified by:
      setFeature in interface XmlPullParser
      Parameters:
      name - a String
      state - a boolean
      Throws:
      XmlPullParserException - issue
    • getFeature

      public boolean getFeature(String name)
      Unknown features are always returned as false.
      Specified by:
      getFeature in interface XmlPullParser
      Parameters:
      name - The name of feature to be retrieved.
      Returns:
      The value of the feature.
    • setProperty

      public void setProperty(String name, Object value) throws XmlPullParserException
      Description copied from interface: XmlPullParser
      Set the value of a property. The property name is any fully-qualified URI.
      Specified by:
      setProperty in interface XmlPullParser
      Parameters:
      name - property name
      value - property value
      Throws:
      XmlPullParserException - If the property is not supported or can not be set
      XmlPullParserException - parsing issue
    • getProperty

      public Object getProperty(String name)
      Description copied from interface: XmlPullParser
      Look up the value of a property. The property name is any fully-qualified URI.

      NOTE: unknown properties are always returned as null.

      Specified by:
      getProperty in interface XmlPullParser
      Parameters:
      name - The name of property to be retrieved.
      Returns:
      The value of named property.
    • setInput

      public void setInput(Reader in) throws XmlPullParserException
      Description copied from interface: XmlPullParser
      Set the input source for parser to the given reader and resets the parser. The event type is set to the initial value START_DOCUMENT. Setting the reader to null will just stop parsing and reset parser state, allowing the parser to free internal resources such as parsing buffers.
      Specified by:
      setInput in interface XmlPullParser
      Parameters:
      in - the Reader
      Throws:
      XmlPullParserException - parsing issue
    • setInput

      public void setInput(InputStream inputStream, String inputEncoding) throws XmlPullParserException
      Description copied from interface: XmlPullParser
      Sets the input stream the parser is going to process. This call resets the parser state and sets the event type to the initial value START_DOCUMENT.

      NOTE: If an input encoding string is passed, it MUST be used. Otherwise, if inputEncoding is null, the parser SHOULD try to determine input encoding following XML 1.0 specification (see below). If encoding detection is supported then following feature http://xmlpull.org/v1/doc/features.html#detect-encoding MUST be true and otherwise it must be false

      Specified by:
      setInput in interface XmlPullParser
      Parameters:
      inputStream - contains a raw byte input stream of possibly unknown encoding (when inputEncoding is null).
      inputEncoding - if not null it MUST be used as encoding for inputStream
      Throws:
      XmlPullParserException - parsing issue
    • getInputEncoding

      public String getInputEncoding()
      Specified by:
      getInputEncoding in interface XmlPullParser
      Returns:
      the input encoding if known, null otherwise. If setInput(InputStream, inputEncoding) was called with an inputEncoding value other than null, this value must be returned from this method. Otherwise, if inputEncoding is null and the parser supports the encoding detection feature (http://xmlpull.org/v1/doc/features.html#detect-encoding), it must return the detected encoding. If setInput(Reader) was called, null is returned. After first call to next if XML declaration was present this method will return encoding declared.
    • defineEntityReplacementText

      public void defineEntityReplacementText(String entityName, String replacementText) throws XmlPullParserException
      Description copied from interface: XmlPullParser
      Set new value for entity replacement text as defined in XML 1.0 Section 4.5 Construction of Internal Entity Replacement Text. If FEATURE_PROCESS_DOCDECL or FEATURE_VALIDATION are set, calling this function will result in an exception -- when processing of DOCDECL is enabled, there is no need to set the entity replacement text manually.

      The motivation for this function is to allow very small implementations of XMLPULL that will work in J2ME environments. Though these implementations may not be able to process the document type declaration, they still can work with known DTDs by using this function.

      Please note: The given value is used literally as replacement text and it corresponds to declaring an entity in the DTD that has all special characters escaped: left angle bracket is replaced with &lt;, ampersand with &amp; and so on.

      Note: The given value is the literal replacement text and must not contain any other entity reference (if it contains any entity reference there will be no further replacement).

      Note: The list of pre-defined entity names will always contain standard XML entities such as amp (&), lt (<), gt (>), quot ("), and apos ('). Those cannot be redefined by this method!

      Specified by:
      defineEntityReplacementText in interface XmlPullParser
      Parameters:
      entityName - entity name
      replacementText - replacement text
      Throws:
      XmlPullParserException - parsing issue
      See Also:
    • getNamespaceCount

      public int getNamespaceCount(int depth) throws XmlPullParserException
      Specified by:
      getNamespaceCount in interface XmlPullParser
      Parameters:
      depth - depth
      Returns:
      the numbers of elements in the namespace stack for the given depth. If namespaces are not enabled, 0 is returned.

      NOTE: when parser is on END_TAG then it is allowed to call this function with getDepth()+1 argument to retrieve position of namespace prefixes and URIs that were declared on corresponding START_TAG.

      NOTE: to retrieve the list of namespaces declared in the current element:

             XmlPullParser pp = ...
             int nsStart = pp.getNamespaceCount(pp.getDepth()-1);
             int nsEnd = pp.getNamespaceCount(pp.getDepth());
             for (int i = nsStart; i < nsEnd; i++) {
                String prefix = pp.getNamespacePrefix(i);
                String ns = pp.getNamespaceUri(i);
                 // ...
            }
       
      Throws:
      XmlPullParserException - parsing issue
      See Also:
    • getNamespacePrefix

      public String getNamespacePrefix(int pos) throws XmlPullParserException
      Specified by:
      getNamespacePrefix in interface XmlPullParser
      Parameters:
      pos - namespace stack position
      Returns:
      Returns the namespace prefix for the given position in the namespace stack. Default namespace declaration (xmlns='...') will have null as prefix. If the given index is out of range, an exception is thrown. Please note: when the parser is on an END_TAG, namespace prefixes that were declared in the corresponding START_TAG are still accessible although they are no longer in scope.
      Throws:
      XmlPullParserException - parsing issue
    • getNamespaceUri

      public String getNamespaceUri(int pos) throws XmlPullParserException
      Specified by:
      getNamespaceUri in interface XmlPullParser
      Parameters:
      pos - namespace stack position
      Returns:
      Returns the namespace URI for the given position in the namespace stack. If the position is out of range, an exception is thrown. NOTE: when the parser is on END_TAG, namespace prefixes that were declared in the corresponding START_TAG are still accessible even though they are not in scope.
      Throws:
      XmlPullParserException - parsing issue
    • getNamespace

      public String getNamespace(String prefix)
      Specified by:
      getNamespace in interface XmlPullParser
      Parameters:
      prefix - given prefix
      Returns:
      the URI corresponding to the given prefix, depending on current state of the parser.

      If the prefix was not declared in the current scope, null is returned. The default namespace is included in the namespace table and is available via getNamespace (null).

      This method is a convenience method for

       for ( int i = getNamespaceCount( getDepth() ) - 1; i >= 0; i-- )
       {
           if ( getNamespacePrefix( i ).equals( prefix ) )
           {
               return getNamespaceUri( i );
           }
       }
       return null;
       

      Please note: parser implementations may provide more efficient lookup, e.g. using a Hashtable. The 'xml' prefix is bound to "http://www.w3.org/XML/1998/namespace", as defined in the Namespaces in XML specification. Analogously, the 'xmlns' prefix is resolved to http://www.w3.org/2000/xmlns/

      See Also:
    • getDepth

      public int getDepth()
      Specified by:
      getDepth in interface XmlPullParser
      Returns:
      the current depth of the element. Outside the root element, the depth is 0. The depth is incremented by 1 when a start tag is reached. The depth is decremented AFTER the end tag event was observed.
       <!-- outside -->     0
       <root>                  1
         sometext                 1
           <foobar>         2
           </foobar>        2
       </root>              1
       <!-- outside -->     0
       
    • findFragment

      private static int findFragment(int bufMinPos, char[] b, int start, int end)
    • getPositionDescription

      public String getPositionDescription()
      Return a string describing the current position of the parser as text 'STATE [seen %s...] @line:column'.
      Specified by:
      getPositionDescription in interface XmlPullParser
      Returns:
      a short text describing the current parser state, including the position, a description of the current event and the data source if known. This method is especially useful to provide meaningful error messages and for debugging purposes.
    • getLineNumber

      public int getLineNumber()
      Description copied from interface: XmlPullParser
      Returns the current line number, starting from 1. When the parser does not know the current line number or can not determine it, -1 is returned (e.g. for WBXML).
      Specified by:
      getLineNumber in interface XmlPullParser
      Returns:
      current line number or -1 if unknown.
    • getColumnNumber

      public int getColumnNumber()
      Description copied from interface: XmlPullParser
      Returns the current column number, starting from 0. When the parser does not know the current column number or can not determine it, -1 is returned (e.g. for WBXML).
      Specified by:
      getColumnNumber in interface XmlPullParser
      Returns:
      current column number or -1 if unknown.
    • isWhitespace

      public boolean isWhitespace() throws XmlPullParserException
      Specified by:
      isWhitespace in interface XmlPullParser
      Returns:
      Checks whether the current TEXT event contains only whitespace characters. For IGNORABLE_WHITESPACE, this is always true. For TEXT and CDSECT, false is returned when the current event text contains at least one non-white space character. For any other event type an exception is thrown.

      Please note: non-validating parsers are not able to distinguish whitespace and ignorable whitespace, except from whitespace outside the root element. Ignorable whitespace is reported as separate event, which is exposed via nextToken only.

      Throws:
      XmlPullParserException - parsing issue
    • getText

      public String getText()
      Specified by:
      getText in interface XmlPullParser
      Returns:
      the text content of the current event as String. The value returned depends on current event type, for example for TEXT event it is element content (this is typical case when next() is used). See description of nextToken() for detailed description of possible returned values for different types of events.

      NOTE: in case of ENTITY_REF, this method returns the entity replacement text (or null if not available). This is the only case where getText() and getTextCharacters() return different values.

      See Also:
    • getTextCharacters

      public char[] getTextCharacters(int[] holderForStartAndLength)
      Description copied from interface: XmlPullParser
      Returns the buffer that contains the text of the current event, as well as the start offset and length relevant for the current event. See getText(), next() and nextToken() for description of possible returned values.

      Please note: this buffer must not be modified and its content MAY change after a call to next() or nextToken(). This method will always return the same value as getText(), except for ENTITY_REF. In the case of ENTITY ref, getText() returns the replacement text and this method returns the actual input buffer containing the entity name. If getText() returns null, this method returns null as well and the values returned in the holder array MUST be -1 (both start and length).

      Specified by:
      getTextCharacters in interface XmlPullParser
      Parameters:
      holderForStartAndLength - Must hold an 2-element int array into which the start offset and length values will be written.
      Returns:
      char buffer that contains the text of the current event (null if the current event has no text associated).
      See Also:
    • getNamespace

      public String getNamespace()
      Specified by:
      getNamespace in interface XmlPullParser
      Returns:
      the namespace URI of the current element. The default namespace is represented as empty string. If namespaces are not enabled, an empty String ("") is always returned. The current event must be START_TAG or END_TAG; otherwise, null is returned.
    • getName

      public String getName()
      Specified by:
      getName in interface XmlPullParser
      Returns:
      For START_TAG or END_TAG events, the (local) name of the current element is returned when namespaces are enabled. When namespace processing is disabled, the raw name is returned. For ENTITY_REF events, the entity name is returned. If the current event is not START_TAG, END_TAG, or ENTITY_REF, null is returned.

      Please note: To reconstruct the raw element name when namespaces are enabled and the prefix is not null, you will need to add the prefix and a colon to localName.

    • getPrefix

      public String getPrefix()
      Specified by:
      getPrefix in interface XmlPullParser
      Returns:
      the prefix of the current element. If the element is in the default namespace (has no prefix), null is returned. If namespaces are not enabled, or the current event is not START_TAG or END_TAG, null is returned.
    • isEmptyElementTag

      public boolean isEmptyElementTag() throws XmlPullParserException
      Specified by:
      isEmptyElementTag in interface XmlPullParser
      Returns:
      true if the current event is START_TAG and the tag is degenerated (e.g. <foobar/>).

      NOTE: if the parser is not on START_TAG, an exception will be thrown.

      Throws:
      XmlPullParserException - parsing issue
    • getAttributeCount

      public int getAttributeCount()
      Specified by:
      getAttributeCount in interface XmlPullParser
      Returns:
      the number of attributes of the current start tag, or -1 if the current event type is not START_TAG
      See Also:
    • getAttributeNamespace

      public String getAttributeNamespace(int index)
      Description copied from interface: XmlPullParser
      Returns the namespace URI of the attribute with the given index (starts from 0). Returns an empty string ("") if namespaces are not enabled or the attribute has no namespace. Throws an IndexOutOfBoundsException if the index is out of range or the current event type is not START_TAG.

      NOTE: if FEATURE_REPORT_NAMESPACE_ATTRIBUTES is set then namespace attributes (xmlns:ns='...') must be reported with namespace http://www.w3.org/2000/xmlns/ (visit this URL for description!). The default namespace attribute (xmlns="...") will be reported with empty namespace.

      NOTE:The xml prefix is bound as defined in Namespaces in XML specification to "http://www.w3.org/XML/1998/namespace".

      Specified by:
      getAttributeNamespace in interface XmlPullParser
      Parameters:
      index - zero based index of attribute
      Returns:
      attribute namespace, empty string ("") is returned if namespaces processing is not enabled or namespaces processing is enabled but attribute has no namespace (it has no prefix).
    • getAttributeName

      public String getAttributeName(int index)
      Description copied from interface: XmlPullParser
      Returns the local name of the specified attribute if namespaces are enabled or just attribute name if namespaces are disabled. Throws an IndexOutOfBoundsException if the index is out of range or current event type is not START_TAG.
      Specified by:
      getAttributeName in interface XmlPullParser
      Parameters:
      index - zero based index of attribute
      Returns:
      attribute name (null is never returned)
    • getAttributePrefix

      public String getAttributePrefix(int index)
      Description copied from interface: XmlPullParser
      Returns the prefix of the specified attribute. Returns null if the attribute has no prefix. If namespaces are disabled it will always return null. Throws an IndexOutOfBoundsException if the index is out of range or current event type is not START_TAG.
      Specified by:
      getAttributePrefix in interface XmlPullParser
      Parameters:
      index - zero based index of attribute
      Returns:
      attribute prefix or null if namespaces processing is not enabled.
    • getAttributeType

      public String getAttributeType(int index)
      Description copied from interface: XmlPullParser
      Returns the type of the specified attribute. If parser is non-validating it MUST return CDATA.
      Specified by:
      getAttributeType in interface XmlPullParser
      Parameters:
      index - zero based index of attribute
      Returns:
      attribute type (null is never returned)
    • isAttributeDefault

      public boolean isAttributeDefault(int index)
      Description copied from interface: XmlPullParser
      Returns true if the specified attribute was not in the input but was declared in XML (i.e. its value was supplied by default). If parser is non-validating it MUST always return false. This information is part of XML infoset:
      Specified by:
      isAttributeDefault in interface XmlPullParser
      Parameters:
      index - zero based index of attribute
      Returns:
      false if attribute was in input
    • getAttributeValue

      public String getAttributeValue(int index)
      Description copied from interface: XmlPullParser
      Returns the given attributes value. Throws an IndexOutOfBoundsException if the index is out of range or current event type is not START_TAG.

      NOTE: attribute value must be normalized (including entity replacement text if PROCESS_DOCDECL is false) as described in XML 1.0 section 3.3.3 Attribute-Value Normalization

      Specified by:
      getAttributeValue in interface XmlPullParser
      Parameters:
      index - zero based index of attribute
      Returns:
      value of attribute (null is never returned)
      See Also:
    • getAttributeValue

      public String getAttributeValue(String namespace, String name)
      Description copied from interface: XmlPullParser
      Returns the attributes value identified by namespace URI and namespace localName. If namespaces are disabled namespace must be null. If current event type is not START_TAG then IndexOutOfBoundsException will be thrown.

      NOTE: attribute value must be normalized (including entity replacement text if PROCESS_DOCDECL is false) as described in XML 1.0 section 3.3.3 Attribute-Value Normalization

      Specified by:
      getAttributeValue in interface XmlPullParser
      Parameters:
      namespace - Namespace of the attribute if namespaces are enabled otherwise must be null
      name - If namespaces enabled local name of attribute otherwise just attribute name
      Returns:
      value of attribute or null if attribute with given name does not exist
      See Also:
    • getEventType

      public int getEventType() throws XmlPullParserException
      Specified by:
      getEventType in interface XmlPullParser
      Returns:
      the type of the current event (START_TAG, END_TAG, TEXT, etc.)
      Throws:
      XmlPullParserException - parsing issue
      See Also:
    • require

      public void require(int type, String namespace, String name) throws XmlPullParserException, IOException
      Description copied from interface: XmlPullParser
      Test if the current event is of the given type and if the namespace and name do match. null will match any namespace and any name. If the test is not passed, an exception is thrown. The exception text indicates the parser position, the expected event and the current event that is not meeting the requirement.

      Essentially it does this

       if ( type != getEventType() || ( namespace != null && !namespace.equals( getNamespace() ) )
           || ( name != null && !name.equals( getName() ) ) )
           throw new XmlPullParserException( "expected " + TYPES[type] + getPositionDescription() );
       
      Specified by:
      require in interface XmlPullParser
      Parameters:
      type - type
      namespace - namespace
      name - name
      Throws:
      XmlPullParserException - parsing issue
      IOException - io issue
    • skipSubTree

      public void skipSubTree() throws XmlPullParserException, IOException

      Skip sub tree that is currently parser positioned on.

      NOTE: parser must be on START_TAG and when function returns parser will be positioned on corresponding END_TAG
      Throws:
      XmlPullParserException - issue
      IOException - io
    • nextText

      public String nextText() throws XmlPullParserException, IOException
      Description copied from interface: XmlPullParser
      If current event is START_TAG then if next element is TEXT then element content is returned or if next event is END_TAG then empty string is returned, otherwise exception is thrown. After calling this function successfully parser will be positioned on END_TAG.

      The motivation for this function is to allow consistent parsing of both empty elements and elements that have non-empty content, for example for input:

      1. <tag>foo</tag>
      2. <tag></tag> (which is equivalent to <tag/>) both inputs can be parsed with the same code:
           p.nextTag()
           p.require(p.START_TAG, "", "tag");
           String content = p.nextText();
           p.require(p.END_TAG, "", "tag");
         
      This function together with nextTag make it very easy to parse XML that has no mixed content.

      Essentially it does this

       if ( getEventType() != START_TAG )
       {
           throw new XmlPullParserException( "parser must be on START_TAG to read next text", this, null );
       }
       int eventType = next();
       if ( eventType == TEXT )
       {
           String result = getText();
           eventType = next();
           if ( eventType != END_TAG )
           {
               throw new XmlPullParserException( "event TEXT it must be immediately followed by END_TAG", this, null );
           }
           return result;
       }
       else if ( eventType == END_TAG )
       {
           return "";
       }
       else
       {
           throw new XmlPullParserException( "parser must be on START_TAG or TEXT to read text", this, null );
       }
       
      Specified by:
      nextText in interface XmlPullParser
      Returns:
      see description
      Throws:
      XmlPullParserException - parsing issue
      IOException - io issue
    • nextTag

      public int nextTag() throws XmlPullParserException, IOException
      Description copied from interface: XmlPullParser
      Call next() and return event if it is START_TAG or END_TAG otherwise throw an exception. It will skip whitespace TEXT before actual tag if any.

      essentially it does this

       int eventType = next();
       if ( eventType == TEXT && isWhitespace() )
       { // skip whitespace
           eventType = next();
       }
       if ( eventType != START_TAG && eventType != END_TAG )
       {
           throw new XmlPullParserException( "expected start or end tag", this, null );
       }
       return eventType;
       
      Specified by:
      nextTag in interface XmlPullParser
      Returns:
      see description
      Throws:
      XmlPullParserException - parsing issue
      IOException - io issue
    • next

      public int next() throws XmlPullParserException, IOException
      Specified by:
      next in interface XmlPullParser
      Returns:
      Get next parsing event - element content will be coalesced and only one TEXT event must be returned for whole element content (comments and processing instructions will be ignored and entity references must be expanded or exception must be thrown if entity reference can not be expanded). If element content is empty (content is "") then no TEXT event will be reported.

      NOTE: empty element (such as <tag/>) will be reported with two separate events: START_TAG, END_TAG - it must be so to preserve parsing equivalency of empty element to <tag></tag>. (see isEmptyElementTag())

      Throws:
      XmlPullParserException - parsing issue
      IOException - io issue
      See Also:
    • nextToken

      public int nextToken() throws XmlPullParserException, IOException
      Description copied from interface: XmlPullParser
      This method works similarly to next() but will expose additional event types (COMMENT, CDSECT, DOCDECL, ENTITY_REF, PROCESSING_INSTRUCTION, or IGNORABLE_WHITESPACE) if they are available in input.

      If special feature FEATURE_XML_ROUNDTRIP (identified by URI: http://xmlpull.org/v1/doc/features.html#xml-roundtrip) is enabled it is possible to do XML document round trip i.e. reproduce exactly on output the XML input using getText(): returned content is always unnormalized (exactly as in input). Otherwise returned content is end-of-line normalized as described in XML 1.0 End-of-Line Handling. Also when this feature is enabled exact content of START_TAG, END_TAG, DOCDECL and PROCESSING_INSTRUCTION is available.

      Here is the list of tokens that can be returned from nextToken() and what getText() and getTextCharacters() return for each of them:

      Specified by:
      nextToken in interface XmlPullParser
      Returns:
      START_DOCUMENT
      null
      END_DOCUMENT
      null
      START_TAG
      null unless FEATURE_XML_ROUNDTRIP enabled and then returns XML tag, ex: <tag attr='val'>
      END_TAG
      null unless FEATURE_XML_ROUNDTRIP is enabled and then returns XML tag, ex: </tag>
      TEXT
      return element content.
      Note: that element content may be delivered in multiple consecutive TEXT events.
      IGNORABLE_WHITESPACE
      return characters that are determined to be ignorable white space. If the FEATURE_XML_ROUNDTRIP is enabled all whitespace content outside root element will always be reported as IGNORABLE_WHITESPACE otherwise reporting is optional.
      Note: that element content may be delivered in multiple consecutive IGNORABLE_WHITESPACE events.
      CDSECT
      return text inside CDATA (ex. 'fo<o' from <![CDATA[fo<o]]>)
      PROCESSING_INSTRUCTION
      if FEATURE_XML_ROUNDTRIP is true return exact PI content ex: 'pi foo' from <?pi foo?> otherwise it may be exact PI content or concatenation of PI target, space and data so for example for <?target data?> string "target data" may be returned if FEATURE_XML_ROUNDTRIP is false.
      COMMENT
      return comment content ex. 'foo bar' from <!--foo bar-->
      ENTITY_REF
      getText() MUST return entity replacement text if PROCESS_DOCDECL is false otherwise getText() MAY return null, additionally getTextCharacters() MUST return entity name (for example 'entity_name' for &entity_name;).
      NOTE: this is the only place where value returned from getText() and getTextCharacters() are different
      NOTE: it is user responsibility to resolve entity reference if PROCESS_DOCDECL is false and there is no entity replacement text set in defineEntityReplacementText() method (getText() will be null)
      NOTE: character entities (ex. &#32;) and standard entities such as &amp; &lt; &gt; &quot; &apos; are reported as well and are not reported as TEXT tokens but as ENTITY_REF tokens! This requirement is added to allow to do roundtrip of XML documents!
      DOCDECL
      if FEATURE_XML_ROUNDTRIP is true or PROCESS_DOCDECL is false then return what is inside of DOCDECL for example it returns:
       " titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
       [<!ENTITY % active.links "INCLUDE">]"
       

      for input document that contained:

       <!DOCTYPE titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
       [<!ENTITY % active.links "INCLUDE">]>
       
      otherwise if FEATURE_XML_ROUNDTRIP is false and PROCESS_DOCDECL is true then what is returned is undefined (it may be even null)

      NOTE: there is no guarantee that there will be only one TEXT or IGNORABLE_WHITESPACE event from nextToken() as parser may choose to deliver element content in multiple tokens (dividing element content into chunks)

      NOTE: whether returned text of token is end-of-line normalized depends on FEATURE_XML_ROUNDTRIP.

      NOTE: XMLDecl (<?xml ...?>) is not reported but its content is available through optional properties (see class description above).

      Throws:
      XmlPullParserException - parsing issue
      IOException - io issue
      See Also:
    • nextImpl

      private int nextImpl() throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • parseProlog

      private int parseProlog() throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • parseEpilog

      private int parseEpilog() throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • parseEndTag

      public int parseEndTag() throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • parseStartTag

      public int parseStartTag() throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • parseAttribute

      private char parseAttribute() throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • parseCharOrPredefinedEntityRef

      private int parseCharOrPredefinedEntityRef() throws XmlPullParserException, IOException
      parse Entity Ref, either a character entity or one of the predefined name entities.
      Returns:
      the length of the valid found character reference, which may be one of the predefined character reference names (resolvedEntityRefCharBuf contains the replaced chars). Returns the length of the not found entity name, otherwise.
      Throws:
      XmlPullParserException - if invalid XML is detected.
      IOException - if an I/O error is found.
    • parseEntityRefInDocDecl

      private void parseEntityRefInDocDecl() throws XmlPullParserException, IOException
      Parse an entity reference inside the DOCDECL section.
      Throws:
      XmlPullParserException - if invalid XML is detected.
      IOException - if an I/O error is found.
    • parseEntityRef

      private void parseEntityRef() throws XmlPullParserException, IOException
      Parse an entity reference inside a tag or attribute.
      Throws:
      XmlPullParserException - if invalid XML is detected.
      IOException - if an I/O error is found.
    • isValidCodePoint

      private static boolean isValidCodePoint(int codePoint)
      Check if the provided parameter is a valid Char. According to https://www.w3.org/TR/REC-xml/#NT-Char
      Parameters:
      codePoint - the numeric value to check
      Returns:
      true if it is a valid numeric character reference. False otherwise.
    • lookuEntityReplacement

      private char[] lookuEntityReplacement(int entityNameLen)
    • parseComment

      private void parseComment() throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • parsePI

      private boolean parsePI() throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • parseXmlDecl

      private void parseXmlDecl(char ch) throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • parseXmlDeclWithVersion

      private void parseXmlDeclWithVersion(int versionStart, int versionEnd) throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • parseDocdecl

      private void parseDocdecl() throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • extractEntityRefInDocDecl

      private void extractEntityRefInDocDecl() throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • extractEntityRef

      private void extractEntityRef() throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • parseCDSect

      private void parseCDSect(boolean hadCharData) throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • fillBuf

      private void fillBuf() throws IOException, XmlPullParserException
      Throws:
      IOException
      XmlPullParserException
    • more

      private char more() throws IOException, XmlPullParserException
      Throws:
      IOException
      XmlPullParserException
    • ensurePC

      private void ensurePC(int end)
    • joinPC

      private void joinPC()
    • requireInput

      private char requireInput(char ch, char[] input) throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • skipS

      private char skipS(char ch) throws XmlPullParserException, IOException
      Throws:
      XmlPullParserException
      IOException
    • setName

      private static void setName(char ch)
    • setNameStart

      private static void setNameStart(char ch)
    • isNameStartChar

      private static boolean isNameStartChar(char ch)
    • isNameChar

      private static boolean isNameChar(char ch)
    • isS

      private static boolean isS(char ch)
    • printable

      private static String printable(int ch)
    • printable

      private static String printable(String s)