Package org.jsoup.parser
Class Tokeniser
- java.lang.Object
-
- org.jsoup.parser.Tokeniser
-
final class Tokeniser extends java.lang.Object
Reads the input stream into tokens.
-
-
Field Summary
Fields
| Modifier and Type | Field | Description |
| --- | --- | --- |
| (package private) Token.Character | charPending | |
| private java.lang.StringBuilder | charsBuilder | |
| private java.lang.String | charsString | |
| private int[] | codepointHolder | |
| (package private) Token.Comment | commentPending | |
| (package private) java.lang.StringBuilder | dataBuffer | |
| (package private) Token.Doctype | doctypePending | |
| private Token | emitPending | |
| (package private) Token.EndTag | endPending | |
| private ParseErrorList | errors | |
| private boolean | isEmitPending | |
| private java.lang.String | lastStartTag | |
| private int[] | multipointHolder | |
| private static char[] | notCharRefCharsSorted | |
| private CharacterReader | reader | |
| (package private) static char | replacementChar | |
| (package private) Token.StartTag | startPending | |
| private TokeniserState | state | |
| (package private) Token.Tag | tagPending | |
| (package private) static int[] | win1252Extensions | |
| (package private) static int | win1252ExtensionsStart | |
-
Constructor Summary
Constructors
| Constructor | Description |
| --- | --- |
| Tokeniser(CharacterReader reader, ParseErrorList errors) | |
-
Method Summary
All Methods / Instance Methods / Concrete Methods
| Modifier and Type | Method | Description |
| --- | --- | --- |
| (package private) void | advanceTransition(TokeniserState state) | |
| (package private) java.lang.String | appropriateEndTagName() | |
| private void | characterReferenceError(java.lang.String message) | |
| (package private) int[] | consumeCharacterReference(java.lang.Character additionalAllowedCharacter, boolean inAttribute) | |
| (package private) void | createBogusCommentPending() | |
| (package private) void | createCommentPending() | |
| (package private) void | createDoctypePending() | |
| (package private) Token.Tag | createTagPending(boolean start) | |
| (package private) void | createTempBuffer() | |
| (package private) boolean | currentNodeInHtmlNS() | |
| (package private) void | emit(char c) | |
| (package private) void | emit(char[] chars) | |
| (package private) void | emit(int[] codepoints) | |
| (package private) void | emit(java.lang.String str) | |
| (package private) void | emit(Token token) | |
| (package private) void | emitCommentPending() | |
| (package private) void | emitDoctypePending() | |
| (package private) void | emitTagPending() | |
| (package private) void | eofError(TokeniserState state) | |
| (package private) void | error(java.lang.String errorMsg) | |
| (package private) void | error(TokeniserState state) | |
| (package private) TokeniserState | getState() | |
| (package private) boolean | isAppropriateEndTagToken() | |
| (package private) Token | read() | |
| (package private) void | transition(TokeniserState state) | |
| (package private) java.lang.String | unescapeEntities(boolean inAttribute) | Utility method to consume reader and unescape entities found within. |
-
-
-
Field Detail
-
replacementChar
static final char replacementChar
- See Also:
- Constant Field Values
-
notCharRefCharsSorted
private static final char[] notCharRefCharsSorted
-
win1252ExtensionsStart
static final int win1252ExtensionsStart
- See Also:
- Constant Field Values
-
win1252Extensions
static final int[] win1252Extensions
-
reader
private final CharacterReader reader
-
errors
private final ParseErrorList errors
-
state
private TokeniserState state
-
emitPending
private Token emitPending
-
isEmitPending
private boolean isEmitPending
-
charsString
private java.lang.String charsString
-
charsBuilder
private java.lang.StringBuilder charsBuilder
-
dataBuffer
java.lang.StringBuilder dataBuffer
-
tagPending
Token.Tag tagPending
-
startPending
Token.StartTag startPending
-
endPending
Token.EndTag endPending
-
charPending
Token.Character charPending
-
doctypePending
Token.Doctype doctypePending
-
commentPending
Token.Comment commentPending
-
lastStartTag
private java.lang.String lastStartTag
-
codepointHolder
private final int[] codepointHolder
-
multipointHolder
private final int[] multipointHolder
-
-
Constructor Detail
-
Tokeniser
Tokeniser(CharacterReader reader, ParseErrorList errors)
-
-
Method Detail
-
read
Token read()
-
emit
void emit(Token token)
-
emit
void emit(java.lang.String str)
-
emit
void emit(char[] chars)
-
emit
void emit(int[] codepoints)
-
emit
void emit(char c)
-
getState
TokeniserState getState()
-
transition
void transition(TokeniserState state)
-
advanceTransition
void advanceTransition(TokeniserState state)
-
consumeCharacterReference
int[] consumeCharacterReference(java.lang.Character additionalAllowedCharacter, boolean inAttribute)
-
createTagPending
Token.Tag createTagPending(boolean start)
-
emitTagPending
void emitTagPending()
-
createCommentPending
void createCommentPending()
-
emitCommentPending
void emitCommentPending()
-
createBogusCommentPending
void createBogusCommentPending()
-
createDoctypePending
void createDoctypePending()
-
emitDoctypePending
void emitDoctypePending()
-
createTempBuffer
void createTempBuffer()
-
isAppropriateEndTagToken
boolean isAppropriateEndTagToken()
-
appropriateEndTagName
java.lang.String appropriateEndTagName()
-
error
void error(TokeniserState state)
-
eofError
void eofError(TokeniserState state)
-
characterReferenceError
private void characterReferenceError(java.lang.String message)
-
error
void error(java.lang.String errorMsg)
-
currentNodeInHtmlNS
boolean currentNodeInHtmlNS()
-
unescapeEntities
java.lang.String unescapeEntities(boolean inAttribute)
Utility method to consume reader and unescape entities found within.
- Parameters:
- inAttribute - if the text to be unescaped is in an attribute
- Returns:
- unescaped string from reader
-
-