/**
 * @license
 * Copyright Google LLC All Rights Reserved.
 *
 * Use of this source code is governed by an MIT-style license that can be
 * found in the LICENSE file at https://angular.io/license
 */
import { ParseError, ParseSourceSpan } from '../parse_util';
import * as html from './ast';
import { NAMED_ENTITIES } from './entities';
import { tokenize } from './lexer';
import { getNsPrefix, mergeNsAndName, splitNsName } from './tags';

/**
 * A parse error raised while building the tree. In addition to what `ParseError`
 * stores, it records the name of the element the error relates to (may be `null`).
 */
export class TreeError extends ParseError {
  /** Factory equivalent to `new TreeError(elementName, span, msg)`. */
  static create(elementName, span, msg) {
    return new TreeError(elementName, span, msg);
  }

  constructor(elementName, span, msg) {
    super(span, msg);
    this.elementName = elementName;
  }
}

/** Result of a parse: the root nodes that were built plus every error collected on the way. */
export class ParseTreeResult {
  constructor(rootNodes, errors) {
    this.rootNodes = rootNodes;
    this.errors = errors;
  }
}

/** Tokenizes a source string and builds a node tree from the resulting tokens. */
export class Parser {
  /**
   * @param getTagDefinition Function returning the tag definition for a tag name. It is handed
   *     to both the tokenizer and the tree builder.
   */
  constructor(getTagDefinition) {
    this.getTagDefinition = getTagDefinition;
  }

  /**
   * Tokenizes `source` and builds the tree.
   *
   * @param source Text to parse.
   * @param url Passed through to `tokenize`.
   * @param options Passed through to `tokenize`.
   * @returns A `ParseTreeResult` whose errors are the tokenizer errors followed by the tree
   *     builder errors.
   */
  parse(source, url, options) {
    const tokenizeResult = tokenize(source, url, this.getTagDefinition, options);
    const parser = new _TreeBuilder(tokenizeResult.tokens, this.getTagDefinition);
    parser.build();
    return new ParseTreeResult(parser.rootNodes, tokenizeResult.errors.concat(parser.errors));
  }
}

/**
 * Walks a token list and assembles the node tree.
 *
 * State:
 *  - `_index` / `_peek`: position of, and reference to, the current (not yet consumed) token.
 *  - `_containerStack`: currently open containers (elements, block groups, blocks), innermost last.
 *  - `rootNodes`: nodes that were added while no container was open.
 *  - `errors`: `TreeError`s collected during the build.
 *
 * The numeric token types below are inlined enum values; the trailing comment on each names
 * the corresponding `TokenType` member.
 */
class _TreeBuilder {
  constructor(tokens, getTagDefinition) {
    this.tokens = tokens;
    this.getTagDefinition = getTagDefinition;
    this._index = -1;
    this._containerStack = [];
    this.rootNodes = [];
    this.errors = [];
    // Prime `_peek` with the first token.
    this._advance();
  }

  /** Consumes tokens until EOF, dispatching on the type of the current token. */
  build() {
    while (this._peek.type !== 24 /* TokenType.EOF */) {
      switch (this._peek.type) {
        case 0 /* TokenType.TAG_OPEN_START */:
        case 4 /* TokenType.INCOMPLETE_TAG_OPEN */:
          this._consumeStartTag(this._advance());
          break;
        case 3 /* TokenType.TAG_CLOSE */:
          this._consumeEndTag(this._advance());
          break;
        case 12 /* TokenType.CDATA_START */:
          this._closeVoidElement();
          this._consumeCdata(this._advance());
          break;
        case 10 /* TokenType.COMMENT_START */:
          this._closeVoidElement();
          this._consumeComment(this._advance());
          break;
        case 5 /* TokenType.TEXT */:
        case 7 /* TokenType.RAW_TEXT */:
        case 6 /* TokenType.ESCAPABLE_RAW_TEXT */:
          this._closeVoidElement();
          this._consumeText(this._advance());
          break;
        case 19 /* TokenType.EXPANSION_FORM_START */:
          this._consumeExpansion(this._advance());
          break;
        case 25 /* TokenType.BLOCK_GROUP_OPEN_START */:
          this._closeVoidElement();
          this._consumeBlockGroupOpen(this._advance());
          break;
        case 29 /* TokenType.BLOCK_OPEN_START */:
          this._closeVoidElement();
          this._consumeBlock(this._advance(), 30 /* TokenType.BLOCK_OPEN_END */);
          break;
        case 27 /* TokenType.BLOCK_GROUP_CLOSE */:
          this._closeVoidElement();
          this._consumeBlockGroupClose(this._advance());
          break;
        default:
          // Skip all other tokens...
          this._advance();
      }
    }
  }

  /**
   * Moves to the next token (staying on the last one once it is reached).
   *
   * @returns The token that was current before the call.
   */
  _advance() {
    const prev = this._peek;
    if (this._index < this.tokens.length - 1) {
      // Note: there is always an EOF token at the end
      this._index++;
    }
    this._peek = this.tokens[this._index];
    return prev;
  }

  /**
   * Advances only when the current token has the given type.
   *
   * @returns The consumed token, or `null` when the type did not match.
   */
  _advanceIf(type) {
    if (this._peek.type === type) {
      return this._advance();
    }
    return null;
  }

  /** Consumes the token after a CDATA start as text, then an optional CDATA end token. */
  _consumeCdata(_startToken) {
    this._consumeText(this._advance());
    this._advanceIf(13 /* TokenType.CDATA_END */);
  }

  /**
   * Builds an `html.Comment` from a comment start token, an optional raw text token and an
   * optional comment end token. The value is the trimmed raw text, or `null` without text.
   */
  _consumeComment(token) {
    const text = this._advanceIf(7 /* TokenType.RAW_TEXT */);
    const endToken = this._advanceIf(11 /* TokenType.COMMENT_END */);
    const value = text != null ? text.parts[0].trim() : null;
    // Without an end token the span is just the start token's span.
    const sourceSpan = endToken == null ?
        token.sourceSpan :
        new ParseSourceSpan(
            token.sourceSpan.start, endToken.sourceSpan.end, token.sourceSpan.fullStart);
    this._addToParent(new html.Comment(value, sourceSpan));
  }

  /**
   * Builds an `html.Expansion` starting at an expansion form start token. Bails out (after
   * recording an error where applicable) if a case fails to parse or the closing token is missing.
   */
  _consumeExpansion(token) {
    const switchValue = this._advance();
    const type = this._advance();
    const cases = [];
    // read =
    while (this._peek.type === 20 /* TokenType.EXPANSION_CASE_VALUE */) {
      const expCase = this._parseExpansionCase();
      if (!expCase) {
        return; // error
      }
      cases.push(expCase);
    }
    // read the final }
    if (this._peek.type !== 23 /* TokenType.EXPANSION_FORM_END */) {
      this.errors.push(
          TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '}'.`));
      return;
    }
    const sourceSpan = new ParseSourceSpan(
        token.sourceSpan.start, this._peek.sourceSpan.end, token.sourceSpan.fullStart);
    this._addToParent(new html.Expansion(
        switchValue.parts[0], type.parts[0], cases, sourceSpan, switchValue.sourceSpan));
    this._advance();
  }

  /**
   * Parses a single expansion case: a value token followed by a `{ ... }` body. The body tokens
   * are parsed with a nested `_TreeBuilder`.
   *
   * @returns The `html.ExpansionCase`, or `null` if an error was recorded.
   */
  _parseExpansionCase() {
    const value = this._advance();
    // read {
    if (this._peek.type !== 21 /* TokenType.EXPANSION_CASE_EXP_START */) {
      this.errors.push(
          TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '{'.`));
      return null;
    }
    // read until }
    const start = this._advance();
    const exp = this._collectExpansionExpTokens(start);
    if (!exp) {
      return null;
    }
    const end = this._advance();
    // The nested builder stops at EOF, so terminate the collected tokens with one.
    exp.push({ type: 24 /* TokenType.EOF */, parts: [], sourceSpan: end.sourceSpan });
    // parse everything in between { and }
    const expansionCaseParser = new _TreeBuilder(exp, this.getTagDefinition);
    expansionCaseParser.build();
    if (expansionCaseParser.errors.length > 0) {
      this.errors = this.errors.concat(expansionCaseParser.errors);
      return null;
    }
    const sourceSpan = new ParseSourceSpan(
        value.sourceSpan.start, end.sourceSpan.end, value.sourceSpan.fullStart);
    const expSourceSpan = new ParseSourceSpan(
        start.sourceSpan.start, end.sourceSpan.end, start.sourceSpan.fullStart);
    return new html.ExpansionCase(
        value.parts[0], expansionCaseParser.rootNodes, sourceSpan, value.sourceSpan,
        expSourceSpan);
  }

  /**
   * Collects the tokens of an expansion case body up to (not including) the matching case end
   * token, tracking nested expansion forms / cases on a stack.
   *
   * @param start The case start token; its span is used for error reporting.
   * @returns The collected tokens, or `null` if the nesting is unbalanced or EOF is reached.
   */
  _collectExpansionExpTokens(start) {
    const exp = [];
    const expansionFormStack = [21 /* TokenType.EXPANSION_CASE_EXP_START */];
    while (true) {
      if (this._peek.type === 19 /* TokenType.EXPANSION_FORM_START */ ||
          this._peek.type === 21 /* TokenType.EXPANSION_CASE_EXP_START */) {
        expansionFormStack.push(this._peek.type);
      }
      if (this._peek.type === 22 /* TokenType.EXPANSION_CASE_EXP_END */) {
        if (lastOnStack(expansionFormStack, 21 /* TokenType.EXPANSION_CASE_EXP_START */)) {
          expansionFormStack.pop();
          if (expansionFormStack.length === 0) {
            return exp;
          }
        } else {
          this.errors.push(
              TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
          return null;
        }
      }
      if (this._peek.type === 23 /* TokenType.EXPANSION_FORM_END */) {
        if (lastOnStack(expansionFormStack, 19 /* TokenType.EXPANSION_FORM_START */)) {
          expansionFormStack.pop();
        } else {
          this.errors.push(
              TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
          return null;
        }
      }
      if (this._peek.type === 24 /* TokenType.EOF */) {
        this.errors.push(
            TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
        return null;
      }
      exp.push(this._advance());
    }
  }

  /**
   * Builds an `html.Text` node from `token` plus any directly following interpolation, text and
   * encoded-entity tokens. Nothing is added when the resulting text is empty.
   */
  _consumeText(token) {
    const tokens = [token];
    const startSpan = token.sourceSpan;
    let text = token.parts[0];
    if (text.length > 0 && text[0] === '\n') {
      const parent = this._getContainer();
      // This is unlikely to happen, but we have an assertion just in case.
      if (parent instanceof html.BlockGroup) {
        this.errors.push(TreeError.create(
            null, startSpan, 'Text cannot be placed directly inside of a block group.'));
        return null;
      }
      // Drop a leading line feed when it is the first child of a container whose tag
      // definition asks for that (`ignoreFirstLf`).
      if (parent != null && parent.children.length === 0 &&
          this.getTagDefinition(parent.name).ignoreFirstLf) {
        text = text.substring(1);
        tokens[0] = { type: token.type, sourceSpan: token.sourceSpan, parts: [text] };
      }
    }
    // Last token folded into the text; its span provides the end of the node's span.
    let lastToken = token;
    while (this._peek.type === 8 /* TokenType.INTERPOLATION */ ||
           this._peek.type === 5 /* TokenType.TEXT */ ||
           this._peek.type === 9 /* TokenType.ENCODED_ENTITY */) {
      lastToken = this._advance();
      tokens.push(lastToken);
      if (lastToken.type === 8 /* TokenType.INTERPOLATION */) {
        // For backward compatibility we decode HTML entities that appear in interpolation
        // expressions. This is arguably a bug, but it could be a considerable breaking change to
        // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
        // chain after View Engine has been removed.
        text += decodeEntitiesInInterpolation(lastToken.parts.join(''));
      } else if (lastToken.type === 9 /* TokenType.ENCODED_ENTITY */) {
        text += lastToken.parts[0];
      } else {
        text += lastToken.parts.join('');
      }
    }
    if (text.length > 0) {
      const endSpan = lastToken.sourceSpan;
      this._addToParent(new html.Text(
          text,
          new ParseSourceSpan(startSpan.start, endSpan.end, startSpan.fullStart, startSpan.details),
          tokens));
    }
  }

  /** Pops the innermost container if it is an element whose tag definition is void. */
  _closeVoidElement() {
    const el = this._getContainer();
    if (el instanceof html.Element && this.getTagDefinition(el.name).isVoid) {
      this._containerStack.pop();
    }
  }

  /**
   * Builds an `html.Element` from a start tag token and its attribute tokens and pushes it on
   * the container stack. Self-closed and incomplete start tags are popped again immediately.
   */
  _consumeStartTag(startTagToken) {
    const [prefix, name] = startTagToken.parts;
    const attrs = [];
    while (this._peek.type === 14 /* TokenType.ATTR_NAME */) {
      attrs.push(this._consumeAttr(this._advance()));
    }
    const fullName = this._getElementFullName(prefix, name, this._getClosestParentElement());
    let selfClosing = false;
    // Note: There could have been a tokenizer error
    // so that we don't get a token for the end tag...
    if (this._peek.type === 2 /* TokenType.TAG_OPEN_END_VOID */) {
      this._advance();
      selfClosing = true;
      const tagDef = this.getTagDefinition(fullName);
      if (!(tagDef.canSelfClose || getNsPrefix(fullName) !== null || tagDef.isVoid)) {
        this.errors.push(TreeError.create(
            fullName, startTagToken.sourceSpan,
            `Only void, custom and foreign elements can be self closed "${startTagToken.parts[1]}"`));
      }
    } else if (this._peek.type === 1 /* TokenType.TAG_OPEN_END */) {
      this._advance();
      selfClosing = false;
    }
    const end = this._peek.sourceSpan.fullStart;
    const span = new ParseSourceSpan(
        startTagToken.sourceSpan.start, end, startTagToken.sourceSpan.fullStart);
    // Create a separate `startSpan` because `span` will be modified when there is an `end` span.
    const startSpan = new ParseSourceSpan(
        startTagToken.sourceSpan.start, end, startTagToken.sourceSpan.fullStart);
    const el = new html.Element(fullName, attrs, [], span, startSpan, undefined);
    const parentEl = this._getContainer();
    this._pushContainer(
        el,
        parentEl instanceof html.Element &&
            this.getTagDefinition(parentEl.name).isClosedByChild(el.name));
    if (selfClosing) {
      // Elements that are self-closed have their `endSourceSpan` set to the full span, as the
      // element start tag also represents the end tag.
      this._popContainer(fullName, html.Element, span);
    } else if (startTagToken.type === 4 /* TokenType.INCOMPLETE_TAG_OPEN */) {
      // We already know the opening tag is not complete, so it is unlikely it has a corresponding
      // close tag. Let's optimistically parse it as a full element and emit an error.
      this._popContainer(fullName, html.Element, null);
      this.errors.push(
          TreeError.create(fullName, span, `Opening tag "${fullName}" not terminated.`));
    }
  }

  /**
   * Adds `node` to the current container and makes it the innermost container.
   *
   * @param isClosedByChild When truthy, the current innermost container is popped first.
   */
  _pushContainer(node, isClosedByChild) {
    if (isClosedByChild) {
      this._containerStack.pop();
    }
    this._addToParent(node);
    this._containerStack.push(node);
  }

  /**
   * Handles a closing tag: records an error for void elements, otherwise closes the matching
   * open element and records an error if the close was unexpected.
   */
  _consumeEndTag(endTagToken) {
    const fullName = this._getElementFullName(
        endTagToken.parts[0], endTagToken.parts[1], this._getClosestParentElement());
    if (this.getTagDefinition(fullName).isVoid) {
      this.errors.push(TreeError.create(
          fullName, endTagToken.sourceSpan,
          `Void elements do not have end tags "${endTagToken.parts[1]}"`));
    } else if (!this._popContainer(fullName, html.Element, endTagToken.sourceSpan)) {
      const errMsg = `Unexpected closing tag "${fullName}". It may happen when the tag has already been closed by another tag. For more info see https://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags`;
      this.errors.push(TreeError.create(fullName, endTagToken.sourceSpan, errMsg));
    }
  }

  /**
   * Closes the nearest element with the tag name `fullName` in the parse tree.
   * `endSourceSpan` is the span of the closing tag, or null if the element does
   * not have a closing tag (for example, this happens when an incomplete
   * opening tag is recovered).
   *
   * @param fullName Name to look for. For a block group the name of its first block is compared.
   * @param expectedType Constructor the matching container must be an instance of.
   * @param endSourceSpan Span of the closing construct, or `null`.
   * @returns `true` if a matching container was found and no container that required an explicit
   *     close had to be skipped on the way; `false` otherwise.
   */
  _popContainer(fullName, expectedType, endSourceSpan) {
    let unexpectedCloseTagDetected = false;
    for (let stackIndex = this._containerStack.length - 1; stackIndex >= 0; stackIndex--) {
      const node = this._containerStack[stackIndex];
      const name = node instanceof html.BlockGroup ? node.blocks[0]?.name : node.name;
      if (name === fullName && node instanceof expectedType) {
        // Record the parse span with the element that is being closed. Any elements that are
        // removed from the element stack at this point are closed implicitly, so they won't get
        // an end source span (as there is no explicit closing element).
        node.endSourceSpan = endSourceSpan;
        node.sourceSpan.end = endSourceSpan !== null ? endSourceSpan.end : node.sourceSpan.end;
        this._containerStack.splice(stackIndex, this._containerStack.length - stackIndex);
        return !unexpectedCloseTagDetected;
      }
      // Blocks are self-closing while block groups and (most times) elements are not.
      if (node instanceof html.BlockGroup ||
          (node instanceof html.Element && !this.getTagDefinition(node.name).closedByParent)) {
        // Note that we encountered an unexpected close tag but continue processing the element
        // stack so we can assign an `endSourceSpan` if there is a corresponding start tag for this
        // end tag in the stack.
        unexpectedCloseTagDetected = true;
      }
    }
    return false;
  }

  /**
   * Builds an `html.Attribute` from an attribute name token and the optional quote / value
   * tokens that follow it.
   */
  _consumeAttr(attrName) {
    const fullName = mergeNsAndName(attrName.parts[0], attrName.parts[1]);
    let attrEnd = attrName.sourceSpan.end;
    // Consume any quote
    if (this._peek.type === 15 /* TokenType.ATTR_QUOTE */) {
      this._advance();
    }
    // Consume the attribute value
    let value = '';
    const valueTokens = [];
    let valueStartSpan = undefined;
    let valueEnd = undefined;
    // NOTE: We need to use a new variable `nextTokenType` here to hide the actual type of
    // `_peek.type` from TS. Otherwise TS will narrow the type of `_peek.type` preventing it from
    // being able to consider `ATTR_VALUE_INTERPOLATION` as an option. This is because TS is not
    // able to see that `_advance()` will actually mutate `_peek`.
    const nextTokenType = this._peek.type;
    if (nextTokenType === 16 /* TokenType.ATTR_VALUE_TEXT */) {
      valueStartSpan = this._peek.sourceSpan;
      valueEnd = this._peek.sourceSpan.end;
      while (this._peek.type === 16 /* TokenType.ATTR_VALUE_TEXT */ ||
             this._peek.type === 17 /* TokenType.ATTR_VALUE_INTERPOLATION */ ||
             this._peek.type === 9 /* TokenType.ENCODED_ENTITY */) {
        const valueToken = this._advance();
        valueTokens.push(valueToken);
        if (valueToken.type === 17 /* TokenType.ATTR_VALUE_INTERPOLATION */) {
          // For backward compatibility we decode HTML entities that appear in interpolation
          // expressions. This is arguably a bug, but it could be a considerable breaking change to
          // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
          // chain after View Engine has been removed.
          value += decodeEntitiesInInterpolation(valueToken.parts.join(''));
        } else if (valueToken.type === 9 /* TokenType.ENCODED_ENTITY */) {
          value += valueToken.parts[0];
        } else {
          value += valueToken.parts.join('');
        }
        valueEnd = attrEnd = valueToken.sourceSpan.end;
      }
    }
    // Consume any quote
    if (this._peek.type === 15 /* TokenType.ATTR_QUOTE */) {
      const quoteToken = this._advance();
      attrEnd = quoteToken.sourceSpan.end;
    }
    const valueSpan = valueStartSpan && valueEnd &&
        new ParseSourceSpan(valueStartSpan.start, valueEnd, valueStartSpan.fullStart);
    return new html.Attribute(
        fullName, value,
        new ParseSourceSpan(attrName.sourceSpan.start, attrEnd, attrName.sourceSpan.fullStart),
        attrName.sourceSpan, valueSpan, valueTokens.length > 0 ? valueTokens : undefined,
        undefined);
  }

  /**
   * Opens a new `html.BlockGroup`, pushes it on the container stack and consumes its implicit
   * first block from the same token.
   */
  _consumeBlockGroupOpen(token) {
    const end = this._peek.sourceSpan.fullStart;
    const span = new ParseSourceSpan(token.sourceSpan.start, end, token.sourceSpan.fullStart);
    // Create a separate `startSpan` because `span` will be modified when there is an `end` span.
    const startSpan = new ParseSourceSpan(token.sourceSpan.start, end, token.sourceSpan.fullStart);
    const blockGroup = new html.BlockGroup([], span, startSpan, null);
    this._pushContainer(blockGroup, false);
    const implicitBlock = this._consumeBlock(token, 26 /* TokenType.BLOCK_GROUP_OPEN_END */);
    // Block parameters are consumed as a part of the implicit block so we need to expand the
    // start source span once the block is parsed to include the full opening tag.
    startSpan.end = implicitBlock.startSourceSpan.end;
  }

  /**
   * Builds an `html.Block` with its parameters. The block is attached to (and pushed on top of)
   * the current container only if that container is a block group; otherwise an error is
   * recorded.
   *
   * @param token The block (or block group) start token; `parts[0]` is used as the block name.
   * @param closeToken Token type that ends the block's opening; consumed if present.
   * @returns The created block, also in the error case.
   */
  _consumeBlock(token, closeToken) {
    // The start of a block implicitly closes the previous block.
    this._conditionallyClosePreviousBlock();
    const parameters = [];
    while (this._peek.type === 28 /* TokenType.BLOCK_PARAMETER */) {
      const paramToken = this._advance();
      parameters.push(new html.BlockParameter(paramToken.parts[0], paramToken.sourceSpan));
    }
    if (this._peek.type === closeToken) {
      this._advance();
    }
    const end = this._peek.sourceSpan.fullStart;
    const span = new ParseSourceSpan(token.sourceSpan.start, end, token.sourceSpan.fullStart);
    // Create a separate `startSpan` because `span` will be modified when there is an `end` span.
    const startSpan = new ParseSourceSpan(token.sourceSpan.start, end, token.sourceSpan.fullStart);
    const block = new html.Block(token.parts[0], parameters, [], span, startSpan);
    const parent = this._getContainer();
    if (!(parent instanceof html.BlockGroup)) {
      this.errors.push(TreeError.create(
          block.name, block.sourceSpan, 'Blocks can only be placed inside of block groups.'));
    } else {
      parent.blocks.push(block);
      this._containerStack.push(block);
    }
    return block;
  }

  /**
   * Handles a block group close token: closes a still-open block, then the block group itself,
   * recording an error if the group cannot be closed cleanly.
   */
  _consumeBlockGroupClose(token) {
    const name = token.parts[0];
    // Captured before any popping so the error message can mention an unclosed element.
    const previousContainer = this._getContainer();
    // Blocks are implicitly closed by the block group.
    this._conditionallyClosePreviousBlock();
    if (!this._popContainer(name, html.BlockGroup, token.sourceSpan)) {
      const context = previousContainer instanceof html.Element ?
          `There is an unclosed "${previousContainer.name}" HTML tag named that may have to be closed first.` :
          `The block may have been closed earlier.`;
      this.errors.push(TreeError.create(
          name, token.sourceSpan, `Unexpected closing block "${name}". ${context}`));
    }
  }

  /** Closes the innermost container if it is an `html.Block`; otherwise does nothing. */
  _conditionallyClosePreviousBlock() {
    const container = this._getContainer();
    if (container instanceof html.Block) {
      // Blocks don't have an explicit closing tag, they're closed either by the next block or
      // the end of the block group. Infer the end span from the last child node.
      const lastChild =
          container.children.length ? container.children[container.children.length - 1] : null;
      const endSpan = lastChild === null ?
          null :
          new ParseSourceSpan(lastChild.sourceSpan.end, lastChild.sourceSpan.end);
      this._popContainer(container.name, html.Block, endSpan);
    }
  }

  /** @returns The innermost open container, or `null` when the stack is empty. */
  _getContainer() {
    return this._containerStack.length > 0 ?
        this._containerStack[this._containerStack.length - 1] :
        null;
  }

  /** @returns The innermost open container that is an `html.Element`, or `null` if none. */
  _getClosestParentElement() {
    for (let i = this._containerStack.length - 1; i > -1; i--) {
      if (this._containerStack[i] instanceof html.Element) {
        return this._containerStack[i];
      }
    }
    return null;
  }

  /**
   * Appends `node` to the children of the innermost container, or to `rootNodes` when no
   * container is open. A block group as the innermost container yields an error instead.
   */
  _addToParent(node) {
    const parent = this._getContainer();
    if (parent === null) {
      this.rootNodes.push(node);
    } else if (parent instanceof html.BlockGroup) {
      // Due to how parsing is set up, we're unlikely to hit this code path, but we
      // have the assertion here just in case and to satisfy the type checker.
      this.errors.push(
          TreeError.create(null, node.sourceSpan, 'Block groups can only contain blocks.'));
    } else {
      parent.children.push(node);
    }
  }

  /**
   * Computes the full (namespace-merged) element name. With an empty `prefix`, the tag's
   * `implicitNamespacePrefix` is used; failing that, the parent element's namespace prefix is
   * used unless the parent's tag definition sets `preventNamespaceInheritance`.
   */
  _getElementFullName(prefix, localName, parentElement) {
    if (prefix === '') {
      prefix = this.getTagDefinition(localName).implicitNamespacePrefix || '';
      if (prefix === '' && parentElement != null) {
        const parentTagName = splitNsName(parentElement.name)[1];
        const parentTagDefinition = this.getTagDefinition(parentTagName);
        if (!parentTagDefinition.preventNamespaceInheritance) {
          prefix = getNsPrefix(parentElement.name);
        }
      }
    }
    return mergeNsAndName(prefix, localName);
  }
}

/** @returns Whether `stack` is non-empty and its last item is strictly equal to `element`. */
function lastOnStack(stack, element) {
  return stack.length > 0 && stack[stack.length - 1] === element;
}

/** Largest valid Unicode code point; `String.fromCodePoint` throws a `RangeError` above it. */
const MAX_CODE_POINT = 0x10ffff;

/**
 * Replaces every `&...;` sequence in `text` with the result of `decodeEntity`. Used for the
 * backward-compatible entity decoding inside interpolation tokens.
 */
function decodeEntitiesInInterpolation(text) {
  return text.replace(/&([^;]+);/g, decodeEntity);
}

/**
 * Decode the `entity` string, which we believe is the contents of an HTML entity.
 *
 * If the string is not actually a valid/known entity then just return the original `match` string.
 * That includes names that only resolve through the prototype chain of `NAMED_ENTITIES` (such as
 * `constructor`) and numeric references above U+10FFFF, which would otherwise produce a
 * non-string replacement or make `String.fromCodePoint` throw.
 *
 * @param match The full matched text, e.g. `&amp;`.
 * @param entity The text between `&` and `;`.
 * @returns The decoded string, or `match` if `entity` cannot be decoded.
 */
function decodeEntity(match, entity) {
  // Only consult own properties so that inherited members are not mistaken for entities.
  const named = Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, entity) ?
      NAMED_ENTITIES[entity] :
      undefined;
  if (named !== undefined) {
    return named || match;
  }
  if (/^#x[a-f0-9]+$/i.test(entity)) {
    return codePointToString(parseInt(entity.slice(2), 16), match);
  }
  if (/^#\d+$/.test(entity)) {
    return codePointToString(parseInt(entity.slice(1), 10), match);
  }
  return match;
}

/** Converts `codePoint` to a string, or returns `fallback` when it exceeds the Unicode range. */
function codePointToString(codePoint, fallback) {
  return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : fallback;
}
//# sourceMappingURL=data:application/json;base64,