Replace expressions engine (#133)

This commit is contained in:
ChristopherHX
2025-10-06 13:53:15 +02:00
committed by GitHub
parent 418c708bb0
commit 82dccc7820
40 changed files with 6876 additions and 1304 deletions

361
internal/expr/lexer.go Normal file
View File

@@ -0,0 +1,361 @@
package workflow
import (
"math"
"slices"
"strconv"
"strings"
"unicode"
)
// TokenKind represents the type of token returned by the lexer.
// The values mirror the C# TokenKind enum.
//
// Note: The names are kept identical to the C# implementation for
// easier mapping when porting the parser.
//
// The lexer is intentionally simple: it only tokenises the subset of
// expressions that are used in GitHub Actions workflow `if:` expressions.
// It does not evaluate the expression; that is left to the parser.
type TokenKind int
// The kinds are numbered consecutively from zero via iota, so the order of
// the declarations below determines each kind's numeric value.
const (
// TokenKindStartGroup is a "(" that does not directly follow a function name.
TokenKindStartGroup TokenKind = iota
// TokenKindStartIndex is a "[".
TokenKindStartIndex
// TokenKindEndGroup is a ")" whose innermost unclosed start token is not
// TokenKindStartParameters.
TokenKindEndGroup
// TokenKindEndIndex is a "]".
TokenKindEndIndex
// TokenKindSeparator is a ",".
TokenKindSeparator
// TokenKindDereference is a "." in a position where it cannot start a number.
TokenKindDereference
// TokenKindWildcard is a "*".
TokenKindWildcard
// TokenKindLogicalOperator is one of !, >, <, !=, >=, <=, ==, && or ||.
TokenKindLogicalOperator
// TokenKindNumber is a numeric literal, including the keywords NaN and Infinity.
TokenKindNumber
// TokenKindString is a single-quoted string literal.
TokenKindString
// TokenKindBoolean is the keyword true or false.
TokenKindBoolean
// TokenKindNull is the keyword null.
TokenKindNull
// TokenKindPropertyName is an identifier that directly follows a dereference.
TokenKindPropertyName
// TokenKindFunction is an identifier that is followed by "(".
TokenKindFunction
// TokenKindNamedValue is any other identifier.
TokenKindNamedValue
// TokenKindStartParameters is a "(" that directly follows a function name.
TokenKindStartParameters
// TokenKindEndParameters is a ")" whose innermost unclosed start token is
// TokenKindStartParameters.
TokenKindEndParameters
// TokenKindUnexpected marks text that is malformed or that may not appear
// after the preceding token.
TokenKindUnexpected
)
// Token is a single lexical token produced by the Lexer.
//
// The struct is intentionally minimal: it only contains what the parser
// needs. If more information is required (e.g. the token length) it can be
// added here.
type Token struct {
	// Kind is the lexical category of the token.
	Kind TokenKind
	// Raw is the original text of the token as it appears in the expression.
	Raw string
	// Value is the parsed value when applicable, for example a float64 for
	// numbers, the unescaped text for strings and a bool for booleans.
	Value any
	// Index is the position in the source string that the lexer recorded
	// for this token.
	Index int
}
// Lexer holds the state while tokenising an expression.
// It is a direct port of the C# LexicalAnalyzer.
//
// Flags can be used to enable/disable features; for now only a single flag
// is supported, FlagV1, which mirrors ExpressionFlags.DTExpressionsV1.
//
// The lexer is not thread-safe: its methods mutate the fields below without
// any synchronisation. Use one instance per expression.
type Lexer struct {
// expr is the expression text being tokenised.
expr string
// flags is the bit set of Flag* constants passed to NewLexer.
flags int
// index is the byte offset in expr of the next character to read.
index int
// last is the token most recently accepted by createToken, or nil before
// the first token; it drives the token order checks.
last *Token
stack []TokenKind // unclosed start tokens, innermost last
}
// NewLexer returns a lexer positioned at the start of expr.
// flags is a bit set of Flag* constants (currently only FlagV1).
func NewLexer(expr string, flags int) *Lexer {
	lexer := new(Lexer)
	lexer.expr = expr
	lexer.flags = flags
	return lexer
}
// testTokenBoundary reports whether c ends a number token: it is true for
// Unicode whitespace and for the punctuation and operator characters
// ( [ ) ] , . ! > < = & |.
func testTokenBoundary(c rune) bool {
	if unicode.IsSpace(c) {
		return true
	}
	for _, boundary := range "()[],.!><=&|" {
		if c == boundary {
			return true
		}
	}
	return false
}
// Next returns the next token, or nil once the end of the expression has
// been reached.
//
// Punctuation is tokenised here; strings, operators, numbers and keywords
// are delegated to the read* helpers. With FlagV1 set, grouping parentheses
// and operator characters are reported as TokenKindUnexpected.
func (l *Lexer) Next() *Token {
	// Step over leading whitespace; running off the end means no more tokens.
	for {
		if l.index >= len(l.expr) {
			return nil
		}
		if !unicode.IsSpace(rune(l.expr[l.index])) {
			break
		}
		l.index++
	}

	c := l.expr[l.index]
	v1 := l.flags&FlagV1 != 0

	switch c {
	case '\'':
		return l.readString()

	case '!', '>', '<', '=', '&', '|':
		if !v1 {
			return l.readOperator()
		}
		l.index++
		return l.createToken(TokenKindUnexpected, string(c))

	case '(':
		l.index++
		switch {
		case l.last != nil && l.last.Kind == TokenKindFunction:
			// Directly after a function name the parenthesis opens its
			// parameter list.
			return l.createToken(TokenKindStartParameters, "(")
		case v1:
			// V1 does not support grouping.
			return l.createToken(TokenKindUnexpected, "(")
		default:
			return l.createToken(TokenKindStartGroup, "(")
		}

	case ')':
		l.index++
		// The innermost unclosed start token decides what is being closed.
		closing := TokenKindEndGroup
		if depth := len(l.stack); depth > 0 && l.stack[depth-1] == TokenKindStartParameters {
			closing = TokenKindEndParameters
		}
		return l.createToken(closing, ")")

	case '[', ']', ',', '*':
		l.index++
		var kind TokenKind
		switch c {
		case '[':
			kind = TokenKindStartIndex
		case ']':
			kind = TokenKindEndIndex
		case ',':
			kind = TokenKindSeparator
		default:
			kind = TokenKindWildcard
		}
		return l.createToken(kind, string(c))
	}

	return l.defaultNext(c)
}
// defaultNext handles the characters Next has no dedicated case for.
// c is the byte at l.index.
//
// A '.' starts a number when no operand can precede it (start of the
// expression, or after a separator, an opening token or an operator);
// otherwise it is a dereference. A sign or a digit starts a number, and
// anything else is read as a keyword.
func (l *Lexer) defaultNext(c byte) *Token {
	switch {
	case c == '.':
		startsNumber := l.last == nil
		if !startsNumber {
			switch l.last.Kind {
			case TokenKindSeparator,
				TokenKindStartGroup,
				TokenKindStartIndex,
				TokenKindStartParameters,
				TokenKindLogicalOperator:
				startsNumber = true
			}
		}
		if startsNumber {
			return l.readNumber()
		}
		l.index++
		return l.createToken(TokenKindDereference, ".")

	case c == '-', c == '+', unicode.IsDigit(rune(c)):
		return l.readNumber()

	default:
		return l.readKeyword()
	}
}
// createToken builds a token of the given kind for raw, checks it against
// the previously accepted token and updates the lexer state.
//
// Callers must already have advanced l.index past raw. The token's Index is
// therefore computed as l.index - len(raw), the offset at which raw starts
// in the expression.
//
// If the token may not follow the previous one (see checkLastToken), a
// TokenKindUnexpected token is returned instead, and neither l.last nor the
// stack of unclosed start tokens is modified.
func (l *Lexer) createToken(kind TokenKind, raw string) *Token {
	// l.index points just past the token text here, so subtract the text
	// length to get the start position that Token.Index is meant to hold.
	start := l.index - len(raw)
	if start < 0 {
		// Defensive only: raw is never longer than the consumed input.
		start = 0
	}

	if !l.checkLastToken(kind, raw) {
		// Illegal token sequence.
		return &Token{Kind: TokenKindUnexpected, Raw: raw, Index: start}
	}

	tok := &Token{Kind: kind, Raw: raw, Index: start}
	l.last = tok

	// Track unclosed start tokens so that ")" can be classified and so that
	// UnclosedTokens can report what is still open.
	switch kind {
	case TokenKindStartGroup, TokenKindStartIndex, TokenKindStartParameters:
		l.stack = append(l.stack, kind)
	case TokenKindEndGroup, TokenKindEndIndex, TokenKindEndParameters:
		if len(l.stack) > 0 {
			l.stack = l.stack[:len(l.stack)-1]
		}
	}
	return tok
}
// getLastKind returns a pointer to the kind of the last accepted token, or
// nil when no token has been accepted yet.
func (l *Lexer) getLastKind() *TokenKind {
	if l.last == nil {
		return nil
	}
	return &l.last.Kind
}
// checkLastToken reports whether a token of the given kind may follow the
// last accepted token. raw is only consulted for logical operators, where
// the unary "!" has different rules from the binary operators.
//
// Kinds without a rule (such as TokenKindUnexpected) are always accepted.
func (l *Lexer) checkLastToken(kind TokenKind, raw string) bool {
	// Tokens after which a new operand may begin.
	operandStart := []TokenKind{
		TokenKindSeparator, TokenKindStartGroup, TokenKindStartParameters,
		TokenKindStartIndex, TokenKindLogicalOperator,
	}
	// Tokens that end something which can be indexed or dereferenced.
	indexable := []TokenKind{
		TokenKindEndGroup, TokenKindEndParameters, TokenKindEndIndex,
		TokenKindWildcard, TokenKindPropertyName, TokenKindNamedValue,
	}
	// Tokens that end a complete operand.
	operandEnd := []TokenKind{
		TokenKindEndGroup, TokenKindEndParameters, TokenKindEndIndex,
		TokenKindWildcard, TokenKindNull, TokenKindBoolean, TokenKindNumber,
		TokenKindString, TokenKindPropertyName, TokenKindNamedValue,
	}

	first := l.last == nil
	follows := func(kinds []TokenKind) bool {
		return !first && slices.Contains(kinds, l.last.Kind)
	}
	followsKind := func(want TokenKind) bool {
		return !first && l.last.Kind == want
	}

	switch kind {
	case TokenKindStartGroup,
		TokenKindNull, TokenKindBoolean, TokenKindNumber, TokenKindString,
		TokenKindFunction, TokenKindNamedValue:
		return first || follows(operandStart)

	case TokenKindLogicalOperator:
		if raw == "!" {
			// Unary not sits where an operand would start.
			return first || follows(operandStart)
		}
		return follows(operandEnd)

	case TokenKindStartIndex, TokenKindDereference:
		return follows(indexable)

	case TokenKindEndGroup, TokenKindEndIndex, TokenKindSeparator:
		return follows(operandEnd)

	case TokenKindEndParameters:
		// An empty parameter list "()" is allowed as well.
		return followsKind(TokenKindStartParameters) || follows(operandEnd)

	case TokenKindStartParameters:
		return followsKind(TokenKindFunction)

	case TokenKindWildcard:
		return followsKind(TokenKindStartIndex) || followsKind(TokenKindDereference)

	case TokenKindPropertyName:
		return followsKind(TokenKindDereference)
	}
	return true
}
// readNumber parses a numeric literal starting at l.index.
//
// The literal runs up to the next token boundary (see testTokenBoundary),
// except that '.' is kept as part of the literal. Text longer than two
// bytes that starts with "0x" or "0o" is parsed as a 32-bit integer with
// strconv.ParseInt; everything else is parsed with strconv.ParseFloat.
//
// On success a TokenKindNumber token with a float64 Value is returned. Text
// that does not parse (for example a lone "-", "1abc", or a value that is
// out of range) is returned as TokenKindUnexpected rather than as a number
// carrying a non-numeric Value.
func (l *Lexer) readNumber() *Token {
	start := l.index
	for l.index < len(l.expr) {
		ch := l.expr[l.index]
		if ch != '.' && testTokenBoundary(rune(ch)) {
			break
		}
		l.index++
	}
	raw := l.expr[start:l.index]

	var (
		value float64
		err   error
	)
	if len(raw) > 2 && (raw[:2] == "0x" || raw[:2] == "0o") {
		var i int64
		i, err = strconv.ParseInt(raw, 0, 32)
		value = float64(i)
	} else {
		value, err = strconv.ParseFloat(raw, 64)
	}
	if err != nil {
		return l.createToken(TokenKindUnexpected, raw)
	}

	// createToken is called exactly once per literal so that the lexer's
	// last-token state always matches the token handed to the caller.
	tok := l.createToken(TokenKindNumber, raw)
	tok.Value = value
	return tok
}
// readString parses a single-quoted string literal starting at l.index.
//
// Inside the literal two consecutive quotes stand for one literal quote.
// On success the token's Value holds the unescaped text. If the input ends
// before the closing quote, the token's kind is set to TokenKindUnexpected
// and Value is left unset.
func (l *Lexer) readString() *Token {
	begin := l.index
	pos := begin + 1 // step over the opening quote

	var text strings.Builder
	terminated := false
	for pos < len(l.expr) && !terminated {
		b := l.expr[pos]
		pos++
		switch {
		case b != '\'':
			text.WriteByte(b)
		case pos < len(l.expr) && l.expr[pos] == '\'':
			// A doubled quote is an escaped quote.
			text.WriteByte('\'')
			pos++
		default:
			terminated = true
		}
	}
	l.index = pos

	tok := l.createToken(TokenKindString, l.expr[begin:pos])
	if !terminated {
		tok.Kind = TokenKindUnexpected
		return tok
	}
	tok.Value = text.String()
	return tok
}
// readOperator parses the operator starting at l.index.
//
// The two-character operators !=, >=, <=, ==, && and || are tried first,
// then the single-character operators !, > and <. A lone =, & or | is
// returned as TokenKindUnexpected.
func (l *Lexer) readOperator() *Token {
	first := l.expr[l.index]
	l.index++

	if l.index < len(l.expr) {
		switch pair := l.expr[l.index-1 : l.index+1]; pair {
		case "!=", ">=", "<=", "==", "&&", "||":
			l.index++
			return l.createToken(TokenKindLogicalOperator, pair)
		}
	}

	if first == '!' || first == '>' || first == '<' {
		return l.createToken(TokenKindLogicalOperator, string(first))
	}
	return l.createToken(TokenKindUnexpected, string(first))
}
// readKeyword parses an identifier-like token starting at l.index: a
// property name, one of the literals true, false, null, NaN and Infinity,
// a function name or a named value.
//
// The token ends at whitespace or at one of the characters
// ( ) [ ] , . ! < > = & | *. Directly after a dereference the text is always
// a property name. Otherwise the literals are recognised first, and the
// remaining text is a function name when the next non-whitespace character
// is "(" and a named value when it is not.
func (l *Lexer) readKeyword() *Token {
	start := l.index
	for l.index < len(l.expr) {
		ch := rune(l.expr[l.index])
		if unicode.IsSpace(ch) || strings.ContainsRune("()[],.!<>=&|*", ch) {
			break
		}
		l.index++
	}
	raw := l.expr[start:l.index]

	if l.last != nil && l.last.Kind == TokenKindDereference {
		return l.createToken(TokenKindPropertyName, raw)
	}

	switch raw {
	case "true":
		tok := l.createToken(TokenKindBoolean, raw)
		tok.Value = true
		return tok
	case "false":
		tok := l.createToken(TokenKindBoolean, raw)
		tok.Value = false
		return tok
	case "null":
		return l.createToken(TokenKindNull, raw)
	case "NaN":
		tok := l.createToken(TokenKindNumber, raw)
		tok.Value = math.NaN()
		return tok
	case "Infinity":
		tok := l.createToken(TokenKindNumber, raw)
		tok.Value = math.Inf(1)
		return tok
	}

	// Look ahead past whitespace without consuming it, so that "fn (x)" is
	// recognised as a function call just like "fn(x)". Next skips the same
	// whitespace before it tokenises the parenthesis.
	next := l.index
	for next < len(l.expr) && unicode.IsSpace(rune(l.expr[next])) {
		next++
	}
	if next < len(l.expr) && l.expr[next] == '(' {
		return l.createToken(TokenKindFunction, raw)
	}
	return l.createToken(TokenKindNamedValue, raw)
}
// FlagV1 is the only lexer flag in use for now. When it is set in the flags
// passed to NewLexer, Next reports a "(" that does not follow a function
// name, and the operator characters !, >, <, =, & and |, as
// TokenKindUnexpected.
const FlagV1 = 1
// UnclosedTokens returns the start tokens that have not been closed yet,
// outermost first. The returned slice is the lexer's own stack, not a copy.
func (l *Lexer) UnclosedTokens() []TokenKind {
	unclosed := l.stack
	return unclosed
}