Replace expressions engine (#133)

2026-03-24 07:45:02 +01:00 · 2025-10-06 13:53:15 +02:00
parent 418c708bb0
commit 82dccc7820
40 changed files with 6876 additions and 1304 deletions
--- a/internal/expr/lexer.go
+++ b/internal/expr/lexer.go
@@ -0,0 +1,361 @@
+package workflow
+
+import (
+	"math"
+	"slices"
+	"strconv"
+	"strings"
+	"unicode"
+)
+
+// TokenKind represents the type of token returned by the lexer.
+// The values mirror the C# TokenKind enum.
+//
+// Note: The names are kept identical to the C# implementation for
+// easier mapping when porting the parser.
+//
+// The lexer is intentionally simple – it only tokenises the subset of
+// expressions that are used in GitHub Actions workflow `if:` expressions.
+// It does not evaluate the expression – that is left to the parser.
+type TokenKind int
+
+const (
+	// TokenKindStartGroup is a "(" that opens a logical grouping.
+	TokenKindStartGroup TokenKind = iota
+	// TokenKindStartIndex is a "[" that opens an index access.
+	TokenKindStartIndex
+	// TokenKindEndGroup is a ")" that is not closing a parameter list.
+	TokenKindEndGroup
+	// TokenKindEndIndex is a "]".
+	TokenKindEndIndex
+	// TokenKindSeparator is a ",".
+	TokenKindSeparator
+	// TokenKindDereference is a "." used for property access.
+	TokenKindDereference
+	// TokenKindWildcard is a "*".
+	TokenKindWildcard
+	// TokenKindLogicalOperator is one of !, !=, ==, <, <=, >, >=, && or ||.
+	TokenKindLogicalOperator
+	// TokenKindNumber is a numeric literal, including NaN and Infinity.
+	TokenKindNumber
+	// TokenKindString is a single-quoted string literal.
+	TokenKindString
+	// TokenKindBoolean is the keyword true or false.
+	TokenKindBoolean
+	// TokenKindNull is the keyword null.
+	TokenKindNull
+	// TokenKindPropertyName is a word that directly follows a dereference.
+	TokenKindPropertyName
+	// TokenKindFunction is a word that is immediately followed by "(".
+	TokenKindFunction
+	// TokenKindNamedValue is any other word.
+	TokenKindNamedValue
+	// TokenKindStartParameters is a "(" that directly follows a function name.
+	TokenKindStartParameters
+	// TokenKindEndParameters is a ")" that closes a parameter list.
+	TokenKindEndParameters
+	// TokenKindUnexpected marks input the lexer could not accept, such as an
+	// illegal token sequence or an unterminated string.
+	TokenKindUnexpected
+)
+
+// Token is one lexical unit produced by the Lexer.
+//
+// The struct is deliberately small and only carries what the parser needs;
+// further details (for example the token length) can be added when required.
+type Token struct {
+	// Kind classifies the token.
+	Kind TokenKind
+	// Raw is the text of the token exactly as it appeared in the expression.
+	Raw string
+	// Value is the parsed value for literals (numbers, strings, booleans)
+	// and nil for tokens that carry no value.
+	Value any
+	// Index is the byte offset in the expression recorded by the lexer when
+	// the token was created.
+	Index int
+}
+
+// Lexer holds the state while tokenising an expression.
+// It is a direct port of the C# LexicalAnalyzer.
+//
+// Flags can be used to enable/disable features – for now we only support
+// a single flag (FlagV1) that mirrors ExpressionFlags.DTExpressionsV1.
+//
+// The lexer is not thread‑safe and carries scanning state – create a
+// separate instance for each expression.
+type Lexer struct {
+	expr  string      // expression text being tokenised
+	flags int         // feature bits, tested against FlagV1
+	index int         // byte offset of the next unread character in expr
+	last  *Token      // most recent token accepted by createToken; nil before the first one
+	stack []TokenKind // unclosed start tokens
+}
+
+// NewLexer returns a lexer positioned at the beginning of expr.
+// flags is a bit set of lexer options; FlagV1 is the only one defined.
+func NewLexer(expr string, flags int) *Lexer {
+	lx := new(Lexer)
+	lx.expr = expr
+	lx.flags = flags
+	return lx
+}
+
+func testTokenBoundary(c rune) bool {
+	switch c {
+	case '(', '[', ')', ']', ',', '.',
+		'!', '>', '<', '=', '&', '|':
+		return true
+	default:
+		return unicode.IsSpace(c)
+	}
+}
+
+// Next returns the next token or nil if the end of the expression is reached.
+func (l *Lexer) Next() *Token {
+	// Skip whitespace
+	for l.index < len(l.expr) && unicode.IsSpace(rune(l.expr[l.index])) {
+		l.index++
+	}
+	if l.index >= len(l.expr) {
+		return nil
+	}
+
+	c := l.expr[l.index]
+	switch c {
+	case '(':
+		l.index++
+		// Function call or logical grouping
+		if l.last != nil && l.last.Kind == TokenKindFunction {
+			return l.createToken(TokenKindStartParameters, "(")
+		}
+		if l.flags&FlagV1 != 0 {
+			// V1 does not support grouping – treat as unexpected
+			return l.createToken(TokenKindUnexpected, "(")
+		}
+		return l.createToken(TokenKindStartGroup, "(")
+	case '[':
+		l.index++
+		return l.createToken(TokenKindStartIndex, "[")
+	case ')':
+		l.index++
+		if len(l.stack) > 0 && l.stack[len(l.stack)-1] == TokenKindStartParameters {
+			return l.createToken(TokenKindEndParameters, ")")
+		}
+		return l.createToken(TokenKindEndGroup, ")")
+	case ']':
+		l.index++
+		return l.createToken(TokenKindEndIndex, "]")
+	case ',':
+		l.index++
+		return l.createToken(TokenKindSeparator, ",")
+	case '*':
+		l.index++
+		return l.createToken(TokenKindWildcard, "*")
+	case '\'':
+		return l.readString()
+	case '!', '>', '<', '=', '&', '|':
+		if l.flags&FlagV1 != 0 {
+			l.index++
+			return l.createToken(TokenKindUnexpected, string(c))
+		}
+		return l.readOperator()
+	default:
+		return l.defaultNext(c)
+	}
+}
+
+// defaultNext handles every character that Next does not classify itself:
+// "." (number or dereference), the start of a number, or a keyword.
+func (l *Lexer) defaultNext(c byte) *Token {
+	if c == '.' {
+		// A period where an operand may begin starts a number such as ".5";
+		// anywhere else it is a property dereference.
+		operandPosition := l.last == nil
+		if !operandPosition {
+			switch l.last.Kind {
+			case TokenKindSeparator, TokenKindStartGroup, TokenKindStartIndex,
+				TokenKindStartParameters, TokenKindLogicalOperator:
+				operandPosition = true
+			}
+		}
+		if operandPosition {
+			return l.readNumber()
+		}
+		l.index++
+		return l.createToken(TokenKindDereference, ".")
+	}
+
+	switch {
+	case c == '-', c == '+', unicode.IsDigit(rune(c)):
+		return l.readNumber()
+	}
+	return l.readKeyword()
+}
+
+// createToken builds a token of the given kind for raw, validates it against
+// the previously accepted token and updates the lexer state.
+//
+// Every caller advances l.index past raw before calling, so the start of the
+// token is l.index minus len(raw). That start offset is what is stored in
+// Token.Index (previously the offset just past the token was stored).
+//
+// If checkLastToken rejects the sequence, an Unexpected token is returned
+// and neither l.last nor the stack of unclosed tokens is modified.
+func (l *Lexer) createToken(kind TokenKind, raw string) *Token {
+	start := l.index - len(raw)
+
+	// Token order check.
+	if !l.checkLastToken(kind, raw) {
+		return &Token{Kind: TokenKindUnexpected, Raw: raw, Index: start}
+	}
+
+	tok := &Token{Kind: kind, Raw: raw, Index: start}
+	l.last = tok
+
+	// Track nesting: push start tokens, pop on any end token.
+	switch kind {
+	case TokenKindStartGroup, TokenKindStartIndex, TokenKindStartParameters:
+		l.stack = append(l.stack, kind)
+	case TokenKindEndGroup, TokenKindEndIndex, TokenKindEndParameters:
+		if n := len(l.stack); n > 0 {
+			l.stack = l.stack[:n-1]
+		}
+	}
+	return tok
+}
+
+// nil last token represented by nil
+func (l *Lexer) getLastKind() *TokenKind {
+	var lastKind *TokenKind
+	if l.last != nil {
+		lastKind = &l.last.Kind
+	}
+	return lastKind
+}
+
+// checkLastToken reports whether a token of the given kind may legally follow
+// the most recently accepted token. raw is only consulted to tell the unary
+// "!" apart from the binary logical operators.
+func (l *Lexer) checkLastToken(kind TokenKind, raw string) bool {
+	// Tokens after which a new operand may begin.
+	beforeOperand := []TokenKind{
+		TokenKindSeparator, TokenKindStartGroup, TokenKindStartParameters,
+		TokenKindStartIndex, TokenKindLogicalOperator,
+	}
+	// Tokens that can be indexed or dereferenced.
+	accessible := []TokenKind{
+		TokenKindEndGroup, TokenKindEndParameters, TokenKindEndIndex,
+		TokenKindWildcard, TokenKindPropertyName, TokenKindNamedValue,
+	}
+	// Tokens that complete an operand.
+	afterOperand := []TokenKind{
+		TokenKindEndGroup, TokenKindEndParameters, TokenKindEndIndex,
+		TokenKindWildcard, TokenKindNull, TokenKindBoolean, TokenKindNumber,
+		TokenKindString, TokenKindPropertyName, TokenKindNamedValue,
+	}
+
+	isFirst := l.last == nil
+	// follows reports whether the previous token exists and is in set or extra.
+	follows := func(set []TokenKind, extra ...TokenKind) bool {
+		if isFirst {
+			return false
+		}
+		prev := l.last.Kind
+		return slices.Contains(set, prev) || slices.Contains(extra, prev)
+	}
+
+	switch kind {
+	case TokenKindStartGroup,
+		TokenKindNull, TokenKindBoolean, TokenKindNumber, TokenKindString,
+		TokenKindFunction, TokenKindNamedValue:
+		return isFirst || follows(beforeOperand)
+	case TokenKindLogicalOperator:
+		if raw == "!" {
+			// Unary negation sits where an operand would start.
+			return isFirst || follows(beforeOperand)
+		}
+		return follows(afterOperand)
+	case TokenKindStartIndex, TokenKindDereference:
+		return follows(accessible)
+	case TokenKindEndGroup, TokenKindEndIndex, TokenKindSeparator:
+		return follows(afterOperand)
+	case TokenKindEndParameters:
+		// An empty parameter list "()" is allowed as well.
+		return follows(afterOperand, TokenKindStartParameters)
+	case TokenKindStartParameters:
+		return follows(nil, TokenKindFunction)
+	case TokenKindWildcard:
+		return follows(nil, TokenKindStartIndex, TokenKindDereference)
+	case TokenKindPropertyName:
+		return follows(nil, TokenKindDereference)
+	}
+	return true
+}
+
+// readNumber scans a numeric literal starting at the current position and
+// returns the token for it.
+//
+// The literal runs up to the next token boundary; "." is treated as part of
+// the literal. Literals with a "0x" or "0o" prefix are parsed as 32-bit
+// integers; if that fails the token is Unexpected. Everything else is parsed
+// as a float64; if that fails the token keeps kind Number and its Value is
+// the raw text.
+//
+// createToken is called exactly once, so a malformed prefixed literal no
+// longer records a phantom Number token as the lexer's last token.
+func (l *Lexer) readNumber() *Token {
+	start := l.index
+	for l.index < len(l.expr) {
+		ch := l.expr[l.index]
+		if ch != '.' && testTokenBoundary(rune(ch)) {
+			break
+		}
+		l.index++
+	}
+	raw := l.expr[start:l.index]
+
+	kind := TokenKindNumber
+	var val any = raw
+	parsed := false
+
+	if len(raw) > 2 && (raw[:2] == "0x" || raw[:2] == "0o") {
+		if i, err := strconv.ParseInt(raw, 0, 32); err == nil {
+			val, parsed = float64(i), true
+		} else {
+			kind = TokenKindUnexpected
+		}
+	}
+	if !parsed {
+		// Try to parse as float64; on failure Value stays the raw text.
+		if f, err := strconv.ParseFloat(raw, 64); err == nil {
+			val = f
+		}
+	}
+
+	tok := l.createToken(kind, raw)
+	tok.Value = val
+	return tok
+}
+
+// readString scans a single‑quoted string literal that starts at the current
+// position. Inside the literal a doubled quote ('') stands for one quote.
+// If the closing quote is missing, the rest of the expression is consumed
+// and the returned token has kind Unexpected and no Value.
+func (l *Lexer) readString() *Token {
+	begin := l.index
+	pos := begin + 1 // first character after the opening quote
+
+	var text strings.Builder
+	terminated := false
+	for pos < len(l.expr) {
+		q := strings.IndexByte(l.expr[pos:], '\'')
+		if q < 0 {
+			// No further quote: the literal runs to the end of the input.
+			pos = len(l.expr)
+			break
+		}
+		text.WriteString(l.expr[pos : pos+q])
+		pos += q + 1
+		if pos < len(l.expr) && l.expr[pos] == '\'' {
+			// Doubled quote – an escaped quote character.
+			text.WriteByte('\'')
+			pos++
+			continue
+		}
+		terminated = true
+		break
+	}
+	l.index = pos
+
+	tok := l.createToken(TokenKindString, l.expr[begin:pos])
+	if !terminated {
+		tok.Kind = TokenKindUnexpected
+		return tok
+	}
+	tok.Value = text.String()
+	return tok
+}
+
+// readOperator scans an operator at the current position. Two-character
+// operators (!=, >=, <=, ==, &&, ||) take precedence over the one-character
+// operators (!, >, <). A lone "=", "&" or "|" yields an Unexpected token.
+func (l *Lexer) readOperator() *Token {
+	begin := l.index
+	l.index++
+
+	if l.index < len(l.expr) {
+		switch pair := l.expr[begin : begin+2]; pair {
+		case "!=", ">=", "<=", "==", "&&", "||":
+			l.index++
+			return l.createToken(TokenKindLogicalOperator, pair)
+		}
+	}
+
+	first := l.expr[begin]
+	kind := TokenKindUnexpected
+	if first == '!' || first == '>' || first == '<' {
+		kind = TokenKindLogicalOperator
+	}
+	return l.createToken(kind, string(first))
+}
+
+// readKeyword scans a word starting at the current position and classifies it.
+//
+// The word ends at ASCII whitespace or at one of ( ) [ ] , . ! < > = & | *.
+// It is classified, in this order, as:
+//   - a property name, when the previous token was a dereference;
+//   - the literals true, false, null, NaN or Infinity;
+//   - a function, when it is immediately followed by "(";
+//   - a named value otherwise.
+func (l *Lexer) readKeyword() *Token {
+	start := l.index
+	for l.index < len(l.expr) {
+		b := l.expr[l.index]
+		// Only ASCII bytes may end a word. Bytes above MaxASCII belong to a
+		// multi-byte UTF-8 sequence; converting them to a rune one by one
+		// would misread 0x85 and 0xA0 as the whitespace code points U+0085
+		// and U+00A0 and cut a word such as "voilà" (… 0xC3 0xA0) in half.
+		if b <= unicode.MaxASCII &&
+			(unicode.IsSpace(rune(b)) || strings.IndexByte("()[],.!<>=&|*", b) >= 0) {
+			break
+		}
+		l.index++
+	}
+	raw := l.expr[start:l.index]
+
+	// After a dereference every word is a property name, keywords included.
+	if l.last != nil && l.last.Kind == TokenKindDereference {
+		return l.createToken(TokenKindPropertyName, raw)
+	}
+
+	switch raw {
+	case "true":
+		tok := l.createToken(TokenKindBoolean, raw)
+		tok.Value = true
+		return tok
+	case "false":
+		tok := l.createToken(TokenKindBoolean, raw)
+		tok.Value = false
+		return tok
+	case "null":
+		return l.createToken(TokenKindNull, raw)
+	case "NaN":
+		tok := l.createToken(TokenKindNumber, raw)
+		tok.Value = math.NaN()
+		return tok
+	case "Infinity":
+		tok := l.createToken(TokenKindNumber, raw)
+		tok.Value = math.Inf(1)
+		return tok
+	}
+
+	if l.index < len(l.expr) && l.expr[l.index] == '(' {
+		return l.createToken(TokenKindFunction, raw)
+	}
+	return l.createToken(TokenKindNamedValue, raw)
+}
+
+// FlagV1 is the only lexer flag defined for now. When it is set in the flags
+// passed to NewLexer, Next reports a grouping "(" and the operator characters
+// ! > < = & | as Unexpected tokens.
+const FlagV1 = 1
+
+// UnclosedTokens returns the start tokens that have been opened but not yet
+// closed, innermost last. The result is a copy, so callers cannot alias or
+// modify the lexer's internal stack and it is unaffected by later calls to
+// Next.
+func (l *Lexer) UnclosedTokens() []TokenKind {
+	return slices.Clone(l.stack)
+}