// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) // All rights reserved. // Expression parsing using Pratt parser (precedence climbing). // // Harbour's operator precedence from harbour.y: // POST < ASSIGN(right) < OR(right) < AND(right) < NOT(right) < // COMPARE(right) < ADD < MUL < POWER < UNARY < PRE < ALIAS/MACRO // // Key Harbour quirks: // - '=' is BOTH assignment (in statement context) and equality (in expression) // - Most operators are right-associative (unlike C) // - (expr)->field for dynamic alias // - &variable for macro package parser import ( "five/compiler/ast" "five/compiler/token" "strings" ) // parseExpr parses an expression using Pratt parsing. func (p *Parser) parseExpr() ast.Expr { return p.parseBinaryExpr(token.PrecAssign) } // parseBinaryExpr parses binary expressions with precedence climbing. // tsgo pattern: GetBinaryOperatorPrecedence (ref/typescript-go/internal/ast/precedence.go:338) func (p *Parser) parseBinaryExpr(minPrec token.Precedence) ast.Expr { left := p.parseUnaryExpr() for { prec := token.GetBinaryPrecedence(p.current.Kind) if prec < minPrec { break } op := p.advance() // Right-associative: use same precedence for right side // Left-associative: use precedence+1 for right side nextPrec := prec + 1 if token.IsRightAssociative(op.Kind) { nextPrec = prec } right := p.parseBinaryExpr(nextPrec) // Assignment operators → AssignExpr if isAssignOp(op.Kind) { left = &ast.AssignExpr{ Left: left, OpPos: op.Pos, Op: op.Kind, Right: right, } } else { left = &ast.BinaryExpr{ Left: left, OpPos: op.Pos, Op: op.Kind, Right: right, } } } return left } func isAssignOp(k token.Kind) bool { switch k { case token.ASSIGN, token.PLUSEQ, token.MINUSEQ, token.STAREQ, token.SLASHEQ, token.PERCENTEQ, token.POWEREQ: return true } return false } // parseUnaryExpr parses prefix unary expressions. func (p *Parser) parseUnaryExpr() ast.Expr { switch p.current.Kind { case token.MINUS: op := p.advance() x := p.parseUnaryExpr() return &ast.UnaryExpr{OpPos: op.Pos, Op: token.MINUS, X: x} case token.PLUS: p.advance() // unary plus — no-op, just parse the operand return p.parseUnaryExpr() case token.NOT: op := p.advance() x := p.parseUnaryExpr() return &ast.UnaryExpr{OpPos: op.Pos, Op: token.NOT, X: x} case token.INC: op := p.advance() x := p.parseUnaryExpr() return &ast.UnaryExpr{OpPos: op.Pos, Op: token.INC, X: x} case token.DEC: op := p.advance() x := p.parseUnaryExpr() return &ast.UnaryExpr{OpPos: op.Pos, Op: token.DEC, X: x} case token.AT: op := p.advance() x := p.parseUnaryExpr() return &ast.RefExpr{AtPos: op.Pos, X: x} case token.ARROW_LEFT: // <- ch (channel receive as expression) pos := p.advance().Pos ch := p.parsePostfixExpr() return &ast.ChanRecvExpr{ArrowPos: pos, Chan: ch} case token.ASYNC_KW: // ASYNC expr — launch async, return future pos := p.advance().Pos call := p.parsePostfixExpr() return &ast.AsyncExpr{AsyncPos: pos, Call: call} case token.AWAIT_KW: // AWAIT future — wait for result pos := p.advance().Pos future := p.parsePostfixExpr() return &ast.AwaitExpr{AwaitPos: pos, Future: future} default: return p.parsePostfixExpr() } } // parsePostfixExpr parses postfix operations: function calls, method sends, // array indexing, postfix ++/--, and alias-> access. func (p *Parser) parsePostfixExpr() ast.Expr { x := p.parsePrimaryExpr() for { switch p.current.Kind { case token.LPAREN: // Function call: x(args...) lp := p.advance().Pos var args []ast.Expr if !p.at(token.RPAREN) { args = p.parseExprList() } rp := p.expect(token.RPAREN).Pos x = &ast.CallExpr{Func: x, LParen: lp, Args: args, RParen: rp} case token.LBRACKET: // Array index: x[index], multi-dim x[i, j], or slice x[low:high] lb := p.advance().Pos // Check for slice syntax: x[:high], x[low:high], x[low:] // Detect by scanning ahead for : before ] if p.isSliceSyntax() { var low, high ast.Expr if !p.at(token.COLON) { low = p.parseSliceIndex() } p.expect(token.COLON) if !p.at(token.RBRACKET) { high = p.parseSliceIndex() } rb := p.expect(token.RBRACKET).Pos x = &ast.SliceExpr{X: x, LBracket: lb, Low: low, High: high, RBracket: rb} continue } // Normal array index index := p.parseExpr() rb := token.Position{} // Multi-dimensional: a[3, 2] → a[3][2] for p.match(token.COMMA) { rb = p.current.Pos x = &ast.IndexExpr{X: x, LBracket: lb, Index: index, RBracket: rb} index = p.parseExpr() lb = rb } rb = p.expect(token.RBRACKET).Pos x = &ast.IndexExpr{X: x, LBracket: lb, Index: index, RBracket: rb} case token.QMARK: // Nil-safe send: x?:method or x?:method(args...) if p.peekAt(1) == token.COLON { p.advance() // consume ? qpos := p.advance().Pos // consume : methodName := p.expectMethodName().Literal var args []ast.Expr hasParens := false if p.at(token.LPAREN) { hasParens = true p.advance() if !p.at(token.RPAREN) { args = p.parseExprList() } p.expect(token.RPAREN) } x = &ast.NilSafeExpr{X: x, QPos: qpos, Method: methodName, Args: args, HasParens: hasParens} } else { return x // bare ? is QOut, not postfix } case token.COLON: // Method send: x:method or x:method(args...) colonPos := p.advance().Pos var methodName string var macroMethod ast.Expr if p.current.Kind == token.AMPERSAND { // x:¯o — dynamic method macroMethod = p.parseMacro() } else { // Accept keywords as method names (end, delete, home, etc.) methodName = p.expectMethodName().Literal } // Check for call: x:method(args...) var args []ast.Expr var lp, rp token.Position hasParens := false if p.at(token.LPAREN) { hasParens = true lp = p.advance().Pos if !p.at(token.RPAREN) { args = p.parseExprList() } rp = p.expect(token.RPAREN).Pos } x = &ast.SendExpr{ Object: x, ColonPos: colonPos, Method: methodName, MacroMethod: macroMethod, HasParens: hasParens, LParen: lp, Args: args, RParen: rp, } case token.ARROW: // Alias access: x->field or (expr)->field arrowPos := p.advance().Pos field := p.parsePrimaryExpr() x = &ast.AliasExpr{Alias: x, ArrowPos: arrowPos, Field: field} case token.INC: // Postfix increment: x++ opPos := p.advance().Pos x = &ast.PostfixExpr{X: x, OpPos: opPos, Op: token.INC} case token.DEC: // Postfix decrement: x-- opPos := p.advance().Pos x = &ast.PostfixExpr{X: x, OpPos: opPos, Op: token.DEC} case token.COLONCOLON: // ::name — Self access (consumed as postfix of implicit Self) // This shouldn't happen here normally; :: is handled in primary return x case token.DOT: // Package member access: pkg.Func or obj.Field // Accept any token with literal (keywords like Index, Count, etc.) if p.peekLitAt(1) != "" { dotPos := p.advance().Pos // consume . member := p.advance() // consume member name x = &ast.DotExpr{X: x, DotPos: dotPos, Member: member.Literal} } else { return x } default: return x } } } // parsePrimaryExpr parses primary expressions (atoms). func (p *Parser) parsePrimaryExpr() ast.Expr { switch p.current.Kind { case token.INT, token.LONG, token.DOUBLE, token.STRING, token.DATE_LIT, token.TRUE, token.FALSE, token.NIL_LIT: tok := p.advance() return &ast.LiteralExpr{ValuePos: tok.Pos, Kind: tok.Kind, Value: tok.Literal} case token.COLONCOLON: // ::name or ::name() or ::name(args) pos := p.advance().Pos if p.at(token.IDENT) || p.current.Literal != "" { name := p.advance() self := &ast.SelfExpr{ColonPos: pos} // Check for () — method call hasParens := false var args []ast.Expr var lp, rp token.Position if p.at(token.LPAREN) { hasParens = true lp = p.advance().Pos if !p.at(token.RPAREN) { args = p.parseExprList() } rp = p.expect(token.RPAREN).Pos } return &ast.SendExpr{ Object: self, ColonPos: pos, Method: name.Literal, HasParens: hasParens, LParen: lp, Args: args, RParen: rp, } } return &ast.SelfExpr{ColonPos: pos} case token.LPAREN: // Parenthesized expression, comma sequence (a,b,c), or (alias)->field // Harbour comma sequence: (expr1, expr2, ...) evaluates all, returns last. // Earlier expressions evaluated for side effects only (Harbour behavior). p.advance() expr := p.parseExpr() for p.match(token.COMMA) { expr = p.parseExpr() // keeps last value — matches Harbour semantics } p.expect(token.RPAREN) return expr case token.IF: // if(cond, true, false) — inline IF = IIF if p.peekAt(1) == token.LPAREN { return p.parseIIF() } // Otherwise fall through to error p.error("expected expression, got IF") tok := p.advance() return &ast.LiteralExpr{ValuePos: tok.Pos, Kind: token.NIL_LIT, Value: "NIL"} case token.IDENT: // Check for IIF(cond, true, false) if strings.ToUpper(p.current.Literal) == "IIF" { return p.parseIIF() } // f"Hello {name}" — string interpolation if p.current.Literal == "f" && p.peekAt(1) == token.STRING { return p.parseInterpolatedString() } tok := p.advance() return &ast.IdentExpr{NamePos: tok.Pos, Name: tok.Literal} case token.AMPERSAND: return p.parseMacro() case token.COLON: // :field — WITH OBJECT send (bare colon prefix) // Treat as self-send: withObj:field pos := p.advance().Pos // consume : if p.at(token.IDENT) || p.current.Literal != "" { name := p.advance() return &ast.SendExpr{ Object: &ast.IdentExpr{NamePos: pos, Name: "__withObject"}, ColonPos: pos, Method: name.Literal, } } return &ast.IdentExpr{NamePos: pos, Name: "__withObject"} case token.LBRACE: return p.parseArrayOrBlock() default: // Keywords used as identifiers in expression context: // 1. Followed by ( → function call: Set(), Type(), Select() // 2. Keywords that can appear as variable/field names: TO, DATA, FIELD, ON, etc. if p.current.Literal != "" { if p.peekAt(1) == token.LPAREN { tok := p.advance() return &ast.IdentExpr{NamePos: tok.Pos, Name: tok.Literal} } // Allow certain keywords as bare identifiers in expression context switch p.current.Kind { case token.TO, token.DATA, token.FIELD, token.IN, token.FROM, token.WHILE, token.EACH, token.ENDDO, token.END, token.NEXT, token.RECOVER, token.SEQUENCE, token.GO, token.GOTO, token.MEMVAR, token.ALIAS, token.WITH, token.ON, token.STEP, token.DESCENDING, token.UNIQUE, token.DELETE_KW, token.RECALL, token.PACK, token.ZAP, token.TYPE_KW, token.CLASS, token.DECLARE, token.INLINE_KW, token.CASE, token.OTHERWISE, token.ENDCASE, token.BEGIN, token.DO, token.ENDIF, token.FOR, token.IF, token.SWITCH, token.RETURN, token.EXIT, token.LOOP, token.LOCAL, token.PRIVATE, token.PUBLIC, token.STATIC, token.PARAMETERS, token.DESTRUCTOR, token.CONSTRUCTOR, token.OPERATOR_KW, token.FUNCTION_KW, token.PROCEDURE, token.METHOD, token.ELSEIF, token.ELSE, token.ENDCLASS, token.USING, token.ASSIGN_KW, token.ACCESS, token.APPEND, token.REPLACE, token.INDEX, token.SEEK, token.SKIP_KW, token.USE, token.SELECT, token.SET: tok := p.advance() return &ast.IdentExpr{NamePos: tok.Pos, Name: tok.Literal} } } p.error("expected expression, got " + p.current.Kind.String() + " " + p.current.Literal) tok := p.advance() return &ast.LiteralExpr{ValuePos: tok.Pos, Kind: token.NIL_LIT, Value: "NIL"} } } // parseArrayOrBlock parses { ... } which can be: // {1, 2, 3} → ArrayLitExpr // {"a" => 1} → HashLitExpr // {|x| x + 1} → BlockExpr // {|| expr} → BlockExpr (no params) func (p *Parser) parseArrayOrBlock() ast.Expr { lbrace := p.expect(token.LBRACE).Pos // Code block: {|params| body} if p.at(token.PIPE) { p.advance() // consume first | var params []string if !p.at(token.PIPE) { // Parse parameter names, with optional AS type for { params = append(params, p.expectMethodName().Literal) // Skip optional AS type: AS NUMERIC, AS STRING, etc. if p.match(token.AS) { for p.current.Kind != token.PIPE && p.current.Kind != token.COMMA && p.current.Kind != token.EOF { p.advance() } } if !p.match(token.COMMA) { break } } } p.expect(token.PIPE) // closing | // Empty block body: {||} or {|x|} → body is NIL if p.at(token.RBRACE) { rbrace := p.advance().Pos nilBody := &ast.LiteralExpr{ValuePos: rbrace, Kind: token.NIL_LIT, Value: "NIL"} return &ast.BlockExpr{LBrace: lbrace, Params: params, Body: nilBody, RBrace: rbrace} } // Parse block body — may have comma-separated expressions // {|x| expr1, expr2, expr3} → all evaluated in order, last is // the return value. Earlier impl dropped intermediate exprs by // overwriting `body`, which was a silent miscompile (any // non-trailing side effect — e.g. ` := + ` in a // multi-pair SUM block — vanished). first := p.parseExpr() var seq []ast.Expr for p.match(token.COMMA) { if seq == nil { seq = []ast.Expr{first} } seq = append(seq, p.parseExpr()) } rbrace := p.expect(token.RBRACE).Pos var body ast.Expr = first if seq != nil { body = &ast.SeqExpr{Items: seq, StartAt: first.Pos(), EndAt: rbrace} } return &ast.BlockExpr{LBrace: lbrace, Params: params, Body: body, RBrace: rbrace} } // Empty: {} → empty array if p.at(token.RBRACE) { rbrace := p.advance().Pos return &ast.ArrayLitExpr{LBrace: lbrace, RBrace: rbrace} } // { ... } → variadic params array (HB_PARAM_ALL()) if p.at(token.DOT) && p.peekAt(1) == token.DOT && p.peekAt(2) == token.DOT { p.advance() // . p.advance() // . p.advance() // . rbrace := p.expect(token.RBRACE).Pos return &ast.CallExpr{ Func: &ast.IdentExpr{NamePos: lbrace, Name: "HB_AParams"}, RParen: rbrace, } } // Empty hash: {=>} → empty hash literal if p.at(token.DBLARROW) { p.advance() // consume => rbrace := p.expect(token.RBRACE).Pos return &ast.HashLitExpr{LBrace: lbrace, RBrace: rbrace} } // Handle leading comma: {, x, y} → {NIL, x, y} if p.at(token.COMMA) { var items []ast.Expr items = append(items, &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"}) for p.match(token.COMMA) { if p.at(token.RBRACE) || p.at(token.COMMA) { items = append(items, &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"}) } else { items = append(items, p.parseExpr()) } } rbrace := p.expect(token.RBRACE).Pos return &ast.ArrayLitExpr{LBrace: lbrace, Items: items, RBrace: rbrace} } // Parse first element to determine: array or hash first := p.parseExpr() // Hash: { key => value, ... } if p.at(token.DBLARROW) { p.advance() // consume => firstVal := p.parseExpr() keys := []ast.Expr{first} vals := []ast.Expr{firstVal} for p.match(token.COMMA) { keys = append(keys, p.parseExpr()) p.expect(token.DBLARROW) vals = append(vals, p.parseExpr()) } rbrace := p.expect(token.RBRACE).Pos return &ast.HashLitExpr{LBrace: lbrace, Keys: keys, Values: vals, RBrace: rbrace} } // Array: {expr, expr, ...} items := []ast.Expr{first} for p.match(token.COMMA) { items = append(items, p.parseExpr()) } rbrace := p.expect(token.RBRACE).Pos return &ast.ArrayLitExpr{LBrace: lbrace, Items: items, RBrace: rbrace} } // parseMacro parses &variable or &(expression). func (p *Parser) parseMacro() ast.Expr { ampPos := p.expect(token.AMPERSAND).Pos if p.at(token.LPAREN) { // &(expression) p.advance() expr := p.parseExpr() p.expect(token.RPAREN) return &ast.MacroExpr{AmpPos: ampPos, Expr: expr} } // &variable[.suffix] — variable can be a keyword name ident := p.expectMethodName() macroExpr := &ast.MacroExpr{ AmpPos: ampPos, Expr: &ast.IdentExpr{NamePos: ident.Pos, Name: ident.Literal}, } // &var.suffix — dot terminates macro, suffix is text concatenation // &var. — dot terminates macro with no suffix // &var.1 — lexer may tokenize .1 as DOUBLE if p.at(token.DOT) { p.advance() // consume . // Skip optional suffix identifier (e.g. &a.aa, &a.1) if p.current.Kind == token.IDENT || p.current.Kind == token.INT { p.advance() } } else if p.current.Kind == token.DOUBLE && (strings.HasPrefix(p.current.Literal, ".") || strings.HasPrefix(p.current.Literal, "0.")) { // Lexer tokenized .1 as DOUBLE — consume it as macro suffix p.advance() } return macroExpr } // parseIIF parses IIF(cond, trueExpr, falseExpr). func (p *Parser) parseIIF() ast.Expr { pos := p.advance().Pos // consume IIF p.expect(token.LPAREN) cond := p.parseExpr() p.expect(token.COMMA) var trueExpr ast.Expr if p.at(token.COMMA) || p.at(token.RPAREN) { trueExpr = &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"} } else { trueExpr = p.parseExpr() } p.expect(token.COMMA) var falseExpr ast.Expr if p.at(token.RPAREN) { falseExpr = &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"} } else { falseExpr = p.parseExpr() } p.expect(token.RPAREN) return &ast.IIfExpr{IfPos: pos, Cond: cond, True: trueExpr, False: falseExpr} } // parseExprList parses a comma-separated list of expressions. func (p *Parser) parseExprList() []ast.Expr { var list []ast.Expr // Handle leading empty param: f(,x) → NIL, x if p.at(token.COMMA) { list = append(list, &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"}) } else { list = append(list, p.parseExpr()) } for p.match(token.COMMA) { // Empty param: f(x,,y) → x, NIL, y if p.at(token.COMMA) || p.at(token.RPAREN) || p.at(token.RBRACE) { list = append(list, &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"}) } else { list = append(list, p.parseExpr()) } } return list } // isSliceSyntax checks if current position inside [...] has a : before ]. // Limited lookahead — scans at most 10 tokens (covers 99% of real cases). func (p *Parser) isSliceSyntax() bool { depth := 0 maxLook := 10 // limit scan to avoid O(n) for i := 0; i < maxLook; i++ { k := p.peekAt(i) switch k { case token.COLON: if depth == 0 { return true } case token.LBRACKET, token.LPAREN, token.LBRACE: depth++ case token.RPAREN, token.RBRACE: depth-- case token.RBRACKET: if depth == 0 { return false } depth-- case token.NEWLINE, token.EOF: return false } } return false // too complex — treat as normal index } // parseSliceIndex parses expression inside slice but stops at : and ] func (p *Parser) parseSliceIndex() ast.Expr { return p.parsePrimaryExpr() // simple: just primary (number, ident, call) } // parseInterpolatedString: f"Hello {name}, age {age}" // Parses the format string and extracts {expr} references. // Converts to: fmt.Sprintf("Hello %v, age %v", name, age) // --- Extracted helpers for expression registry --- // parsePostfixSend: x:method or x:method(args...) func (p *Parser) parsePostfixSend(x ast.Expr) ast.Expr { colonPos := p.advance().Pos var methodName string var macroMethod ast.Expr if p.current.Kind == token.AMPERSAND { macroMethod = p.parseMacro() } else { methodName = p.expectMethodName().Literal } var args []ast.Expr var lp, rp token.Position hasParens := false if p.at(token.LPAREN) { hasParens = true lp = p.advance().Pos if !p.at(token.RPAREN) { args = p.parseExprList() } rp = p.expect(token.RPAREN).Pos } return &ast.SendExpr{ Object: x, ColonPos: colonPos, Method: methodName, MacroMethod: macroMethod, HasParens: hasParens, LParen: lp, Args: args, RParen: rp, } } // parsePrimaryIdent: IDENT (variable, function ref, IIF, f-string) func (p *Parser) parsePrimaryIdent() ast.Expr { if strings.ToUpper(p.current.Literal) == "IIF" { return p.parseIIF() } if p.current.Literal == "f" && p.peekAt(1) == token.STRING { return p.parseInterpolatedString() } tok := p.advance() return &ast.IdentExpr{NamePos: tok.Pos, Name: tok.Literal} } // parsePrimaryWithSend: :field (WITH OBJECT bare colon) func (p *Parser) parsePrimaryWithSend() ast.Expr { pos := p.advance().Pos if p.at(token.IDENT) || p.current.Literal != "" { name := p.advance() return &ast.SendExpr{ Object: &ast.IdentExpr{NamePos: pos, Name: "__withObject"}, ColonPos: pos, Method: name.Literal, } } return &ast.IdentExpr{NamePos: pos, Name: "__withObject"} } // parsePrimarySelf: ::name or ::name(args) func (p *Parser) parsePrimarySelf() ast.Expr { pos := p.advance().Pos if p.at(token.IDENT) || p.current.Literal != "" { name := p.advance() self := &ast.SelfExpr{ColonPos: pos} hasParens := false var args []ast.Expr var lp, rp token.Position if p.at(token.LPAREN) { hasParens = true lp = p.advance().Pos if !p.at(token.RPAREN) { args = p.parseExprList() } rp = p.expect(token.RPAREN).Pos } return &ast.SendExpr{ Object: self, ColonPos: pos, Method: name.Literal, HasParens: hasParens, LParen: lp, Args: args, RParen: rp, } } return &ast.SelfExpr{ColonPos: pos} } func (p *Parser) parseInterpolatedString() ast.Expr { fPos := p.advance().Pos // consume 'f' strTok := p.expect(token.STRING) src := strTok.Literal var fmtBuf string var args []ast.Expr i := 0 for i < len(src) { if src[i] == '{' { // Find closing } j := i + 1 depth := 1 for j < len(src) && depth > 0 { if src[j] == '{' { depth++ } if src[j] == '}' { depth-- } j++ } exprStr := src[i+1 : j-1] // Check for format spec: {expr:fmt} fmtSpec := "%v" if colonIdx := strings.LastIndex(exprStr, ":"); colonIdx >= 0 { fmtSpec = "%" + exprStr[colonIdx+1:] exprStr = exprStr[:colonIdx] } fmtBuf += fmtSpec // Parse the expression inside {} // Simple: just use IdentExpr for variable names args = append(args, &ast.IdentExpr{NamePos: fPos, Name: exprStr}) i = j } else { fmtBuf += string(src[i]) i++ } } if len(args) == 0 { // No interpolation — return as plain string return &ast.LiteralExpr{ValuePos: fPos, Kind: token.STRING, Value: src} } // Build: fmt.Sprintf(fmtStr, arg1, arg2, ...) allArgs := make([]ast.Expr, 0, len(args)+1) allArgs = append(allArgs, &ast.LiteralExpr{ValuePos: fPos, Kind: token.STRING, Value: fmtBuf}) allArgs = append(allArgs, args...) return &ast.CallExpr{ Func: &ast.DotExpr{ X: &ast.IdentExpr{NamePos: fPos, Name: "fmt"}, DotPos: fPos, Member: "Sprintf", }, LParen: fPos, Args: allArgs, RParen: fPos, } }