Lenguajes Formales

Definir un lenguaje de programación, elementos e implementación realizados con Swift.

 ::= <> Backus-Naur Form

 Context Free <A> ::= 0 | <A><B>
 A => 0
 A => AB

 Is a set of production rules that describe all posible strings,
 defined as simple replacements

  ::= "Separate replacement symbols"

 Terminal -> Simple replacement, only one rule
 <NON TERMINAL> -> Multiple replacement, set of rules
 with context free (position of symbols in text)
 
 <letter>   ::= A-Z,a-z
 <var>      ::= [<letter>]
 <var>      ::= <E>
 <Number>   ::= <Decimal>
 <Operator> ::= + | - | * | /
 <Decimal>  ::= 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | <Decimal <Decimal>

 <E> ::= <Number> | <Number> <Operator> <E> | (<E>)

 Recursive Decent Parsing (no left recursion)
 Parser without backtracking, allows to decide which production to use by examining only the next n tokens of input.

Utilidades

Utilidades para los nodos

import Foundation

var identifiers: [String: Float] = [:]
 
extension Float: Node {
    public func interpret() throws -> Float {
        return self
    }
}
 
extension String: Node {
    public func interpret() throws -> Float {
        guard let value = identifiers[self] else {
            throw Parser.Error.undefined(self)
        }

        return value
    }
}
 
extension String {
    func getPrefix(regex: String) -> String? {
        let expression = try! NSRegularExpression(pattern: "^\(regex)", options: [])
        let range = expression.rangeOfFirstMatch(in: self, options: [], range: NSRange(location: 0, length: self.utf16.count))
        
        if range.location == 0 {
            return (self as NSString).substring(with: range)
        }
        return nil
    }
    
    mutating func trimLeadingWhitespace() {
        let i = startIndex

        while i < endIndex {
            guard CharacterSet.whitespacesAndNewlines.contains(self[i].unicodeScalars.first!) else {
                return
            }
            self.remove(at: i)
        }
    }
}

Programa Principal

Varios ejemplos para probar el Lexer y Parser en base a las reglas de producción y post-producción.

//let code = "(4+2)*4+6" //30
//let code = "(4/2)*4+2" //10

let code = "4+2*4+6" //18 with precedence, 30 no precedence

/*
var code = """
    var a = 4
    var b = 2
    var c = 6
    var z = a + b * a + c
"""
*/
 
let lexer = Lexer(code: code)
let parser = Parser(tokens: lexer.tokens)
 
do {
    let ast = try parser.parse()

    print(try ast.interpret())
}
catch {
    print((error as? Parser.Error)!)
}

Token

Cada token representa un patrón dentro de la cadena de caracteres, tipos de dato, símbolos, identificadores, etc.

import Foundation
 
public enum Token {
    case `var`
    case equals
    case op(String)
    case number(Int)
    case parenthesesOpen
    case parenthesesClose
    case identifier(String)
    
    typealias Generator = (String) -> Token?
    
    static var generators: [String: Generator] {
        return [
            "\\=": { _ in .equals },
            "\\+|\\-|\\*|\\/": { .op($0) },
            "\\(": { _ in .parenthesesOpen },
            "\\)": { _ in .parenthesesClose },
            "\\-?([1-9][0-9]*)": { .number(Int($0)!) },
            "[a-zA-Z][a-zA-Z_0-9]*": {
                guard $0 != "var" else { return .var }
                return .identifier($0)
            },
            //"\\-?([1-9]*\\.[0-9]+|[0-9]+)": { .number(Float($0)!) } //FLOAT
        ]
    }
}

Lexer

Procesa el string que recibe cómo argumento y genera una colección de tokens.

import Foundation

public class Lexer {
    public let tokens: [Token]
    
    public init(code: String) {
        var code = code
        code.trimLeadingWhitespace()
        
        var tokens: [Token] = []
 
        while let next = Lexer.getNextPrefix(code: code) {
            let (regex, prefix) = next
            
            code = String(code[prefix.endIndex...])
            code.trimLeadingWhitespace()
            
            guard let generator = Token.generators[regex],
                let token = generator(prefix) else {
                fatalError()
            }
            tokens.append(token)
        }
        self.tokens = tokens
    }
    
    public static func getNextPrefix(code: String) -> (regex: String, prefix: String)? {
        let keyValue = Token.generators
            .first(where: { regex, generator in
                code.getPrefix(regex: regex) != nil
            })
        
        guard let regex = keyValue?.key,
            keyValue?.value != nil else {
            return nil
        }
        
        return (regex, code.getPrefix(regex: regex)!)
    }
}

Parser

A partir de la colección de tokens el parser genera un árbol que mantiene la estructura sintática de nuestro lenguaje de programación de manera abstracta. A este nivel se devuelve ya información sobre errores detectados (sintaxis, declaración de variables).

import Foundation
 
public class Parser {
    enum Error: Swift.Error {
        case expected(String)
        case undefined(String)
        case expectedNumber
        case expectedOperator
        case expectedIdentifier
        case expectedExpression
    }
    
    var index = 0
    let tokens: [Token]
    
    var canPop: Bool {
        return index < tokens.count
    }
    
    public init(tokens: [Token]) {
        self.tokens = tokens
    }
    
    func peek() -> Token {
        return tokens[index]
    }
    
    func peekPrecedence() throws -> Int {
        guard canPop, case let .op(op) = peek() else {
            return -1
        }
        
        switch op {
            case "+", "-": return 1
            case "*", "/": return 2
            default: return 0
        }
    }
    
    func popToken() -> Token {
        let token = tokens[index]
        
        index += 1
        
        return token
    }
    
    func parseNumber() throws -> Float {
        guard case let .number(float) = popToken() else {
            throw Error.expectedNumber
        }
        
        return Float(float)
    }
    
    func parseParentheses() throws -> Node {
        guard case .parenthesesOpen = popToken() else {
            throw Error.expected("(")
        }
        
        let expressionNode = try parseExpression()
        
        guard case .parenthesesClose = popToken() else {
            throw Error.expected(")")
        }
        
        return expressionNode
    }
    
    func parseIdentifier() throws -> Node {
        guard case let .identifier(name) = popToken() else {
            throw Error.expectedIdentifier
        }
        
        return name
    }
    
    func parseValue() throws -> Node {
        switch (peek()) {
            case .number:
                return try parseNumber()
            case .identifier:
                return try parseIdentifier()
            case .parenthesesOpen:
                return try parseParentheses()
            default:
                throw Error.expected("<Expression> not valid")
        }
    }
    
    public func parseExpression() throws -> Node {
        guard canPop else { throw Error.expectedExpression }
        
        let node = try parseValue()
        
        return try ast(node: node)
    }
    
    func parseVariableDeclaration() throws -> Node {
        guard case .var = popToken() else {
            throw Error.expected("\"var\" in variable declaration")
        }

        guard case let .identifier(name) = popToken() else {
            throw Error.expectedIdentifier
        }

        guard case .equals = popToken() else {
            throw Error.expected("= not found for variable <\(name)>")
        }
        
        let expression = try parseExpression()
        
        return VariableDeclaration(name: name, value: expression)
    }
    
    func parse() throws -> Node {
        var nodes: [Node] = []
        
        while canPop {
            let token = peek()
            
            switch token {
                case .var:
                    let declaration = try parseVariableDeclaration()
                    nodes.append(declaration)
                default:
                    let expression = try parseExpression()
                    nodes.append(expression)
            }
        }
        return Block(nodes: nodes)
    }
    
    func ast(node: Node, nodePrecedence: Int = 0) throws -> Node { //Abstract Syntax Tree
        var leftNode = node

        while let precedence = try peekPrecedence() as Int?, precedence >= nodePrecedence {
            guard case let .op(op) = popToken() else {
                throw Error.expectedOperator
            }
            
            let rightNode = try parseValue()
            
            /*

            // **************************  PRECEDENCE **************************   //
            var rightNode = try parseValue()
            let nextPrecedence = try peekPrecedence()
            
            if precedence < nextPrecedence {
                rightNode = try ast(node: rightNode, nodePrecedence: precedence + 1)
            }

            // **************************  PRECEDENCE **************************   //
            */
            
            leftNode = InfixOperation(lhs: leftNode, rhs: rightNode, op: op)
        }
        
        return leftNode
    }
}

El parser devuelve un AST (Abstract syntax tree) el mismo que debe ser interpretado, este árbol está compuesto por nodos

Nodo

Los nodos definen la estructura deseada tanto para la declaración de variables con un keyword o para las distintas operaciones INFIX que se caracterizan por un operador entre operandos 2+2 y el manejo de bloques (1+2) – 2 que puede contener varias operaciones de cada lado del operador.

import Foundation
 
public protocol Node {
    func interpret() throws -> Float
}
 
struct VariableDeclaration: Node {
    let name: String
    let value: Node
 
    func interpret() throws -> Float {
        let val = try value.interpret()
        identifiers[name] = val
        return val
    }
}
 
public struct InfixOperation: Node {
    let lhs: Node
    let rhs: Node
    let op: String
    
    public func interpret() throws -> Float {
        let left = try lhs.interpret()
        let right = try rhs.interpret()
        
        switch op {
            case "+": return left + right
            case "-": return left - right
            case "*": return left * right
            case "+/": return left / right
            default: return 0
        }
    }
}
 
// Node wrapper around multiple nodes
struct Block: Node {
    let nodes: [Node]
    
    func interpret() throws -> Float {
        for line in nodes[0..<(nodes.endIndex - 1)] {
            _ = try line.interpret()
        }

        guard let last = nodes.last else {
            throw Parser.Error.expectedExpression
        }
        
        // All nodes have been interpreted before returning
        return try last.interpret()
    }
}

Intérprete

Este implementa operaciones aritméticas y el orden de las expresiones en base a la prioridad de las mismas al momento de interpretar las operaciones aritméticas.