суббота, 29 мая 2010 г.

F# Parsing simple language

During my professional career I often met the task of code generation based on some source data. Last time it was analogue of BCL compiler for ExpressionTrees and then C# compiler have done mostly all the required job with parsing expressions. It is time to invert our responsibilities and feel ourselves in the shoes of compiler. As a test animal we’ll take MiniML-like language (without binary/unary operators and with slightly modified syntax).

Parsing

Out first goal is transformation of plain character stream into the structured representation. First of all we’ll declare components of abstract syntax tree (AST)

namespace MiniML

type Ast =
| Let of string * Ast * Ast
| Letrec of string * Ast * Ast
| Var of string
| Integer of int
| Lambda of string * Ast
| Apply of Ast * Ast
| IfThenElse of Ast * Ast * Ast

With tools like fslexx/fsyacc creation of parser becomes a easy task, however today we’ll use another techique named parser combinators and define parser using F# itself. More details about this approach can be found in the documents below:

Disclaimer:Version of parser given in sample is pretty straightforward and ineffective for the sake of clearness

First of all we need to declare basic elements and combinators to create more complex parsers.

namespace MiniML

module BaseParsing =
open System

// result of parsing - either value+remaining part of char stream or evidence of failure
type ParseResult<'T> = Success of 'T * list<char> | Failed
// parser itself - function from list of chars to parsing result
type Parser<'T> = Parser of (list<char> -> ParseResult<'T>)

let apply (Parser p) s = p s
let run p l = apply p (Seq.toList l)

// wraps given value into parser keeping char stream untouched
let one v = Parser( fun cs -> Success(v, cs) )
let failed () = Parser (fun _ -> Failed)

// binds subsequent parsers
let bind p f = Parser (fun cs ->
match apply p cs with
| Success(r, cs2) -> apply (f r) cs2
| Failed -> Failed
)
// (OR combinator) applies first parser, if it fails - applies second
let (<|>) p1 p2 = Parser(fun cs ->
match apply p1 cs with
| Failed -> apply p2 cs
| ok -> ok
)
// (AND combinator) applies first parser to source stream, then applies second to remaining part of stream
let (<&>) p1 p2 = Parser(fun cs ->
match apply p1 cs with
| Success(_, cs2) -> apply p2 cs2
| Failed -> Failed
)
// applies given predicate to first symbol in the stream, on success transforms the symbol with p function
let choose f p = Parser(fun cs ->
match cs with
| c::cs2 when f c -> Success(p c, cs2)
| _ -> Failed
)
// selects digit and returns it as a result
let digit = choose Char.IsDigit id
// selects digit and returns unit
let digitU = choose Char.IsDigit ignore
// selects letter and returns it as a result
let letter = choose Char.IsLetter id
// selects digit and returns unit
let letterU = choose Char.IsLetter ignore
// selects letter or digit and returns it as a result
let letterOrDigit = choose Char.IsLetterOrDigit id
// selects digit and returns unit
let letterOrDigitU = choose Char.IsLetterOrDigit ignore
// selects given char and returns it as a result
let char c = choose ((=) c) id
// selects given char and returns unit
let charU c = choose ((=) c) ignore
// selects whitespace
let ws = choose Char.IsWhiteSpace ignore

// To enable computation expresion syntax
type ParserBuilder() =
member this.Return(v) = one v
member this.Bind(p, f) = bind p f
member this.ReturnFrom(p) = p
member this.Zero() = failed ()
let parser = ParserBuilder()

// applies given parser zero or more times. results are composed with v0 using f (in fold like manner)
let rec zeroOrMany p f v0 = parser {
return! oneOrMany p f v0 <|> one v0
}
// applies given parser one or more times. results are composed with v0 using f (in fold like manner)
and oneOrMany p f v0 = parser {
let! v1 = p
return! zeroOrMany p f (f v0 v1)
}

// repetition of given parser 0 or more times (similar to * in regexes)
let zeroOrManyU p = zeroOrMany p (fun _ _ -> ()) ()
// repetition of given parser 1 or more times (similar to + in regexes)
let oneOrManyU p = oneOrMany p (fun _ _ -> ()) ()
// transforms parser result with f function
let map p f = parser {
let! v = p
return f v
}

Now we can declare language grammar itself

namespace MiniML

module MiniMLParser =

open System
open BaseParsing

// Helpers

// checks if parsed text matches given string
let text (s : string) = Parser(fun cs ->
let rec loop l p =
if p >= s.Length then Success((), l)
else
match l with
| h::tl when h = s.[p] -> loop tl (p + 1)
| _ -> Failed
loop cs 0
)
// defines parser that consumes 0+ whitespaces + specified parser
let ws0Plus p = zeroOrManyU ws <&> p
// defines parser that consumes 1+ whitespaces + specified parser
let ws1Plus = oneOrManyU ws
// matches 0+ whitespaces + specifies string
let ws0Text s = ws0Plus (text s)
let ws1Text s = ws1Plus <&> (text s)
// matches 0+ whitespaces + specified char
let ws0Char c = ws0Plus (charU c)

// list of language keywords
let keywords = set ["let"; "letrec"; "in"; "if"; "then"; "else"]

// zero or more whitespaces + optional '-' + one or more digit
let integer = parser {
let! sign = ws0Plus ((char '-' <&> one -1) <|> (one 1))
let! value = 0 |> oneOrMany digit (fun acc v -> acc * 10 + (int v - int '0'))
return sign * value
}

// zero or more whitespaces + letter + zero or more letters or digits
// result should not be keyword
let identifier = parser {
let! f = ws0Plus letter
let sb = (new Text.StringBuilder()).Append(f)
let! r = sb |> zeroOrMany letterOrDigit (fun acc v -> acc.Append(v))
let text = r.ToString()
if not <| keywords.Contains(text)
then return text
}

let rec expr =
let parseIdentifier = map identifier Var
let parseNumber = map integer Integer

let parseLet, parseLetrec =
let p keyword f = parser {
do! ws0Text keyword <&> ws
let! id = identifier
do! ws0Char '='
let! value = expr
do! ws1Text "in"<&> ws
let! body = expr
return f(id, value, body)
}
p "let" Let, p "letrec" Letrec

let parseLambda = parser {
do! ws0Char '\\'
let! id = identifier
do! ws0Text "->"
let! body = expr
return Lambda(id, body)
}

let parseBracketed = parser {
do! ws0Char '('
let! e = expr
do! ws0Char ')'
return e
}

let parseApply = parser {
let! f = parseIdentifier <|> parseBracketed
return! f |> oneOrMany (parseIdentifier <|> parseNumber <|> parseBracketed) (fun acc e -> Apply(acc, e))
}

let parseIfThenElse = parser {
let body = parseIdentifier <|> parseBracketed <|> parseNumber

do! ws0Text "if" <&> ws
let! cond = parseIdentifier <|> parseBracketed
do! ws1Text "then" <&> ws
let! ifTrue = body
do! ws1Text "else" <&> ws
let! ifFalse = body
return IfThenElse(cond, ifTrue, ifFalse)

}

parseLet <|> parseLetrec <|> parseLambda <|> parseIfThenElse <|> parseApply <|> parseBracketed <|> parseIdentifier <|> parseNumber

let parse s = run expr s

To visualize parsing results we’ll use auxiliary function toString

open MiniML

let rec astToString = function
| Let(name, v, body) ->
sprintf "let %s =
%s in
%s" name (astToString v) (astToString body)
| Letrec(name, v, body) ->
sprintf "letrec %s =
%s in
%s" name (astToString v) (astToString body)
| Apply(arg, r) -> sprintf("(%s %s)") (astToString arg) (astToString r)
| IfThenElse(cond, ifTrue, ifFalse) ->
sprintf "if %s
then %s
else %s" (astToString cond) (astToString ifTrue) (astToString ifFalse)
| Integer(v) -> string v
| Lambda(name, body) -> sprintf "\%s -> %s" name (astToString body)
| Var(name) -> name

let parse s =
match MiniMLParser.parse s with
| BaseParsing.Success(r, []) -> printfn "%s" (astToString r)
| BaseParsing.Success(r, rest) ->
printfn "Warning: remaining chars found %A" rest
printfn "%s" (astToString r)
| BaseParsing.Failed -> printfn "Parsing failed for %s" s

let sources =
[
@"letrec length = \l -> if (null l) then 0 else (add 1 (length (tail l))) in length"
@"letrec fact = \x -> if (eq x 1) then 1 else (mul x (fact (add x -1))) in fact"
@"let f = \x -> \y -> (cons (x y) y) in f"
]

for src in sources do
parse src
(*
letrec length =
\l -> if (null l)
then 0
else ((add 1) (length (tail l))) in
length
letrec fact =
\x -> if ((eq x) 1)
then 1
else ((mul x) (fact ((add x) -1))) in
fact
let f =
\x -> \y -> ((cons (x y)) y) in
f
*)

All right, parsing step is completed but now we are facing another problem: our AST doesn’t contain any information about expression types (except the primitive numeric values). Luckily we know the answer.

Type inference

Wikipedia defines type inference as

Type inference, or implicit typing, refers to the ability to deduce automatically the type of a value in a programming language

In our compiler we’ll implement Hindley Milner type inference algorithm that is used by F# itself (as well as Haskell, Ocaml etc). Also nice and simple explanation can be found in ScalaByExample or here.

namespace MiniML

exception TypeInferenceException of Ast * string

module Types =
open System

type TypeVarName = string

// basic entity in the algorithm
type Type =
| TypeVar of TypeVarName
| Function of Type * Type
| TypeConstructor of string * Type list
override this.ToString() =
match this with
| TypeVar(name) -> name
| Function(arg, res) -> sprintf "(%O -> %O)" arg res
| TypeConstructor(name, args) ->
if List.isEmpty args
then name
else sprintf "%s[%s]" name (String.Join(", ", args |> Seq.map string))

exception UnificationException of Type * Type

// creates fresh type variables
type TypeVarGenerator() =
let n = ref 0
member this.New() =
incr n
TypeVar(sprintf "T%d" !n)

// represents mapping between types
[<AbstractClass>]
type Substitution private() =
static let empty = {new Substitution() with override this.Lookup(tv) = TypeVar(tv) }
static member Empty = empty

abstract Lookup : TypeVarName -> Type
member this.Run(t) =
match t with
| TypeVar tv ->
let substituted = this.Lookup(tv)
if t = substituted then substituted
else this.Run(substituted)
| Function(a, r) ->
Function(this.Run(a), this.Run(r))
| TypeConstructor(name, tyArgs) ->
TypeConstructor(name, tyArgs |> List.map this.Run)

member this.Extend(v : TypeVarName, t : Type) =
{ new Substitution() with override __.Lookup(tv) = if v = tv then t else this.Lookup(tv) }

// type + free type variables in type
// Instance returns new type where all free variables are replaces with fresh type vars
type TypeScheme(tyVars : Set<_>, t, gen : TypeVarGenerator) =
let subst = (Substitution.Empty, tyVars) ||> Seq.fold(fun s tt -> s.Extend(tt, gen.New() ))
let instance = subst.Run(t)
member this.Instance = instance
member this.TypeVariables = tyVars
member this.Type = t

// maps names to type schemes
// contains auxiliary functions that extracts type vars from different entities
module Env =
type Environment = Map<string, TypeScheme>

let rec typeVarsOfType = function
| TypeVar(tv) -> Set.singleton tv
| Function(a, r) -> typeVarsOfType(a) + typeVarsOfType(r)
| TypeConstructor(_, tyArgs) -> (Set.empty, tyArgs) ||> List.fold (fun acc ty -> acc + typeVarsOfType ty)

let typeVarsOfScheme(s : TypeScheme) =
(typeVarsOfType s.Type) - s.TypeVariables

let typeVarsOfEnv(e : Environment) =
let schemes = e |> Map.toSeq |> Seq.map snd
(Set.empty, schemes) ||> Seq.fold (fun acc s -> acc + (typeVarsOfScheme s))

let typeToScheme (env : Environment, t : Type, gen) =
TypeScheme((typeVarsOfType t) - (typeVarsOfEnv env), t, gen)

type Inferencer() =
let nameGenerator = TypeVarGenerator()

// calculates substitution that can convert two specified types into equivalent one
let rec unify(a : Type, b : Type, s : Substitution) =
match(s.Run(a), s.Run(b)) with
| TypeVar(ta), TypeVar(tb) when ta = tb ->
s
| TypeVar(ta), _ when not <| Env.typeVarsOfType(b).Contains(ta) ->
s.Extend(ta, b)
| _, TypeVar(_) ->
unify(b, a, s)
| Function(a1, b1), Function(a2, b2) ->
unify(a1, a2, unify(b1, b2, s))
| TypeConstructor(name1, args1), TypeConstructor(name2, args2) when name1 = name2 ->
(s, args1, args2) |||> List.fold2 (fun subst t1 t2 -> unify(t1, t2, subst))
| x,y -> UnificationException(x,y) |> raise

// predefines types and expressions
let newTypeVar = nameGenerator.New()
let newTypeScheme t = Env.typeToScheme(Map.empty, t, nameGenerator)

let boolean = TypeConstructor("bool", [])
let integer = TypeConstructor("int", [])
let list(t) = TypeConstructor("list", [t])

let builtins =
[
"true", newTypeScheme(boolean)
"false", newTypeScheme(boolean)
"nil", newTypeScheme(list(newTypeVar))
"cons", newTypeScheme(Function(newTypeVar, Function(list(newTypeVar), list(newTypeVar))))
"null", newTypeScheme(Function(list(newTypeVar), boolean))
"eq", newTypeScheme(Function(newTypeVar, Function(newTypeVar, boolean)))
"add", newTypeScheme(Function(integer, Function(integer, integer)))
"mul", newTypeScheme(Function(integer, Function(integer, integer)))
"sub", newTypeScheme(Function(integer, Function(integer, integer)))
"tail", newTypeScheme(Function(list(newTypeVar), list(newTypeVar)))
"head", newTypeScheme(Function(list(newTypeVar), newTypeVar))
] |> Map.ofList

// compute general substitution (based on s) for the pair of ast and baseType
let rec analyze (e : Env.Environment, ast, baseType : Type, s : Substitution) =
try
match ast with
| Integer(v) ->
unify(integer, baseType, s)
| Var(name) ->
if not (e.ContainsKey name)
then failwithf "Name %s no found" name

let schema = e.[name]
unify(schema.Instance, baseType, s)
| Lambda(arg, body) ->
let a = nameGenerator.New()
let b = nameGenerator.New()

let s1 = unify(baseType, Function(a, b), s)
let newEnv = e.Add(arg, TypeScheme(Set.empty, a, nameGenerator))
analyze(newEnv, body, b, s1)
| Apply(f, arg) ->
let a = nameGenerator.New()
let s1 = analyze(e, f, Function(a, baseType), s)
analyze(e, arg, a, s1)
| Let(name, inV, body) ->
let a = nameGenerator.New()
let s1 = analyze(e, inV, a, s)
analyze(e.Add(name, Env.typeToScheme(e, s1.Run(a), nameGenerator)),body, baseType, s1)
| Letrec(name, inV, body) ->
let t = nameGenerator.New()
let newEnv = e.Add(name, TypeScheme(Set.empty, t, nameGenerator))
let s1 = analyze(newEnv, inV, t, s)
analyze(e.Add(name, Env.typeToScheme(e, s1.Run(t), nameGenerator)), body, baseType, s1)

| IfThenElse(cond, ifTrue, ifFalse) ->
let s1 = analyze(e, cond, boolean, s)
let s2 = analyze(e, ifTrue, baseType, s1)
analyze(e, ifFalse, baseType, s2)
with
UnificationException(t1, t2) -> TypeInferenceException(ast, sprintf "Cannot unify %O and %O" t1 t2) |> raise

// renames type variables into good-looking ones
let alpha (t) =
let l = ref 'A'
let map = Collections.Generic.Dictionary<_, _>()
let rec run = function
| TypeVar(name) ->
if not <| map.ContainsKey(name) then
let newName = string (!l)
l := Convert.ToChar(int !l + 1)
map.Add(name, newName)
TypeVar(map.[name])
| Function(arg, res) -> Function(run arg, run res)
| TypeConstructor(name, typeArgs) -> TypeConstructor(name, List.map run typeArgs)
run t

member this.TypeOf(ast) =
let a = nameGenerator.New()
analyze(builtins, ast, a, Substitution.Empty).Run(a) |> alpha

Some samples

open MiniML

let sources =
[
@"if (1) then true else false", "incorrect condition type"
@"if true then 1 else false", "different types for conditional branches"
@"letrec fold =
\f -> \s -> \l -> if (null l) then s else (fold f (f s (head l)) (tail l)) in
fold", "generic fold"
@"letrec fold =
\f -> \s -> \l -> if (null l) then s else (fold f (f s (head l)) (tail l)) in
fold add", "partial application"
@"letrec length = \l -> if (null l) then 0 else (add 1 (length (tail l))) in length", "recursive definition1"
@"letrec fact = \x -> if (eq x 1) then 1 else (mul x (fact (add x -1))) in fact", "factorial"
@"let f = \x -> \y -> (cons (x y) y) in f", ""
]

let rec astToString = function
| Let(name, v, body) ->
sprintf "let %s =
%s in
%s" name (astToString v) (astToString body)
| Letrec(name, v, body) ->
sprintf "letrec %s =
%s in
%s" name (astToString v) (astToString body)
| Apply(arg, r) -> sprintf("(%s %s)") (astToString arg) (astToString r)
| IfThenElse(cond, ifTrue, ifFalse) ->
sprintf "if %s
then %s
else %s" (astToString cond) (astToString ifTrue) (astToString ifFalse)
| Integer(v) -> string v
| Lambda(name, body) -> sprintf "\%s -> %s" name (astToString body)
| Var(name) -> name

let parse (s, comment) =
printfn "===%s" comment
let inferencer = Types.Inferencer()
match MiniMLParser.parse s with
| BaseParsing.Success(r, []) ->
printfn "%s" (astToString r)
let text = try inferencer.TypeOf(r).ToString() with TypeInferenceException(ast, text) -> sprintf "%s: %s" (astToString ast) text
printfn "Type: %s" text
| _ -> printfn "Parsing failed for %s" s

for src in sources do
parse src
(*
===incorrect condition type
if 1
then true
else false
Type: 1: Cannot unify int and bool
===different types for conditional branches
if true
then 1
else false
Type: false: Cannot unify bool and int
===generic fold
letrec fold =
\f -> \s -> \l -> if (null l)
then s
else (((fold f) ((f s) (head l))) (tail l)) in
fold
Type: ((A -> (B -> A)) -> (A -> (list[B] -> A)))
===partial application
letrec fold =
\f -> \s -> \l -> if (null l)
then s
else (((fold f) ((f s) (head l))) (tail l)) in
(fold add)
Type: (int -> (list[int] -> int))
===recursive definition1
letrec length =
\l -> if (null l)
then 0
else ((add 1) (length (tail l))) in
length
Type: (list[A] -> int)
===factorial
letrec fact =
\x -> if ((eq x) 1)
then 1
else ((mul x) (fact ((add x) -1))) in
fact
Type: (int -> int)
===
let f =
\x -> \y -> ((cons (x y)) y) in
f
Type: ((list[A] -> A) -> (list[A] -> list[A]))
*)

Notes:

  1. Substutution is principal of the algorithm. It is function that maps type variables in given type to other types. Base substitution is Empty that is simple identity mapping. Inferencer.unify method creates general substitution that
    • Extend source substitution
    • Can convert two specified type into equivalent one
  2. TypeScheme describes structure of the type. It contains type itself and all type variables that is free in type (not captured from environment)
  3. Type inference mechanism relies on known information about built-in basic types (defined by builtins map)
  4. After type inference process is completed we perform alpha conversion for type variables to replace temporary names with nice looking ones

вторник, 18 мая 2010 г.

Copy-And-Update in C#

Disclaimer: only for demonstration purposes :).

Immutability is natural for F# types; tuples, lists and records are immutable by default. Also records have special syntax “copy-and-update” that allows creation copy of existing record specifying only modified fields.

type DataObject = 
{ Id : int
Value : string
Object : DataObject2 }
and DataObject2 =
{ Price : decimal
PriceType : string }

let o1 = {Id = 100; Value = "Value"; Object = {Price = 10m; PriceType = "CleanPrice"}}
let o2 = {o1 with Id=500; Object = {o1.Object with Price = 500m} }
(*
val o1 : DataObject = {Id = 100;
Value = "Value";
Object = {Price = 10M;
PriceType = "CleanPrice";};}
val o2 : DataObject = {Id = 500;
Value = "Value";
Object = {Price = 500M;
PriceType = "CleanPrice";};}
*)

C# doesn't provide similar things, but using VS 2010 and combination of named and optional parameters we can achive almost the same level of expressiveness.

var o1 = new DataObject(100, "Value1", new DataObject2(10m, "CleanPrice"));
var o2 = o1.With(id: 500, obj: o1.Object.With(price: 500));
Console.WriteLine(o1);
Console.WriteLine(o2);
//Id = 100, Value = Value1, Object = (Price:10, PriceType=CleanPrice)
//Id = 500, Value = Value1, Object = (Price:500, PriceType=CleanPrice)

Pretty close to original, huh? Idea of implementation is very simple, we somehow need to distinguish missing values from entered. To achieve it we introduce struct Optional that will hold all input parameters

    public struct Optional<T>
{
public T Value;
public bool HasValue;

public Optional(T value)
{
HasValue = true;
Value = value;
}

public static implicit operator Optional<T>(T value)
{
return new Optional<T>(value);
}
public T ValueOrDefault(T defaultValue) { return HasValue ? Value : defaultValue; } }

With method in each class declare its arguments with type Optional<T> and default value default( Optional<T>). This is simple solution, all entered arguments shall be converted by implicit operator and thus have HasValue=true. Optional is struct because otherwise it is impossible to input null as user defined value, it shall be interpreted not as valid value but rather as missing one.

    public class DataObject
{
public int Id { get; private set; }
public string Value { get; private set; }
public DataObject2 Object { get; private set; }

public DataObject(int id, string value, DataObject2 o)
{
Id = id;
Value = value;
Object = o;
}

private DataObject()
{
}

public override string ToString()
{
return string.Format("Id = {0}, Value = {1}, Object = ({2})", Id, Value ?? "null", Object != null ? Object.ToString() : "null");
}

public DataObject With(
Optional<int> id = default(Optional<int>),
Optional<string> value = default(Optional<string>),
Optional<DataObject2> obj = default(Optional<DataObject2>)
)
{
return new DataObject
{
Id = id.ValueOrDefault(Id),
Value = value.ValueOrDefault(Value),
Object = obj.ValueOrDefault(Object)
};
}
}

public class DataObject2
{
public decimal Price { get; private set; }
public string PriceType { get; private set; }

public DataObject2(decimal price, string priceType)
{
Price = price;
PriceType = priceType;
}

private DataObject2()
{
}

public DataObject2 With(
Optional<decimal> price = default(Optional<decimal>),
Optional<string> priceType = default(Optional<string>)
)
{
return new DataObject2
{
Price = price.ValueOrDefault(Price),
PriceType = priceType.ValueOrDefault(PriceType)
};
}

public override string ToString()
{
return string.Format("Price:{0}, PriceType={1}", Price, PriceType ?? "null");
}
}

суббота, 15 мая 2010 г.

F#: Active Patterns

Introduction

Pattern matching is an extremely powerful feature supported by F#. Ability to decompose composite data structure into constituent parts (recursively by support of nested patterns) makes analysis of complex objects a piece of cake. As a sample we can take insertion operation for AVL tree and implement tree rotation with pattern matching. Entire process is nicely demonstrated on the picture below.

type Tree<'T> = 
| Nil
| Node of Tree<'T> * 'T * Tree<'T>

let rec height = function
| Nil -> 0
| Node(l, _, r) -> 1 + max (height l) (height r)

// balance factor - difference between heights
let balance l r = height l - height r

// extracts value contained in tree
let valueOf = function
| Nil -> failwith "Nil has no value"
| Node(_, v, _) -> v

///Tree is valid if both left and right subtree are valid and balance factor of tree is lesser than 2
let rec validate = function
| Nil -> true
| Node(l, _, r) ->
validate l && validate r && (abs (balance l r) < 2)

// Rotate functions
let rotateLL = function
| Node(Node(ll, lv, lr), v, r) -> Node(ll, lv, Node(lr, v, r))
| x -> failwithf "Unexpected %O" x
let rotateRR = function
| Node(l, v, Node(rl, rv, rr)) -> Node(Node(l, v, rl), rv, rr)
| x -> failwithf "Unexpected %O" x
let rotateLR = function
| Node(Node(ll, lv, Node(lrl, lrv, lrr)), v, r) -> Node(Node(ll, lv, lrl), lrv, Node(lrr, v, r))
| x -> failwithf "Unexpected %O" x
let rotateRL = function
| Node(l, v, Node(Node(rll, rlv, rlr), rv, rr)) -> Node(Node(l, v, rll), rlv, Node(rlr, rv, rr))
| x -> failwithf "Unexpected %O" x

// the hero of blogpost: multi-case active pattern
let (|Lesser|Greater|Equals|) (a, b) =
if a < b then Lesser
else if a = b then Equals
else Greater

let rec insert v = function
| Nil -> Node(Nil, v, Nil)
| Node(l, c, r) as self->
// common part
// if balance factor of newly inserted node and its sibling is equal to threshold value (2)
// we should rebalance tree
let doInsert inserted newNode sibling previous rotateL rotateR =
if balance inserted sibling = 2 then
if v < valueOf previous then rotateL newNode
else rotateR newNode
else newNode
match (v,c) with
| Equals -> self
| Lesser ->
let inserted = insert v l
let newNode = Node(inserted, c, r)
doInsert inserted newNode r l rotateLL rotateLR
| Greater ->
let inserted = insert v r
let newNode = Node(l, c, inserted)
doInsert inserted newNode l r rotateRL rotateRR

let flip f x y = f y x
let random = seq {
let rnd = new System.Random()
while true do
yield rnd.Next()
}
let tree = (Nil, Seq.take 1000 random) ||> Seq.fold (flip insert)

printfn "%O" (validate tree) // True

Code of rotation routines is almost verbatim translation of the picture. As you see description of each case takes three source lines, IMO very good result. Without pattern matching analysis of nested elements will be much more cumbersome.

F# spec defines large number of pattern forms that are already built in language. However sometimes it is not enough to express intentions of the developer, you may need to desconstuct the F# structure in application-dependent way (split list of numbers into two parts with odd in first and evens in second) or the object is opaque to F# (interoperabiliy issue). Luckily F# allows writing custom recognizers that can participate in pattern matching as first class citizens (as opposed to functions that can appear only in guards), this language feature is called Active Patterns.

Single Case Active Recognizer.

The first and the simpliest kind of Active Patterns is Single Case pattern – it just converts one value to another without putting restrictions on type of result.

open System.Drawing

let (|ARGB|) (c : Color) = c.A, c.R, c.G, c.B
let (ARGB (a, r, g, b)) = Color.Aquamarine
(*
val r : byte = 127uy
val g : byte = 255uy
val b : byte = 212uy
val a : byte = 255uy
*)

Single Case pattern can also be used with additional parameters. F# specification puts Parameterized Single Case Patterns into separate group but for the sake of simplicity (IMO) basic Single Case patterns can be imagined as parameterized ones with no arguments.

//Value to match comes as a last mandatory argument.
(|SingleCase|) [arg0…argn] input

This type of Active Patterns doesn’t perform filtering of incoming values; it is converter and nothing more. However you can always mix it with other patterns (i.e. constant pattern).

type Object(id : int) = 
member this.Id = id

let (|Id|) (o : Object) = o.Id

// initially active recognizer extracts value of Id from the object, then Id is matched with simple constant pattern
let selectObjectWithSpecificIds o =
match o with
| Id 10 -> printfn "Object with id=10 found"
| Id 20 -> printfn "Object with id=20 found"
| _ -> printfn "Required object not found"

selectObjectWithSpecificIds (Object(100))
(*
Required object not found
*)
selectObjectWithSpecificId (Object(10))
(*
Object with id=10 found
*)

Sometimes it is enough, but ofter you need to pick objects based on more complex criteria and in this case the guy you need – partial active recognizer.

Partial Active Recognizer.

This type of recognizers combines abilities of the Single Case patterns with a possibility to reject input value; that’s why they return not just ‘T but option<’T>.

// tests if specified string value can be parsed as int value
let (|Int|_|) v =
match Int32.TryParse(v) with
| (true, v) -> Some(v)
| false, _ -> None

let (Int x) = "100" // x = 100
match "a" with
| Int v -> printfn "this is int value %d" v
| _ -> printfn "something else"
(*
something else
*)

By the way of application this recognizer is close to function passed into List.choose, you can simultaneously perform filtering and conversion. In fact you can directly use recognizer as a function in choose

let lst = ["1"; "a"; "2"; "b"; "3"]
let res = lst |> List.choose (|Int|_|)
printfn "%O" res
(*
[1; 2; 3]
*)

Similarly to Single Case patterns Partial Active recognizers can accept parameters, everything that was previously said regarding Single Case patterns is also applicable to partial ones. Declaration syntax is slightly different

// last argument is mandatory
(|Partial|_|) [arg0…argn] input


// tests if specified string value can be parsed as int value
let (|Int|_|) v =
match Int32.TryParse(v) with
| (true, v) -> Some(v)
| false, _ -> None

// tests if specified string value can be parsed as double value
let (|Double|_|) v =
match Double.TryParse(v) with
| true, v -> Some(v)
| false, _ -> None

// tests if specified string value starts with given prefix
let (|Prefix|_|) prefix (s : string) =
if s.StartsWith(prefix) then Some(s.Substring(prefix.Length))
else None

let classifyString s =
match s with
| Int v -> printfn "Int: %d" v
| Double v -> printfn "Double: %f" v
| Prefix "http://" v -> printfn "String that starts with http://: %s" v

You can notice that if we try to compile code from above, the compiler will issue warning FS0025: Incomplete pattern matches on this expression. because it cannot verify that match cases are exhaustive. If you have fixed number of categories and want to put matched value into one of them  then you definitely should use third kind of active recognizers: multi case active recognizer.

Multi-Case Active Recognizer

This type of recognizers makes input value fall into one of existing categories. Recognizers of this kind differ from the ones we’ve already seen: first of all they do not accept any arguments except input value, second: they return Choice<…>.Declaration syntax is following:

// Сase1..СaseN is disguised Choice
(|Case1|Case2|..|CaseN|) input

We can rewrite slightly modified sample with multicase recognizers and make sure that warning dissapears.

// ( |Int|Double|Other| ) : string -> Choice<int,float,string>
let (|Int|Double|Other|) s =
match Int32.TryParse(s) with
| (true, v) -> Int(v)
| false, _ ->
match Double.TryParse(s) with
| true, v -> Double(v)
| false, _ -> Other(s)

let classifyString s =
match s with
| Int v -> printfn "Int: %d" v
| Double v -> printfn "Double: %f" v
| Other s -> printfn "Other string %s" s

Practical application

Everything is limited by the imagination :). Adam Granicz‘ve posted amazing article about creating parsers based on active patterns. Nice example is interoperability with another .Net languages, you can easily enhance existing object with pattern matching capabilities without converting them to F# friendly structures.

using System.Collections.Generic;
using System.Linq;

namespace CSharp
{
public class DataObject
{
public int Id { get; set; }
public string Name { get; set; }
public string Value { get; set; }

public static List<DataObject> Create(int n)
{
return Enumerable
.Range(1, n)
.Select(i => new DataObject {Id = i, Name = "Name:" + i, Value = "Value:" + i}).
ToList();
}
}
}
open CSharp
module CSInterop =
// single case patterns
let (|Id|) (o : DataObject) = o.Id
let (|Name|) (o : DataObject) = o.Name
let (|Value|) (o : DataObject) = o.Value

// partial patterns (why I cannot use ? in identifiers like I can in Schema)
// my dream
//let (|Id?|_|) id (o : DataObject) = if o.Id = id then Some(o) else None
let (|Id_|_|) id (o : DataObject) = if o.Id = id then Some(o) else None
let (|Name_|_|) name (o : DataObject) = if o.Name = name then Some(o) else None
let (|Value_|_|) value (o : DataObject) = if o.Value = value then Some(o) else None

open CSInterop

let namesValues = DataObject.Create(5) |> Seq.map(function | Name name & Value value -> name + " " + value)
let findObject n = DataObject.Create(5) |> Seq.tryPick(function | Id_ n _ & Name name -> Some(name) | _ -> None)

Ability to decompose structure in arbitrary way is extremly powerful. With Active Patterns can perform pattern matching over any object model created in any .NET language. Processing XML, traversing LINQ expression trees, various file system based operations, decomposing text with regular expressions and binding matched group to the list elements or tuple components…You are the master of the Universe!

As a close-to-life sample we’ll examine transformation of expression trees. Often it is useful to describe structure of the objects with simple and straightforward expressions and later mechanically transform them into more complex ones. Expression can be used both for deducing type of field for the UI components and as a source for creating accessor delegates.

public class DataObject
{
public int Id { get; set; }
public string Name { get; set; }
}

Expression<Func<DataObject, string[]>> Expr = obj => obj.Name.Split(',');

Expression Expr describes what actions need to be performed to obtain result and provides complete type-related information. Unfortunatly we cannot use this compiled version of this expression directly because it is error-prone: NullReferenceException will be thrown in case of null argument or null Name property. Sad, but not fatal: we can easily detect all possible null references in this expression and transform it so all dangerous calls will be routed through special function.

type Methods private() = 
static member ClassBindMethod = typeof<Methods>.GetMethod("ClassBind")
static member ClassBind<'T when 'T : null and 'T : equality>(t : 'T, f : Func<'T, obj>) =
if t <> null then f.Invoke(t)
else box null

Transformed version will look like this:

ClassBind(obj, p0 => ClassBind(p0.Name, p1 -> p1.Split(',')))

Disclaimer: implementation below is made just for demonstration and supports only reference types as possible null candidates.

open System
open System.Collections.Generic
open System.Linq.Expressions

type Methods private() =
static member ClassBindMethod = typeof<Methods>.GetMethod("ClassBind")
static member ClassBind<'T when 'T : null and 'T : equality>(t : 'T, f : Func<'T, obj>) =
if t <> null then f.Invoke(t)
else box null


let isNullable(t : Type) = t.IsClass || t.IsInterface

// Multi-Case pattern for expression
let (|MemberAccess|MethodCall|Lambda|Other|) (expr : Expression) =
match expr.NodeType with
| ExpressionType.Lambda ->
let lambda = expr :?> LambdaExpression
Lambda(lambda.Type, lambda.Parameters, lambda.ReturnType, lambda.Body)
| ExpressionType.MemberAccess ->
let memberAccess = expr :?> MemberExpression
MemberAccess(memberAccess.Expression, memberAccess.Member)
| ExpressionType.Call ->
let call = expr :?> MethodCallExpression
MethodCall(call.Object, call.Method, call.Arguments |> Seq.toList)
| _ -> Other(expr)

// gathers nullable candidates from given expression
// checks instance argument in member accesses and method calls
let rec collectNullableExpressions e =
match e with
| Lambda(_, _, _, body) -> collectNullableExpressions body
| MemberAccess(instance, _) when instance <> null ->
let l1 = collectNullableExpressions instance
if isNullable instance.Type then instance::l1 else l1
| MethodCall(instance, _, args) ->
let l1 =
if instance <> null
then if isNullable instance.Type then instance::(collectNullableExpressions instance) else collectNullableExpressions instance
else []
(List.collect collectNullableExpressions args) @ l1
| _ -> []

// replaces all nullable references in expressions to the corrected values
let rec fixNullableRefs (map : Dictionary<_, _>) e =
match e with
| MemberAccess(instance, memberInfo) when instance <> null && map.ContainsKey instance ->
Expression.MakeMemberAccess(snd map.[instance], memberInfo) :> Expression, true
| MethodCall(instance, methodInfo, args) when instance <> null && map.ContainsKey instance ->
Expression.Call(snd map.[instance], methodInfo, args |> List.map (fixNullableRefs map >> fst) |> List.toArray) :> Expression, true
| MethodCall(instance, methodInfo, args) ->
let (fixedArgs, changes) = List.map (fixNullableRefs map) args |> List.unzip
if (List.exists id changes) then
Expression.Call(instance, methodInfo, fixedArgs |> List.toArray) :> Expression, true
else
e, false
| x -> x, false

let fix<'T when 'T :> LambdaExpression> (e :'T) =
let nullables =
collectNullableExpressions e
|> List.rev
|> Seq.distinct
|> List.ofSeq
let counter = ref 0
let map = new Dictionary<_, _>()
for nullable in nullables do
let (patchedExpr, hasChanges) = fixNullableRefs map nullable
let param = Expression.Parameter(nullable.Type, sprintf "p%d" !counter)
map.[nullable] <- ((if hasChanges then patchedExpr else nullable), param)
incr counter
let fixedBody,_ = fixNullableRefs map e.Body
let newBody =
(nullables, fixedBody)
||> List.foldBack(fun n body ->
let (patchedExpr, param) = map.[n]
let bind = Methods.ClassBindMethod.MakeGenericMethod(n.Type)
let funcType = typedefof<Func<_,_>>.MakeGenericType(n.Type, typeof<obj>)
let lambda = Expression.Lambda(funcType, body, [|param|])
Expression.Call(bind, patchedExpr, lambda) :> Expression
)
Expression.Lambda(e.Type, Expression.Convert(newBody, e.Body.Type), e.Parameters) :?> 'T

let source = DataObject.Expr
let patched = fix source
printfn "%O" patched
(*
obj => Convert(ClassBind(obj, p0 => ClassBind(p0.Name, p1 => p1.Split(new [] {,}))))
*)
 
GeekySpeaky: Submit Your Site!