Posted 2024-04-17Updated 2024-04-1719 minutes read (About 2891 words)

[Compiler Construction] Control Flow Graph, Liveness Analysis, Dataflow Analysis and Interference Graph, Implemented in OCaml

I am super busy, but I want to spend some time talking about how to write a compiler in ocaml that has liveness analysis, and can generate interference graph from CFG and liveness analysis, which is fundamental to doing correct, if not efficient, register allocation.

PeterYaoNYU/CompilerInOCaml

A compiler for a ML-ish Languange, with a ML->Scheme->C->CFG->MIPS Pipeline, Register Allocation and Garbage Collection.

The Code can be found under the folder ps7, if not restructured after the writing of this post.

This is part of the NYU graduate level capstone project, with Prof. Joseph Tassarotti.

An interference graph is a prerequisite for building correct register allocation in a compiler. An interference graph relies on correct liveness analysis, and liveness is dependent on control flow graph, or cfg. CFG is a directed graph between blocks of code. The granularity of the block is adjustable, but to build efficient compiler that converges fast enough, one common optimization is to collapse codes together, with the last line of code in the cfg block being jump, return or if.

CFG AST

We have an ast very similar to assembly, but has an infinite number of registers to store temporaries.

type operand = Int of int | Var of var | Lab of label | Reg of Mips.reg
let sp = Reg(Mips.R29)
let fp = Reg(Mips.R30)
let ra = Reg(Mips.R31)

type compareop = Eq | Neq | Lt | Lte | Gt | Gte
type arithop = Plus | Minus | Times | Div 

(* essentially, a subset of the MIPS instructions but with
 * support for using temps as operands. *)
type inst = 
  Label of label
| Move of operand * operand                     (* x := y *)
| Arith of operand * operand * arithop * operand (* x := y + z *)
| Load of operand * operand * int               (* x := *(y+i) *)
| Store of operand * int * operand              (* *(x+i) := y *)
| Call of operand                               (* invoke f--result in R2 *)
| Jump of label  (* j L *)
| If of operand * compareop * operand * label * label
      (* if x < y then goto L1 else goto L2 *)
| Return  (* return to caller -- result assumed in R2 *)

(* basic blocks -- instead of capturing the structure of basic
 * blocks (as done in class), we just represent them as lists
 * of instructions where we assume that each block starts with a 
 * Label and ends with either a Jump, If, or Return and that 
 * there is no intervening Label, Jump, If, or Return. *)
type block = inst list 

(* a function is a list of basic blocks -- the first block is
 * assumed to be the entry point to the function. *)
type func = block list

(* a program is a list of functions -- the function named main
 * is considered the entry point. *)
type program = func list

Build Control Flow Graph

The second step would be to connect blocks of instructions together, based on the final jump statement in each block.

let make_graph (f: func) : flow_graph = 
  let graph = FGraph.empty in
  let graph_with_nodes = List.fold_left (fun acc_graph block -> FGraph.addNode (Block block) acc_graph) graph f in
  
  let add_edges acc_graph block = 
    match List.rev block with 
    | (Jump label) :: _ -> 
      let target_block_node = find_block_with_label f label in
      FGraph.addEdge (Block block) target_block_node acc_graph
    | (If (_, _, _, label_true, label_false)) :: _ -> 
      let true_block_node = find_block_with_label f label_true in
      let false_block_node = find_block_with_label f label_false in
      let graph_with_true_edge = FGraph.addEdge (Block block) true_block_node acc_graph in
      FGraph.addEdge (Block block) false_block_node graph_with_true_edge
    | Return :: _ -> acc_graph
    | _ -> raise BlockError
  in
  List.fold_left add_edges graph_with_nodes f

There are only two cases that we need to consider, a jump and a if branch. We simply iterate over the list of blocks, which is of type func, and fold the result.

Def and Use Map for each block (Kill and Gen in other words)

This is an optimization to make the convergence of dataflow analysis faster, so that we do not have to recalculate the gen and kill at each iteration.

let process_instruction (def, use) inst = 
  match inst with 
  | Move (dest, src) ->
    let use = (
      match dest with 
      | Var v -> (if (NodeSet.mem (VarNode v) use) then NodeSet.remove (VarNode v) use else use)
      | Reg r -> (if (NodeSet.mem (RegNode r) use) then NodeSet.remove (RegNode r) use else use)
      | _ -> use
    )
    in
    let def = (
      match src with
      | Var v -> (if (NodeSet.mem (VarNode v) def) then NodeSet.remove (VarNode v) def else def)
      | Reg r -> (if (NodeSet.mem (RegNode r) def) then NodeSet.remove (RegNode r) def else def)
      | _ -> def
    )
    in
    let (def, use) = (
      match dest with 
      | Var v -> (add_node_to_def def (VarNode v), use)
      | Reg r -> (add_node_to_def def (RegNode r), use)
      | _ -> (def, use)
    ) in
    let (def, use) = (
      match src with 
      | Var v -> (def, add_node_to_use use (VarNode v))
      | Reg r -> (def, add_node_to_use use (RegNode r))
      | _ -> (def, use)
    ) in
    (def, use)
  | Arith (dest, src, _, src2) -> 
    let use = (
      match dest with 
      | Var v -> (if (NodeSet.mem (VarNode v) use) then NodeSet.remove (VarNode v) use else use)
      | Reg r -> (if (NodeSet.mem (RegNode r) use) then NodeSet.remove (RegNode r) use else use)
      | _ -> use
    ) in
    let def = (
      match src with
      | Var v -> (if (NodeSet.mem (VarNode v) def) then NodeSet.remove (VarNode v) def else def)
      | Reg r -> (if (NodeSet.mem (RegNode r) def) then NodeSet.remove (RegNode r) def else def)
      | _ -> def
    ) in
    let def = (
      match src2 with
      | Var v -> (if (NodeSet.mem (VarNode v) def) then NodeSet.remove (VarNode v) def else def)
      | Reg r -> (if (NodeSet.mem (RegNode r) def) then NodeSet.remove (RegNode r) def else def)
      | _ -> def
    ) in
    let (def, use) = match dest with 
      | Var v -> (add_node_to_def def (VarNode v), use)
      | Reg r -> (add_node_to_def def (RegNode r), use)
      | _ -> (def, use)
    in
    let (def, use)= match src with 
      | Var v -> (def, add_node_to_use use (VarNode v))
      | Reg r -> (def, add_node_to_use use (RegNode r))
      | _ -> (def, use)
    in
    let (def, use) = match src2 with 
      | Var v -> (def, add_node_to_use use (VarNode v))
      | Reg r -> (def, add_node_to_use use (RegNode r))
      | _ -> (def, use)
    in
    (def, use)
  | If (src1, _, src2, _, _) -> 
    let (def, use) = (
      match src1 with 
      | Var v -> (def, add_node_to_use use (VarNode v))
      | Reg r -> (def, add_node_to_use use (RegNode r))
      | _ -> (def, use)
    ) in
    let (def, use) = (
      match src2 with 
      | Var v -> (def, add_node_to_use use (VarNode v))
      | Reg r -> (def, add_node_to_use use (RegNode r))
      | _ -> (def, use)
    ) in
    (def, use)
  | Load (dest, src, _) -> 
    let use = (
      match dest with 
      | Var v -> (if (NodeSet.mem (VarNode v) use) then NodeSet.remove (VarNode v) use else use)
      | Reg r -> (if (NodeSet.mem (RegNode r) use) then NodeSet.remove (RegNode r) use else use)
      | _ -> use
    ) in
    let def = (
      match src with
      | Var v -> (if (NodeSet.mem (VarNode v) def) then NodeSet.remove (VarNode v) def else def)
      | Reg r -> (if (NodeSet.mem (RegNode r) def) then NodeSet.remove (RegNode r) def else def)
      | _ -> def
    ) in
    let (def, use) = (
      match dest with 
      | Var v -> (add_node_to_def def (VarNode v), use)
      | Reg r -> (add_node_to_def def (RegNode r), use)
      | _ -> (def, use)
    ) in
    let (def, use) = (
      match src with 
      | Var v -> (def, add_node_to_use use (VarNode v))
      | Reg r -> (def, add_node_to_use use (RegNode r))
      | _ -> (def, use)
    ) in
    (def, use)
  | Store (_, _, src) -> 
    let def = (
      match src with
      | Var v -> (if (NodeSet.mem (VarNode v) def) then NodeSet.remove (VarNode v) def else def)
      | Reg r -> (if (NodeSet.mem (RegNode r) def) then NodeSet.remove (RegNode r) def else def)
      | _ -> def
    ) in
    let (def, use) = (
      match src with 
      | Var v -> (def, add_node_to_use use (VarNode v))
      | Reg r -> (def, add_node_to_use use (RegNode r))
      | _ -> (def, use)
    ) in
    (def, use)
    (* Pending, do not know precisely how to handle the call case *)
  | Call _ -> 
    let def = (NodeSet.union (NodeSet.of_list (List.map (fun x -> RegNode x) call_kill_list_reg)) def) in
    let use = (NodeSet.union (NodeSet.of_list (List.map (fun x -> RegNode x) call_gen_list_reg)) use) in
    (def, use)
  | Return -> 
    let def = (if (NodeSet.mem (RegNode R2) def) then NodeSet.remove (RegNode R2) def else def)
    in
    let use = NodeSet.add (RegNode R2) use in
    (def, use)
  | _ -> (def, use)

let update_maps block (def_map, use_map) =
  let instructions = List.rev block in 
  let def, use = List.fold_left process_instruction (NodeSet.empty, NodeSet.empty) instructions in
  let block_node = Block block in
  (BlockMap.add block_node def def_map, BlockMap.add block_node use use_map)

let build_maps (f: func) = List.fold_left (fun maps block -> update_maps block maps) (BlockMap.empty, BlockMap.empty) f

Let me just make an important remark on the definition of the gen and kill set of each block. (or def and use in the Appel’s book, which is equivalent). I want to emphasize that because it is not in Appel’s book (Appel did not use the block coalesce optimization, which merge together non jump instructions). Nor does Prof Joe thought of it immediately.

A def set of a block is the set of variables that are defined before ever used in that block.
A use set is, on the contrary, the set of variables that are used before ever defined.

So, when doing the def use summary of the block, you start from the bottom, and if you see a variable defined, then you should remove it from the use set, if it exists in the use set.

Liveness Analysis

The liveness analysis algorithm is well established. We just need to implement it in ocaml.

let rec analyze_liveness flow_graph def_use_map live_in_sets live_out_sets =
  let changes = ref false in 

  let new_live_in_sets, new_live_out_sets = 

    FlowNodeSet.fold (fun block_node (acc_live_in_sets, acc_live_out_sets) -> 

      let gen_set = BlockMap.find block_node (snd def_use_map) in 
      let kill_set = BlockMap.find block_node (fst def_use_map) in 
      
      let live_out = FlowNodeSet.fold(fun succ live_out_acc -> 
        let succ_live_in = try BlockMap.find succ acc_live_in_sets with Not_found -> NodeSet.empty in
        NodeSet.union succ_live_in live_out_acc
      ) (FGraph.succ block_node flow_graph) NodeSet.empty in 

      print_endline ">>>>>>>>>>>>>>>>>>>>>>>live out";
      print_blocknode block_node;
      print_set live_out;

      let live_in = NodeSet.union gen_set (NodeSet.diff live_out kill_set) in
      print_endline ">>>>>>>>>>>>>>>>>>>>>>>live in";
      print_blocknode block_node;
      print_set live_in;
      let prev_live_in = try BlockMap.find block_node acc_live_in_sets with Not_found -> NodeSet.empty in
      let prev_live_out = try BlockMap.find block_node acc_live_out_sets with Not_found -> NodeSet.empty in
      if (NodeSet.equal live_out prev_live_out) then (
        print_endline ">>>>>>>>>>>>>>>>>>>>>>>nothing changes, cont to next block";
        let updated_live_in_sets = BlockMap.add block_node prev_live_in acc_live_in_sets in
        let updated_live_out_sets = BlockMap.add block_node prev_live_out acc_live_out_sets in
        (updated_live_in_sets, updated_live_out_sets)
      )
      else (
        changes := true;
        let updated_live_in_sets = BlockMap.add block_node live_in acc_live_in_sets in 
        let updated_live_out_sets = BlockMap.add block_node live_out acc_live_out_sets in
        (updated_live_in_sets, updated_live_out_sets)
      )
    ) (FGraph.nodes flow_graph) (live_in_sets, live_out_sets) 
  in if !changes then analyze_liveness flow_graph def_use_map new_live_in_sets new_live_out_sets else (new_live_in_sets, new_live_out_sets)

What I implemented is just a relatively truthful ocaml version of this algorithm.

Construct Interference Graph

let add_edges_to_igraph (live_in_sets: NodeSet.t BlockMap.t) (live_out_sets: NodeSet.t BlockMap.t): interfere_graph =
  let interfere_graph = IUGraph.empty in
  let add_interferences block_live_out graph = 
    NodeSet.fold (fun node acc_graph -> 
      NodeSet.fold (fun other_node acc_inner_graph ->
        specialAddEdge node other_node acc_inner_graph
      ) block_live_out acc_graph
    ) block_live_out graph
  in
  let combined_sets = BlockMap.merge (fun _ live_in live_out ->
    match live_in, live_out with
    | Some ins, Some outs -> Some (NodeSet.union ins outs)
    | Some ins, None -> Some ins
    | None, Some outs -> Some outs
    | None, None -> None
  ) live_in_sets live_out_sets in
  BlockMap.fold (fun _ combined_live_set acc_graph -> 
    add_interferences combined_live_set acc_graph
  ) combined_sets interfere_graph

let instruction_def_use live_set inst = 
  match inst with 
  | Move (dest, src) ->
    let live_set = (
      match dest with 
      | Var v -> (if (NodeSet.mem (VarNode v) live_set) then NodeSet.remove (VarNode v) live_set else live_set)
      | Reg r -> (if (NodeSet.mem (RegNode r) live_set) then NodeSet.remove (RegNode r) live_set else live_set)
      | _ -> live_set
    )
    in
    let live_set = (
      match src with 
      | Var v -> add_node_to_use live_set (VarNode v)
      | Reg r -> add_node_to_use live_set (RegNode r)
      | _ -> live_set
    ) in
    live_set
  | Arith (dest, src, _, src2) -> 
    let live_set = (
      match dest with 
      | Var v -> (if (NodeSet.mem (VarNode v) live_set) then NodeSet.remove (VarNode v) live_set else live_set)
      | Reg r -> (if (NodeSet.mem (RegNode r) live_set) then NodeSet.remove (RegNode r) live_set else live_set)
      | _ -> live_set
    ) in
    let live_set= match src with 
      | Var v -> add_node_to_use live_set (VarNode v)
      | Reg r -> add_node_to_use live_set (RegNode r)
      | _ -> live_set
    in
    let live_set = match src2 with 
      | Var v -> add_node_to_use live_set (VarNode v)
      | Reg r -> add_node_to_use live_set (RegNode r)
      | _ -> live_set
    in
    live_set
  | If (src1, _, src2, _, _) -> 
    let live_set = (
      match src1 with 
      | Var v -> add_node_to_use live_set (VarNode v)
      | Reg r -> add_node_to_use live_set (RegNode r)
      | _ -> live_set
    ) in
    let live_set = (
      match src2 with 
      | Var v -> add_node_to_use live_set (VarNode v)
      | Reg r -> add_node_to_use live_set (RegNode r)
      | _ -> live_set
    ) in
    live_set
  | Load (dest, src, _) -> 
    let live_set = (
      match dest with 
      | Var v -> (if (NodeSet.mem (VarNode v) live_set) then NodeSet.remove (VarNode v) live_set else live_set)
      | Reg r -> (if (NodeSet.mem (RegNode r) live_set) then NodeSet.remove (RegNode r) live_set else live_set)
      | _ -> live_set
    ) in
    let live_set = (
      match src with 
      | Var v -> add_node_to_use live_set (VarNode v)
      | Reg r -> add_node_to_use live_set (RegNode r)
      | _ -> live_set
    ) in
    live_set
  | Store (_, _, src) -> 
    let live_set = (
      match src with 
      | Var v -> add_node_to_use live_set (VarNode v)
      | Reg r -> add_node_to_use live_set (RegNode r)
      | _ -> live_set
    ) in
    live_set
    (* Pending, do not know precisely how to handle the call case *)
  | Call _ -> 
    let live_set = (NodeSet.union (NodeSet.of_list (List.map (fun x -> RegNode x) call_kill_list_reg)) live_set) in
    let live_set = (NodeSet.union (NodeSet.of_list (List.map (fun x -> RegNode x) call_gen_list_reg)) live_set) in
    live_set
  | _ -> live_set

let build_interference_graph (blocks: block list) (live_out_map: NodeSet.t BlockMap.t) = 
  let interfere_graph = IUGraph.empty in
  let add_interference g n1 n2 = specialAddEdge n1 n2 g in
  let add_interference_for_set g live_set =
    NodeSet.fold (fun node acc_graph ->
      NodeSet.fold (fun other_node inner_acc_graph ->
        add_interference inner_acc_graph node other_node
      ) live_set acc_graph
    ) live_set g
  in
  let process_block block live_out acc_graph = 
    let rec handle_instruction instrs live_set graph = 
      match instrs with
      | [] -> graph
      | instr::rest ->
        (* let defined, used = instruction_def_use instr in 
        let new_live_set = NodeSet.union (NodeSet.diff live_set defined) used in *)
        let new_live_set = instruction_def_use live_set instr in
        let new_graph = add_interference_for_set graph new_live_set in
        handle_instruction rest new_live_set new_graph
    in
    handle_instruction (List.rev block) live_out acc_graph
  in 
  BlockMap.fold (fun block live_out acc_graph -> 
    match block with
    | Block b -> process_block b live_out acc_graph
    | _ -> raise FatalError "BlockMap should only contain blocks"
  ) live_out_map interfere_graph

To construct interference graph, we use the live-out set for each block as a starting point, move backward, take out and add in variables to the live set along the way, according to the gen and kill rules. At each instruction step, we add edges between every 2 nodes in the live set, to the interference graph.

Putting everything together

I wrote a main function to put everythong together.

let build_interfere_graph (f : func) = 
    let flow_graph = make_graph f in
    let def_use_map = build_maps f in
    print_endline ">>>>>>>>>>>>>>>>>>>>>>>";
    print_endline "Def Map:";
    print_map (fst def_use_map);
    print_endline ">>>>>>>>>>>>>>>>>>>>>>>";
    print_endline "Use Map:";
    print_map (snd def_use_map);
    print_endline ">>>>>>>>>>>>>>>>>>>>>>>";
    print_endline ">>>>>>>>>>>>>>>>>>>>>>>";
    let initial_live_out_sets = BlockMap.empty in
    let final_live_in_sets, final_live_out_sets = analyze_liveness flow_graph def_use_map (snd def_use_map) initial_live_out_sets in
    print_endline ">>>>>>>>>>>>>>>>>>>>>>>";
    print_endline "Live In Sets:";
    print_map final_live_in_sets;
    print_endline ">>>>>>>>>>>>>>>>>>>>>>>";
    print_endline "Live Out Sets:";
    print_map final_live_out_sets;
    let final_interfere_graph = build_interference_graph f final_live_out_sets in
    final_interfere_graph

[Compiler Construction] Control Flow Graph, Liveness Analysis, Dataflow Analysis and Interference Graph, Implemented in OCaml

http://peteryaonyu.github.io/2024/04/17/Compiler-Construction-Control-Flow-Graph-Liveness-Analysis-Dataflow-Analysis-and-Interference-Graph-Implemented-in-OCaml/

Author

Yuncheng Yao

Posted on

2024-04-17

Updated on

2024-04-17

Licensed under

#Compiler

[Compiler Construction] Control Flow Graph, Liveness Analysis, Dataflow Analysis and Interference Graph, Implemented in OCaml

CFG AST

Build Control Flow Graph

Def and Use Map for each block (Kill and Gen in other words)

Liveness Analysis

Construct Interference Graph

Putting everything together

Author

Posted on

Updated on

Licensed under

Recents

Archives

Tags