(** The data structures and the generator for generating whole programs. *)

(*
    il4c  --  Compiler for the IL4 Lisp-ahtava langauge
    Copyright (C) 2007 Jere Sanisalo

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*)

include Il4_reader

(** Identifier name. *)
type id_name =
	string

(** Function attributes. *)
type fun_attr =
	| FunAttr_UseVmFloatTest
type fun_attrs = fun_attr list

(** Raw assembly function. Contains the raw code and the number of parameters. *)
type asm_fun = int * (string list) * fun_attrs * Pos_obj.range

(** Normal program function. *)
type func =
{
	func_params: string list;
	func_locals: string list;
	func_code: stmt;
	func_range: Pos_obj.range;
}
and stmts = stmt list
and stmt =
	| Nop of Pos_obj.range
	| Block of stmts * Pos_obj.range
	| ConstInt of int32 * Pos_obj.range
	| ConstFloat of float * Pos_obj.range
	| ConstRawArray of string * Pos_obj.range
	| GetGlobal of string * Pos_obj.range
	| GetLocal of string * Pos_obj.range
	| GetParam of string * Pos_obj.range
	| GetExtSymbol of string * Pos_obj.range
	| SetGlobal of string * stmt * Pos_obj.range
	| SetLocal of string * stmt * Pos_obj.range
	| SetParam of string * stmt * Pos_obj.range
	| Call of string * stmts * Pos_obj.range
	| CallAsm of string * stmts * Pos_obj.range
	| CallC of call_type * stmts * Pos_obj.range
	| If of stmt * stmt * stmt * Pos_obj.range
	| While of stmt * stmt * Pos_obj.range
	| Break of Pos_obj.range
	| Continue of Pos_obj.range
	| Return of stmt * Pos_obj.range
and call_type =
	| Calltype_Cdecl
	| Calltype_Cdecl_FP
	| Calltype_Stdcall
	| Calltype_Stdcall_FP

(** Global variable types. *)
type global_var =
	| Global_Empty of Pos_obj.range
	| Global_Int of int32 * Pos_obj.range
	| Global_Float of float * Pos_obj.range
	| Global_Raw_Array of string * Pos_obj.range
	| Global_Uninitialized_Array of int * Pos_obj.range

(** The main program type. *)
type prg =
{
	prg_heap_size: int;
	prg_constants: (id_name * stmt) list;
	prg_globals: (id_name * global_var) list;
	prg_asmfun_list: (id_name * asm_fun) list;
	prg_fun_list: (id_name * func) list;
}

(** Converts a function attribute to a string. *)
let string_of_fun_attr attr =
	match attr with
	| FunAttr_UseVmFloatTest -> "attr_use_vm_float_test"

(** Converts a string to a function attribute. Throws Not_found if it's an invalid string. *)
let fun_attr_of_string str =
	match str with
	| "attr_use_vm_float_test" -> FunAttr_UseVmFloatTest
	| _ -> raise Not_found

(** Tests if the attribute is in the in the list. *)
let has_attr attrs attr =
	List.exists (fun a -> attr = a) attrs

(** Creates the empty function. *)
let empty_func range =
	{
		func_params = [];
		func_locals = [];
		func_code = Nop range;
		func_range = range;
	}

(** The empty program. *)
let empty_prg =
	{
		prg_heap_size = 128 * 1024 * 1024;
		prg_constants = [];
		prg_globals = [];
		prg_asmfun_list = [];
		prg_fun_list = [];
	}

(** Checks if a symbol is used in the program. *)
let has_symbol prg sym =
	(List.mem_assoc sym prg.prg_constants) ||
	(List.mem_assoc sym prg.prg_globals) ||
	(List.mem_assoc sym prg.prg_asmfun_list) ||
	(List.mem_assoc sym prg.prg_fun_list)

(** Returns a list of atom strings from an item (the item must be of type Items; that is a list of items). *)
let atoms_of_item_list_item item =
	let lst = items_of_item item in
	List.map atom_of_item lst

(** Collects the top level items (returned in defining order). Also does some top level sanity checks. *)
let collect_top_level items =
	let collect_item prg item =
		(* Get the list contents. *)
		let contents = items_of_item item in
		let contents_range = range_of_item item in
		let contents_range_str = Pos_obj.range_desc contents_range in
		if contents = [] then failwith ("Empty top level form at " ^ contents_range_str);

		(* Parse function for: heapsize *)
		let parse_heapsize prg items =
			match items with
			| Int (v,r) :: [] ->
				let vi = Int32.to_int v in
				if vi <= 0 then failwith ("Heapsize must be bigger than 0 at " ^ (Pos_obj.range_desc r));
				{ prg with prg_heap_size = vi }
			| _ -> failwith ("Heapsize expects only one integer argument at " ^ contents_range_str)
		in

		(* Parse function for: fun *)
		let parse_fun (prg,name,items) =
			if items = [] then failwith ("Function '" ^ name ^ "' must have at least 1 parameter (the parameter list) at " ^ contents_range_str);

			(* Check the parameters for uniqueness. *)
			let params = atoms_of_item_list_item (List.hd items) in
			(match Util.get_nonunique_item params with
			| Some x -> failwith ("Function '" ^ name ^ "' has defined the parameter '" ^ x ^ "' at least twice at " ^ contents_range_str);
			| None -> ());

			let f = empty_func contents_range in
			let f = { f with func_params = params } in
			{ prg with prg_fun_list = (name, f) :: prg.prg_fun_list }
		in

		(* Parse function for: asmfun *)
		let parse_asmfun (prg,name,items) =
			if (List.length items) < 2 then failwith ("Asmfun '" ^ name ^ "' must have at least 2 parameters (the parameter list and at least one line of code) at " ^ contents_range_str);
			let asm_params = atoms_of_item_list_item (List.hd items) in

			(* Collect the assembly code and interleaved attributes. *)
			let collect_strings_and_attrs (strs,attrs) item =
				match item with
				| Atom (attr_str,r) ->
					(try	let attr = fun_attr_of_string attr_str in
						if has_attr attrs attr then
							(strs, attrs)
						else
							(strs, attr :: attrs)
					with _ -> failwith ("Asmfun '" ^ name ^ "' has an unrecognized attribute '" ^ attr_str ^ "' at " ^ (Pos_obj.range_desc r)))
				| _ -> ((string_of_item item) :: strs, attrs)
			in
			let (asm_rev_strings,asm_attrs) = List.fold_left collect_strings_and_attrs ([],[]) (List.tl items) in

			let asm_fun = (List.length asm_params, List.rev asm_rev_strings, asm_attrs, contents_range) in
			{ prg with prg_asmfun_list = (name, asm_fun) :: prg.prg_asmfun_list }
		in

		(* Parse function for: global *)
		let parse_global (prg,name,items) =
			let new_var =
				match items with
				| [] -> Global_Empty contents_range
				| _ :: _ :: _ -> failwith ("Global variable '" ^ name ^ "' may have either 0 or 1 initializers at " ^ contents_range_str)
				| Int(v,_) :: [] -> Global_Int (v, contents_range)
				| Float(v,_) :: [] -> Global_Float (v, contents_range)
				| Str(v,_) :: [] -> Global_Raw_Array (v, contents_range)
				| _ -> failwith ("Global variable '" ^ name ^ "' may be initialized with contants only at " ^ contents_range_str)
			in
			{ prg with prg_globals = (name, new_var) :: prg.prg_globals }
		in

		(* Parse function for: const *)
		let parse_const (prg,name,items) =
			let value =
				match items with
				| [] | _ :: _ :: _ -> failwith ("Constant variable '" ^ name ^ "' must have 1 initializer at " ^ contents_range_str)
				| Int(v,_) :: [] -> ConstInt (v, contents_range)
				| Float(v,_) :: [] -> ConstFloat (v, contents_range)
				| Str(v,_) :: [] -> ConstRawArray (v, contents_range)
				| _ -> failwith ("Constant variable '" ^ name ^ "' may be initialized with contants only at " ^ contents_range_str)
			in
			{ prg with prg_constants = (name, value) :: prg.prg_constants }
		in

		(* Parses a unique symbol from the list, then returns the program, the symbol and the rest of the list. *)
		let parse_symbol prg items =
			if items = [] then failwith ("Missing form name at " ^ contents_range_str);
			let name = atom_of_item (List.hd items) in
			if has_symbol prg name then failwith ("Global symbol '" ^ name ^ "' already defined elsewhere; redefined at " ^ contents_range_str);
			(prg, name, List.tl items)
		in

		(* Parse the form. *)
		match atom_of_item (List.hd contents) with
		| "heapsize" -> parse_heapsize prg (List.tl contents)
		| "fun" -> parse_fun (parse_symbol prg (List.tl contents))
		| "asmfun" -> parse_asmfun (parse_symbol prg (List.tl contents))
		| "var" -> parse_global (parse_symbol prg (List.tl contents))
		| "const" -> parse_const (parse_symbol prg (List.tl contents))
		| s -> failwith ("Unknown top level form '" ^ s ^ "' type at " ^ contents_range_str)
	in

	(* Collect the items *)
	let prg = List.fold_left collect_item empty_prg items in

	(* Return the program, with the elements in right order. *)
	{ prg with
		prg_constants = List.rev prg.prg_constants;
		prg_globals = List.rev prg.prg_globals;
		prg_asmfun_list = List.rev prg.prg_asmfun_list;
		prg_fun_list = List.rev prg.prg_fun_list;
	}

(** Collects the function bodies from the item list. *)
let collect_funcs prg items =
	(* Processes the function code. *)
	let process_func prg fname code =
		(* Get the function body. *)
		let func = List.assoc fname prg.prg_fun_list in
		let params = func.func_params in
		let fun_range = func.func_range in

		(* Set up the local collecting. *)	
		let all_locals = ref [] in

		let add_local locals name range =
			if List.exists (fun x -> name = x) params then failwith ("Variable '" ^ name ^ "' already defined as a parameter; redefined at " ^ (Pos_obj.range_desc range));
			if List.exists (fun x -> name = x) !locals then failwith ("Variable '" ^ name ^ "' already defined; redefined at " ^ (Pos_obj.range_desc range));
			if List.exists (fun x -> name = x) !all_locals then () else all_locals := name :: !all_locals;
			locals := name :: !locals
		in

		(* Recursively parses a statement list. *)
		let rec parse_stmt_list locals code code_range =
			(* New list of locals for any sub-statements. *)
			let new_locals () = ref !locals in

			let code_range_str = Pos_obj.range_desc code_range in

			(* Parses a variable resolution. *)
			let parse_var var_name range =
				if List.exists (fun x -> var_name = x) !locals then
					GetLocal (var_name,range)
				else if List.exists (fun x -> var_name = x) params then
					GetParam (var_name,range)
				else if List.mem_assoc var_name prg.prg_globals then
					GetGlobal (var_name,range)
				else if List.mem_assoc var_name prg.prg_constants then
					List.assoc var_name prg.prg_constants
				else failwith ("Function '" ^ fname ^ "' refers to an undefined variable '" ^ var_name ^ "' at " ^ (Pos_obj.range_desc range))
			in

			(* Parses a variable set. *)
			let parse_set var_name code range =
				if List.exists (fun x -> var_name = x) !locals then
					SetLocal (var_name,code,range)
				else if List.exists (fun x -> var_name = x) params then
					SetParam (var_name,code,range)
				else if List.mem_assoc var_name prg.prg_globals then
					SetGlobal (var_name,code,range)
				else if List.mem_assoc var_name prg.prg_constants then
					failwith ("Function '" ^ fname ^ "' cannot redefine a constant '" ^ var_name ^ "' at " ^ (Pos_obj.range_desc range))
				else failwith ("Function '" ^ fname ^ "' refers to an undefined variable '" ^ var_name ^ "' at " ^ (Pos_obj.range_desc range))
			in

			(* Parses an external C call. *)
			let parse_ccall calltype stmts =
				let children =
					match stmts with
					| [] -> failwith ("Empty external call in function '" ^ fname ^ "' at " ^ code_range_str)
					| x -> List.map (fun item -> parse_stmt_list (new_locals ()) [item] (Il4_reader.range_of_item item)) x
				in
				CallC (calltype, children, code_range)
			in

			(* Parses a statement list node. It's either a list of sub statements (run in order), a function call, a varaible fetch or a statement. *)
			match code with
			| [] -> Nop code_range
			(* Single statement sub-list. *)
			| Items (sub_stmnts, range) :: [] -> parse_stmt_list (new_locals ()) sub_stmnts range

			(* Multistatement sub-list. *)
			| Items (sub_stmnts, range) :: tl ->
				let sub_block_locals = new_locals () in
				let this_elem = parse_stmt_list sub_block_locals sub_stmnts range in
				let next_elem = parse_stmt_list sub_block_locals tl code_range in
				(match next_elem with
				| Block (code,_) -> Block (this_elem :: code, code_range)
				| code -> Block (this_elem :: [code], code_range))

			(* Variable definition. *)
			| Atom ("var",r) :: Atom(name,_) :: [] ->
				add_local locals name code_range;
				Nop code_range
			(* Variable/parameter assignment. *)
			| Atom ("var",_) :: Atom(name,r) :: init_code :: [] ->
				let init_code = parse_stmt_list (new_locals ()) [init_code] code_range in
				add_local locals name r;
				parse_set name init_code code_range
			| Atom ("var",r) :: _ -> failwith ("Invalid var statement in function '" ^ fname ^ "' at " ^ code_range_str)

			(* Variable setting. *)
			| Atom ("set",_) :: Atom(name,r) :: value_code :: [] ->
				let value_code = parse_stmt_list (new_locals ()) [value_code] code_range in
				parse_set name value_code code_range
			| Atom ("set",r) :: _ :: _ :: _ -> failwith ("Invalid var statement (too many parameters) in function '" ^ fname ^ "' at " ^ code_range_str)
			| Atom ("set",r) :: _ -> failwith ("Invalid var statement (missing variable name and new value) in function '" ^ fname ^ "' at " ^ code_range_str)

			(* External symbol fetch. *)
			| Atom ("external_symbol",_) :: Str (sym,_) :: [] -> GetExtSymbol (sym, code_range)
			| Atom ("external_symbol",_) :: _ -> failwith ("Invalid external_symbol statement (only one string constant expected) in function '" ^ fname ^ "' at " ^ code_range_str)

			(* Parse ccall/stdcall (calls to external C functions). *)
			| Atom ("ccall",r) :: tl -> parse_ccall Calltype_Cdecl tl
			| Atom ("ccall-fp",r) :: tl -> parse_ccall Calltype_Cdecl_FP tl
			| Atom ("stdcall",r) :: tl -> parse_ccall Calltype_Stdcall tl
			| Atom ("stdcall-fp",r) :: tl -> parse_ccall Calltype_Stdcall_FP tl

			(* Return statement. *)
			| Atom ("return",r) :: [] -> Return (Nop r,r)
			| Atom ("return",r) :: retval :: [] -> Return (parse_stmt_list (new_locals ()) [retval] code_range,r)
			| Atom ("return",r) :: _ -> failwith ("Invalid return statement in function '" ^ fname ^ "' at " ^ code_range_str)

			(* If statement. *)
			| Atom ("if",r) :: cond_code :: true_code :: false_code :: [] ->
				let cond_code = parse_stmt_list (new_locals ()) [cond_code] (Il4_reader.range_of_item cond_code) in
				let true_code = parse_stmt_list (new_locals ()) [true_code] (Il4_reader.range_of_item true_code) in
				let false_code = parse_stmt_list (new_locals ()) [false_code] (Il4_reader.range_of_item false_code) in
				If (cond_code, true_code, false_code, code_range)
			| Atom ("if",r) :: cond_code :: true_code :: [] ->
				let cond_code = parse_stmt_list (new_locals ()) [cond_code] (Il4_reader.range_of_item cond_code) in
				let true_code = parse_stmt_list (new_locals ()) [true_code] (Il4_reader.range_of_item true_code) in
				If (cond_code, true_code, Nop code_range, code_range)
			| Atom ("if",r) :: _ -> failwith ("Invalid if statement in function '" ^ fname ^ "' at " ^ code_range_str)

			(* While statement. *)
			| Atom ("while",r) :: cond_code :: tl ->
				let while_body_locals = new_locals () in
				let cond_code = parse_stmt_list while_body_locals [cond_code] (Il4_reader.range_of_item cond_code) in

(*				let stmts =
					let compile_stmt p =
						let ret = parse_stmt_list while_body_locals [p] (Il4_reader.range_of_item p) in
						ret
					in
					List.map compile_stmt tl
				in

				(match stmts with
				| [] -> While (cond_code, Nop code_range, code_range)
				| x :: [] -> While (cond_code, x, code_range)
				| x -> While (cond_code, Block (x, code_range), code_range))*)

				let stmt = parse_stmt_list while_body_locals tl code_range in
				(match stmt with
				| Block (s::[],_) -> While (cond_code, s, code_range)
				| x -> While (cond_code, x, code_range))
			| Atom ("while",r) :: _ -> failwith ("Invalid while statement in function '" ^ fname ^ "' at " ^ code_range_str)
				

			(* Break statement. *)
			| Atom ("break",r) :: [] -> Break r
			| Atom ("break",r) :: _ -> failwith ("Invalid break statement in function '" ^ fname ^ "' at " ^ code_range_str)

			(* Continue statement. *)
			| Atom ("continue",r) :: [] -> Continue r
			| Atom ("continue",r) :: _ -> failwith ("Invalid continue statement in function '" ^ fname ^ "' at " ^ code_range_str)

			(* Variable read. *)
			| Atom (v,r) :: [] -> parse_var v r

			(* Function call. *)
			| Atom (v,r) :: tl ->
				let params =
					let compile_param p =
						parse_stmt_list (new_locals ()) [p] (Il4_reader.range_of_item p)
					in
					(* A special case for empty parameter lists. *)
					match tl with
					| Items([],_) :: [] -> []
					| x -> List.map compile_param x
				in
				let param_count = List.length params in
				let param_count_str = string_of_int param_count in

				(try
					(* First try variables. *)
					let get_var = parse_var v r in
					ignore get_var;
					failwith ("Invalid function call to a variable '" ^ v ^ "' in function '" ^ fname ^ "' at " ^ code_range_str)
				with
				| _ ->  (* Next try if it's an assembly function. *)
					if List.mem_assoc v prg.prg_asmfun_list then
						let (asm_param_count,_,_,_) = List.assoc v prg.prg_asmfun_list in
						if asm_param_count <> param_count then failwith ("The function call to '" ^ v ^ "' has an invalid number of parameters (need: " ^ (string_of_int asm_param_count) ^ ", got: " ^ param_count_str ^ ") in function '" ^ fname ^ "' at " ^ code_range_str);
						CallAsm (v, params, code_range)
					else if List.mem_assoc v prg.prg_fun_list then
						let fun_param_count = List.length (List.assoc v prg.prg_fun_list).func_params in
						if fun_param_count <> param_count then failwith ("The function call to '" ^ v ^ "' has an invalid number of parameters (need: " ^ (string_of_int fun_param_count) ^ ", got: " ^ param_count_str ^ ") in function '" ^ fname ^ "' at " ^ code_range_str);
						Call (v, params, code_range)
					else
						failwith ("Invalid function call to an unknown function/variable '" ^ v ^ "' in function '" ^ fname ^ "' at " ^ code_range_str)
)
			(* Constants. *)
			| Int (v,r) :: [] -> ConstInt (v,r)
			| Float (v,r) :: [] -> ConstFloat (v,r)
			| Str (v,r) :: [] -> ConstRawArray (v,r)
			| _ -> failwith ("Error at a statement list in function '" ^ fname ^ "' at " ^ code_range_str)
		in

		(* Return the transformed function. *)
		let code = parse_stmt_list (ref []) code fun_range in
		{ func with
			func_locals = List.rev !all_locals;
			func_code = code }
	in

	(* Collects a single function. *)
	let collect_func prg fname code =
		let new_func = process_func prg fname code in
		{ prg with prg_fun_list = Util.assoc_replace (fname, new_func) prg.prg_fun_list }
	in

	(* Collects only the "fun" nodes. *)
	let maybe_collect_func prg item =
		match item with
		| Items (Atom("fun",_) :: Atom(name,_) :: _ :: code, _) -> collect_func prg name code
		| _ -> prg
	in

	List.fold_left maybe_collect_func prg items

(** Generates a program from the IL4 tree. *)
let generate items =
	let prg = collect_top_level items in
	collect_funcs prg items
