(* TODO: - clever factorization of common regexps - handle utf-8, special characters in type names, ... *) open Printf open Pxp_yacc open Pxp_lexer_types open Pxp_types let mixed_table : ('a,unit) Hashtbl.t = Hashtbl.create 127 let regexp_table : ('a,unit) Hashtbl.t = Hashtbl.create 127 let kw = [ "and"; "as"; "assert"; "begin"; "class"; "constraint"; "do"; "done"; "downto"; "else"; "end"; "exception"; "external"; "false"; "for"; "fun"; "function"; "functor"; "if"; "in"; "include"; "inherit"; "initializer"; "lazy"; "let"; "match"; "method"; "module"; "mutable"; "new"; "object"; "of"; "open"; "or"; "private"; "rec"; "sig"; "struct"; "then"; "to"; "true"; "try"; "type"; "val"; "virtual"; "when"; "while"; "with"; "mod"; "land"; "lor"; "lxor"; "lsl"; "lsr"; "asr"; "map"; "div"; "namespace"; ] let split_qname s = try let i = String.index s ':' in String.sub s 0 i, String.sub s (i+1) (String.length s - i - 1) with Not_found -> "", s let id s = let s = snd (split_qname s) in if List.mem s kw then "_" ^ s else s let field s = s (* try if List.mem s xkw then raise Exit; for i = 0 to String.length s - 1 do match s.[i] with | '.' | 'a'..'z' | 'A'..'Z' | '_' -> () | _ -> raise Exit done; s with Exit -> "#\"" ^ s ^ "\"" *) let import_dtd ppf dtd = let rec regexp ppf = function | Optional re -> Format.fprintf ppf "%a?" regexp re | Repeated re -> Format.fprintf ppf "%a*" regexp re | Repeated1 re -> Format.fprintf ppf "%a+" regexp re | Seq (re1 :: res) -> Format.fprintf ppf "(@[ %a" regexp re1; List.iter (fun re -> Format.fprintf ppf "@ %a" regexp re) res; Format.fprintf ppf " @])" | Alt (re1 :: res) -> Format.fprintf ppf "(@[ %a" regexp re1; List.iter (fun re -> Format.fprintf ppf "@ | %a" regexp re) res; Format.fprintf ppf " @])" | Child s -> Format.fprintf ppf "%s" (id s) | _ -> assert false in let content ppf = function | Unspecified | Any -> Format.fprintf ppf "Any*" | Empty -> Format.fprintf ppf "" | Mixed l -> (try Hashtbl.find mixed_table l; Format.fprintf ppf "MIXED:CACHED!"; raise Not_found with Not_found -> (* Hashtbl.add mixed_table l (); *) let l = List.map (function | MPCDATA -> "Char" | MChild s -> id s) l in Format.fprintf ppf "( %s )*" (String.concat " | " l)) | Regexp r -> (try Hashtbl.find regexp_table r; Format.fprintf ppf "REGEXP:CACHED!"; raise Not_found with Not_found -> (* Hashtbl.add regexp_table r (); *) regexp ppf r ) in let att_type ppf = function | A_enum l -> Format.fprintf ppf "("; ignore (List.fold_left (fun first s -> if not first then Format.fprintf ppf " | "; Format.fprintf ppf "\"%s\"" s; false) true l); Format.fprintf ppf ")" | _ -> Format.fprintf ppf "String" in let attrib ppf e = List.iter (fun a -> let (at,ad) = e # attribute a in match ad with | D_fixed _ -> () | _ -> Format.fprintf ppf " %s=%s%a" (field a) (if ad = D_required then "" else "?") att_type at; ) (e # attribute_names) in let elt first ppf e = Format.fprintf ppf "%s @[<2>%s = @[<3>{{<%s%a>[@ @[%a@]@ ]}}@]@]@\n" (if first then "type" else " and") (id (e # name)) (e # name) attrib e content (e # content_model) in ignore (List.fold_left (fun first x -> elt first ppf (dtd # element x); false) true (dtd # element_names)) let () = if (Array.length Sys.argv < 2) then ( prerr_endline "Usage: dtd2types <.dtd file> {prefix:URI}"; exit 2 ); let src = from_file Sys.argv.(1) in let dtd = try parse_dtd_entity { default_config with encoding = `Enc_utf8 } src with exn -> prerr_endline (Pxp_types.string_of_exn exn); exit 1 in let ppf = Format.std_formatter in Format.fprintf ppf "(* This file has been automatically by dtd2types *)@\n"; Format.fprintf ppf "(* Command line: "; for i = 0 to Array.length Sys.argv - 1 do Format.fprintf ppf "%s " Sys.argv.(i) done; Format.fprintf ppf "*)@\n"; for i = 2 to Array.length Sys.argv - 1 do let (pr,uri) = split_qname Sys.argv.(i) in if pr = "" then Format.fprintf ppf "{{ namespace \"%s\" }}@\n" uri else Format.fprintf ppf "{{ namespace %s = \"%s\" }}@\n" pr uri done; import_dtd ppf dtd