This procedure is defined in the server but not documented via ad_proc or proc_doc and may be intended as a private interface.
The procedure is defined as:
# tdom::xmlOpenFileWorker --
#
#   Open an XML file and configure the resulting channel according to
#   the character encoding auto-detected from the first bytes of the
#   file (XML Recommendation, Appendix F).
#
# Arguments:
#   filename        Path of the file to open.
#   encodingString  Name of a variable in the caller's scope which
#                   receives the detected encoding name (UTF-8,
#                   UTF-16be, UTF-16le or the name found in the XML
#                   declaration). Pass an empty string if the name is
#                   not wanted.
#   forSimple       Boolean. If true, UTF-16be input is rejected and
#                   UTF-16le input is read with the Tcl "unicode"
#                   encoding (the error message refers to the
#                   "simple parser").
#   forRead         Boolean. Only consulted when a BOM is present,
#                   where it has the same effect as forSimple.
#                   NOTE(review): presumably set when the caller reads
#                   the channel itself - confirm against the callers.
#
# Results:
#   The handle of the opened and configured channel.
#
# Errors:
#   Raises an error for unsupported or unrecognizable input. The
#   channel is closed before the error is propagated, so a failed call
#   does not leak an open file.

proc tdom::xmlOpenFileWorker {filename encodingString forSimple forRead} {

    set fd [open $filename]

    if {$encodingString != {}} {
        upvar $encodingString encString
    }

    if {[catch {
        tdom::xmlDetectChannelEncoding $fd encString $forSimple $forRead
    } errMsg]} {
        # Keep the original error context, release the channel, re-raise.
        set savedInfo $::errorInfo
        set savedCode $::errorCode
        catch {close $fd}
        error $errMsg $savedInfo $savedCode
    }
    return $fd
}

# tdom::xmlDetectChannelEncoding --
#
#   Private helper of tdom::xmlOpenFileWorker: inspect the start of the
#   already opened channel fd, store the detected encoding name in the
#   variable named by encStringVar (in the scope of the caller) and
#   configure the encoding and read position of the channel.

proc tdom::xmlDetectChannelEncoding {fd encStringVar forSimple forRead} {

    upvar 1 $encStringVar encString

    # This partly (mis-)uses the encoding of a channel handed to [dom
    # parse -channel ..] as a marker: if the channel encoding is utf-8
    # then behind the scenes Tcl_Read() is used, otherwise
    # Tcl_ReadChars(). This is used for the encodings understood (and
    # checked) by the used expat implementation: utf-8 and utf-16 (in
    # either byte order).

    # The autodetection of the encoding follows
    # XML Recommendation, Appendix F

    fconfigure $fd -encoding binary
    if {![binary scan [read $fd 4] "H8" firstBytes]} {
        # very short (< 4 Bytes) file
        seek $fd 0 start
        set encString UTF-8
        return
    }

    # First check for BOM
    switch [string range $firstBytes 0 3] {
        "feff" {
            # feff: UTF-16, big-endian BOM
            if {$forSimple || $forRead} {
                error "UTF-16be is not supported"
            }
            seek $fd 0 start
            set encString UTF-16be
            fconfigure $fd -encoding utf-8
            return
        }
        "fffe" {
            # fffe: UTF-16, little-endian BOM
            set encString UTF-16le
            if {$forSimple || $forRead} {
                # Skip the two BOM bytes and let Tcl do the decoding.
                seek $fd 2 start
                fconfigure $fd -encoding unicode
            } else {
                seek $fd 0 start
                fconfigure $fd -encoding utf-8
            }
            return
        }
    }

    # If the entity has a XML Declaration, the first four characters
    # must be "<?xm".
    switch $firstBytes {
        "3c3f786d" {
            # UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS,
            # EUC, or any other 7-bit, 8-bit, or mixed-width encoding which
            # ensures that the characters of ASCII have their normal positions,
            # width and values; the actual encoding declaration must be read to
            # detect which of these applies, but since all of these encodings
            # use the same bit patterns for the ASCII characters, the encoding
            # declaration itself can be read reliably.

            # First 300 bytes should be enough for a XML Declaration
            # This is of course not 100 percent bullet-proof.
            set head [read $fd 296]

            # Try to find the end of the XML Declaration
            set closeIndex [string first ">" $head]
            if {$closeIndex == -1} {
                error "Weird XML data or not XML data at all"
            }

            # head starts at file offset 4, so the closing ">" is at
            # offset closeIndex + 4. Re-read from the start through that
            # byte; this leaves the channel positioned right behind the
            # XML Declaration. The bytes themselves are not needed.
            seek $fd 0 start
            read $fd [expr {$closeIndex + 5}]

            # extract the encoding information; white space is allowed
            # on both sides of the equal sign (XML production Eq)
            set pattern {^[^>]+encoding[\x20\x9\xd\xa]*=[\x20\x9\xd\xa]*["']([^ "']+)['"]}
            # emacs: "
            if {![regexp $pattern $head - encStr]} {
                # Probably something like <?xml version="1.0"?>.
                # Without encoding declaration this must be UTF-8
                set encoding utf-8
                set encString UTF-8
            } else {
                set encoding [IANAEncoding2TclEncoding $encStr]
                set encString $encStr
            }
        }
        "0000003c" -
        "003c0000" -
        "3c000000" -
        "00003c00" {
            # UCS-4 in any of the four byte orders listed in Appendix F
            error "UCS-4 not supported"
        }
        "003c003f" {
            # UTF-16, big-endian, no BOM
            if {$forSimple} {
                error "UTF-16be is not supported by the simple parser"
            }
            seek $fd 0 start
            set encoding utf-8
            set encString UTF-16be
        }
        "3c003f00" {
            # UTF-16, little-endian, no BOM
            if {$forSimple} {
                seek $fd 2 start
                set encoding unicode
            } else {
                seek $fd 0 start
                set encoding utf-8
            }
            set encString UTF-16le
        }
        "4c6fa794" {
            # EBCDIC in some flavor
            error "EBCDIC not supported"
        }
        default {
            # UTF-8 without an encoding declaration
            seek $fd 0 start
            set encoding utf-8
            set encString "UTF-8"
        }
    }
    fconfigure $fd -encoding $encoding
    return
}