This procedure is defined in the server but not documented via ad_proc or proc_doc and may be intended as a private interface.
The procedure is defined as:
proc tdom::xmlOpenFileWorker {filename encodingString forSimple forRead} {
    # Open $filename, sniff the XML encoding from its first bytes and
    # return the channel, positioned and configured for the parser.
    #
    # Arguments:
    #   filename        path of the XML file to open
    #   encodingString  name of a variable in the caller's scope that
    #                   receives the detected encoding name (UTF-8,
    #                   UTF-16be, UTF-16le or the name found in the XML
    #                   declaration); pass an empty string to ignore it
    #   forSimple       boolean; selects the channel set-up used for
    #                   the simple parser
    #   forRead         boolean; only consulted when a UTF-16 BOM is found
    #
    # Returns the open channel. On any error the channel is closed
    # before the error is re-raised, so no file handle is leaked.
    #
    # This partly (mis-)uses the encoding of a channel handed to [dom
    # parse -channel ..] as a marker: if the channel encoding is utf-8
    # then behind the scenes Tcl_Read() is used, otherwise
    # Tcl_ReadChars(). This is used for the encodings understood (and
    # checked) by the used expat implementation: utf-8 and utf-16 (in
    # either byte order).

    set fd [open $filename]
    if {$encodingString != {}} {
        upvar $encodingString encString
    }

    # Everything that may fail after the open runs inside this catch,
    # so the channel can be released on error. Every path through the
    # script sets the variable "encoding" exactly once.
    set failed [catch {
        # The autodetection of the encoding follows
        # XML Recommendation, Appendix F
        fconfigure $fd -encoding binary
        if {![binary scan [read $fd 4] "H8" firstBytes]} {
            # Very short (< 4 bytes) file: rewind and leave the
            # channel in binary encoding.
            seek $fd 0 start
            set encString UTF-8
            set encoding binary
        } else {
            # First check for a BOM
            switch [string range $firstBytes 0 3] {
                "feff" {
                    # feff: UTF-16, big-endian BOM
                    if {$forSimple || $forRead} {
                        error "UTF-16be is not supported"
                    }
                    seek $fd 0 start
                    set encString UTF-16be
                    set encoding utf-8
                }
                "fffe" {
                    # fffe: UTF-16, little-endian BOM
                    set encString UTF-16le
                    if {$forSimple || $forRead} {
                        # Skip the two BOM bytes and let Tcl decode.
                        seek $fd 2 start
                        set encoding unicode
                    } else {
                        seek $fd 0 start
                        set encoding utf-8
                    }
                }
            }

            if {![info exists encoding]} {
                # No BOM. If the entity has an XML declaration, the
                # first four characters must be "<?xm".
                switch $firstBytes {
                    "3c3f786d" {
                        # UTF-8, ISO 646, ASCII, some part of ISO 8859,
                        # Shift-JIS, EUC, or any other 7-bit, 8-bit, or
                        # mixed-width encoding which ensures that the
                        # characters of ASCII have their normal
                        # positions, width and values; the actual
                        # encoding declaration must be read to detect
                        # which of these applies, but since all of
                        # these encodings use the same bit patterns for
                        # the ASCII characters, the encoding declaration
                        # itself can be read reliably.
                        # The first 300 bytes should be enough for an
                        # XML declaration. This is of course not 100
                        # percent bullet-proof.
                        set head [read $fd 296]
                        # Try to find the end of the XML declaration
                        set closeIndex [string first ">" $head]
                        if {$closeIndex == -1} {
                            error "Weird XML data or not XML data at all"
                        }
                        # $head starts at byte 4, so reading
                        # closeIndex + 5 bytes from the start leaves the
                        # channel right behind the closing ">" of the
                        # declaration. The data itself is not needed.
                        seek $fd 0 start
                        read $fd [expr {$closeIndex + 5}]
                        # extract the encoding information
                        set pattern {^[^>]+encoding=[\x20\x9\xd\xa]*["']([^ "']+)['"]}
                        # emacs: "
                        if {![regexp $pattern $head - encStr]} {
                            # Probably something like
                            # <?xml version="1.0"?>. Without encoding
                            # declaration this must be UTF-8
                            set encoding utf-8
                            set encString UTF-8
                        } else {
                            # Declared name is translated by
                            # IANAEncoding2TclEncoding (defined
                            # elsewhere in the tdom namespace).
                            set encoding [IANAEncoding2TclEncoding $encStr]
                            set encString $encStr
                        }
                    }
                    "0000003c" -
                    "3c000000" -
                    "00003c00" -
                    "003c0000" {
                        # UCS-4 in one of the four byte orders listed
                        # in Appendix F (1234, 4321, 2143, 3412)
                        error "UCS-4 not supported"
                    }
                    "003c003f" {
                        # UTF-16, big-endian, no BOM
                        if {$forSimple} {
                            error "UTF-16be is not supported by the simple parser"
                        }
                        seek $fd 0 start
                        set encoding utf-8
                        set encString UTF-16be
                    }
                    "3c003f00" {
                        # UTF-16, little-endian, no BOM
                        if {$forSimple} {
                            # NOTE(review): there is no BOM here, so
                            # offset 2 looks like it skips the leading
                            # "<" - confirm against the simple parser
                            # before changing.
                            seek $fd 2 start
                            set encoding unicode
                        } else {
                            seek $fd 0 start
                            set encoding utf-8
                        }
                        set encString UTF-16le
                    }
                    "4c6fa794" {
                        # EBCDIC in some flavor
                        error "EBCDIC not supported"
                    }
                    default {
                        # UTF-8 without an encoding declaration
                        seek $fd 0 start
                        set encoding utf-8
                        set encString "UTF-8"
                    }
                }
            }
        }
        fconfigure $fd -encoding $encoding
    } errMsg]

    if {$failed} {
        # Save the error state first; closing the channel could
        # otherwise overwrite it.
        set savedInfo $::errorInfo
        set savedCode $::errorCode
        catch {close $fd}
        error $errMsg $savedInfo $savedCode
    }
    return $fd
}