# **** note that this file should be placed in your AOLserver's private
# Tcl directory if you want to call parse_all from a regular .tcl page
# (Alternatively, you can cut and paste this entire file into the top of
# the .tcl page that needs to use it.)
# This defines an XML parser for a particular DTD, that of quotations.
# An effort is made to keep this as general as possible for later extension
# to a generalized XML parser.
#
# Ben Adida (ben@mit.edu), 12/29/1998
#
# First we define a data abstraction of a parsed object
# so that we can easily work with the parsing of a tag, one by one
# This data abstraction packages up a tag, the tag's content, and the rest of
# string to parse
proc make_parsed_object {tag content rest} {
return [list $tag $content $rest]
}
proc parsed_object_tag {obj} {
return [lindex $obj 0]
}
proc parsed_object_content {obj} {
return [lindex $obj 1]
}
proc parsed_object_rest {obj} {
return [lindex $obj 2]
}
# We then define a procedure that performs the simple parsing of the next tag in
# the XML string. This procedure will effectively look for the first "open" tag,
# find the corresponding "close" tag, ignore what came before the open tag, parse out
# what is between the open and close tag, and package the tag, tag content, and rest into
# a parsed object.
#
# For efficiency and greediness reasons, we don't use regexps.
proc simple_parse {str} {
set first_open_bracket [string first "<" $str]
set first_close_bracket [string first ">" $str]
# If we have malformed XML, we return 0
if {($first_close_bracket < $first_open_bracket) || ($first_close_bracket == -1) || ($first_open_bracket == -1)} {
# ns_log Notice "simple parse malformed XML 1, $first_open_bracket, $first_close_bracket, "
return 0
}
# Get the tag
set tag [string range $str [expr $first_open_bracket + 1] [expr $first_close_bracket - 1]]
# Find the first closing of that tag
# FIX NEEDED in case the close of the tag is not same case as start tag. (ben@mit.edu)
set close_tag_pos [string first $tag> $str]
set tag [string tolower $tag]
# There's a weird case to take care of here if the close tag appears first, before
# any open tag.... (FIX NEEDED, ben@mit.edu)
set content [string range $str [expr $first_close_bracket + 1] [expr $close_tag_pos - 1]]
# The end of the close tag is (n+3) characters after the start, where
# n is the length of the tag name, and 4 is justified by the ">" characters
# and one more for starting after the close character
set end_close_tag_pos [expr $close_tag_pos + 4 + [string length $tag]]
set rest [string range $str $end_close_tag_pos end]
return [make_parsed_object $tag $content $rest]
}
# Now we write the DTD-specific parsing
# This should be generalized by having a DTD parser that automatically creates these
# parse_ procs (oh so very much like Scheme lambda expression, it makes you want to scream)
# and then dispatches on each tag. But that's for another day
# This procedure looks for a quotations XML object
# to parse. THIS IS THE MAIN PROCEDURE TO CALL!
proc parse_all {str} {
set one_parse [simple_parse $str]
# ns_log Notice "first parse"
# If we don't have the right tag here, abort.
if {[parsed_object_tag $one_parse] != "quotations"} {
# ns_log Notice "not a quotations! [parsed_object_tag $one_parse]"
return 0
}
# Note that we don't care about the "rest" here
return [parse_quotations [parsed_object_content $one_parse]]
}
# A quotations is a bunch of onequotes inside an open quotation / close quotation set of
# tags. We will keep using the parsed_object abstraction, varying the data type of
# the content position.... (we're almost in Scheme first-class citizen heaven)
proc parse_quotations {str} {
set list_of_onequotes [list]
# Loop until we break
while 1 {
set one_parse [simple_parse $str]
# ns_log Notice "parsing one onequotes"
# If nothing comes out, we're done
if {$one_parse == 0} {
return [list quotations $list_of_onequotes]
}
# If something other than a onequote comes out, we have an error
if {[parsed_object_tag $one_parse] != "onequote"} {
return 0
}
# Otherwise, parse the onequote!
# We might want to check for malformed elements... depends how harsh
# we want to be if one of the onequotes is broken!
lappend list_of_onequotes [parse_onequote [parsed_object_content $one_parse]]
set str [parsed_object_rest $one_parse]
}
}
# This procedure parses a onequote. We could here have a parse_PCDATA
# procedure that is called each time, but that is useless here, and
# we want to make sure that we have a way of distinguishing between errors
# and the text 0... we yearn for the NULL.
proc parse_onequote {str} {
### QUOTATION_ID
###
set one_parse [simple_parse $str]
# Check that we have a quotation_id
if {($one_parse == 0) || ([parsed_object_tag $one_parse] != "quotation_id")} {
# ns_log Notice "no quotation_id"
return 0
}
set quotation_id [parsed_object_content $one_parse]
set str [parsed_object_rest $one_parse]
### INSERTION_DATE
###
set one_parse [simple_parse $str]
# Check that we have an insertion_date
if {($one_parse == 0) || ([parsed_object_tag $one_parse] != "insertion_date")} {
# ns_log Notice "no insertion_date"
return 0
}
set insertion_date [parsed_object_content $one_parse]
set str [parsed_object_rest $one_parse]
### AUTHOR_NAME
###
set one_parse [simple_parse $str]
# Check that we have an author name
if {($one_parse == 0) || ([parsed_object_tag $one_parse] != "author_name")} {
# ns_log Notice "no author_name!"
return 0
}
set author_name [parsed_object_content $one_parse]
set str [parsed_object_rest $one_parse]
### CATEGORY
###
set one_parse [simple_parse $str]
# Check that we have a category
if {($one_parse == 0) || ([parsed_object_tag $one_parse] != "category")} {
# ns_log Notice "no category!"
return 0
}
set category [parsed_object_content $one_parse]
set str [parsed_object_rest $one_parse]
### QUOTE
###
set one_parse [simple_parse $str]
# Check that we have a quote
if {($one_parse == 0) || ([parsed_object_tag $one_parse] != "quote")} {
return 0
}
set quote [parsed_object_content $one_parse]
set str [parsed_object_rest $one_parse]
# Now put this all together in an NS_SET
# set return_set [ns_set create]
# ns_set put $return_set quotation_id $quotation_id
# ns_set put $return_set insertion_date $insertion_date
# ns_set put $return_set author_name $author_name
# ns_set put $return_set category $category
# ns_set put $return_set quote $quote
return [list onequote \
[list quotation_id $quotation_id] \
[list insertion_date $insertion_date] \
[list author_name $author_name] \
[list category $category] \
[list quote $quote]]
}