Hi Steve,
I recently added this functionality for PDFs, DOCs, and TXT by adding a few lines to search-procs.tcl, below. You should be able to apply a similar method for Excel files.
You'll need to download and install XPDF (http://www.foolabs.com/xpdf/download.html) and catdoc (http://www.45.free.net/~vitus/ice/catdoc/).
I retyped my changes below from memory; I think they're correct, but please double-check the syntax before using them.
ad_proc search_content_get {
    _txt
    content
    mime
    storage_type
} {
    Resolves a content item according to its storage type and passes the
    result to search_content_filter, which writes the extracted text into
    the caller's variable named by _txt.

    @author Neophytos Demetriou

    @param _txt name of the variable in the caller's scope that receives
           the text; it is reset to the empty string before filtering
    @param content
           holds the filename if storage_type=file
           holds the text data if storage_type=text
           holds the lob_id if storage_type=lob
    @param mime MIME type, forwarded unchanged to search_content_filter
    @param storage_type one of text, file or lob; forwarded to
           search_content_filter as well
} {
    upvar $_txt txt
    set txt ""

    if {$storage_type eq "text" || $storage_type eq "file"} {
        # text: content already is the data.
        # file: content is the filename; it is passed on as-is (not the
        # file contents) so the filter can work from the path.
        set data $content
    } elseif {$storage_type eq "lob"} {
        db_transaction {
            set data [db_blob_get get_lob_data {}]
        }
    }

    # storage_type goes along so the filter can tell text/plain text
    # apart from a text/plain file.
    search_content_filter txt data $mime $storage_type
}
ad_proc search_content_filter {
    _txt
    _data
    mime
    storage_type
} {
    Converts a content item to plain text according to its MIME type and
    stores the result in the caller's variable named by _txt. MIME types
    that match none of the patterns below leave that variable untouched.

    @author Neophytos Demetriou

    @param _txt name of the caller's variable that receives the text
    @param _data name of the caller's variable holding the content. It is
           used as a file path for text/plain when storage_type is file,
           and always for application/pdf and application/msword;
           otherwise it is used as the text itself.
    @param mime MIME type of the content, matched as a glob prefix
    @param storage_type only consulted for text/plain, where the value
           file means that the data is a path rather than the text

    @error propagates any Tcl error from opening or reading the file, and
           from exec when a converter cannot be run, exits non-zero or
           writes to standard error
} {
    upvar $_txt txt
    upvar $_data data

    switch -glob -- $mime {
        {text/plain*} {
            # The condition must be a single braced expression; eq does a
            # plain string comparison.
            if {$storage_type eq "file"} {
                # Read the file natively instead of spawning cat.
                # -nonewline drops the final newline, as exec did.
                set fd [open $data r]
                set failed [catch {read -nonewline $fd} result]
                set saved_info $::errorInfo
                set saved_code $::errorCode
                # Always release the channel, even when the read failed.
                close $fd
                if {$failed} {
                    error $result $saved_info $saved_code
                }
                set txt $result
            } else {
                set txt $data
            }
        }
        {text/html*} {
            set txt $data
        }
        {application/pdf*} {
            # pdftotext writes the extracted text to stdout when the
            # output file argument is "-".
            # NOTE(review): data is treated as a path here regardless of
            # storage_type - confirm PDFs are never stored as text or lob.
            set txt [exec pdftotext $data -]
        }
        {application/msword*} {
            # catdoc prints the document text on stdout.
            # NOTE(review): same path assumption as for PDF - confirm.
            set txt [exec catdoc $data]
        }
    }
}
Restart the server and reload your content into search_observer_queue and you're ready to go.
Hope someone can use this. It works great!