search::indexer (private)

 search::indexer

Defined in packages/search/tcl/search-procs.tcl

Search indexer loops over the existing entries in the search_observer_queue table and calls the appropriate driver functions to index, update, or delete the entry.

Authors:
Neophytos Demetriou
Jeff Davis <davis@xarg.net>

Partial Call Graph (max 5 caller/called nodes):
%3 acs_object_type acs_object_type (public) acs_sc::invoke acs_sc::invoke (public) acs_sc_binding_exists_p acs_sc_binding_exists_p (public) ad_print_stack_trace ad_print_stack_trace (public) apm_package_id_from_key apm_package_id_from_key (public) search::indexer search::indexer search::indexer->acs_object_type search::indexer->acs_sc::invoke search::indexer->acs_sc_binding_exists_p search::indexer->ad_print_stack_trace search::indexer->apm_package_id_from_key

Testcases:
No testcase defined.
Source code:

    set driver [parameter::get  -package_id [apm_package_id_from_key search]  -parameter FtsEngineDriver]

    if { $driver eq ""
         || (![callback::impl_exists -callback search::index -impl $driver]  && ! [acs_sc_binding_exists_p FtsEngineDriver $driver])
     } {
        # Nothing to do if no driver
        ns_log Debug "search::indexer: driver=$driver binding exists? "  "[acs_sc_binding_exists_p FtsEngineDriver $driver]"
        return
    }
    # JCD: pull out the rows all at once so we release the handle
    foreach row [db_list_of_lists search_observer_queue_entry {}] {

        # DRB: only do Oracle shit for oracle (doh)
        if { [ns_config "ns/db/drivers" oracle] ne "" } {
            if {[nsv_incr search_static_variables item_counter] > 1000} {
                nsv_set search_static_variables item_counter 0
                db_exec_plsql optimize_intermedia_index {begin
                    ctx_ddl.sync_index ('swi_index');
                    end;
                }
            }
        }

        lassign $row object_id event_date event
        array unset datasource
        switch -- $event {
            UPDATE -
            INSERT {
                # Don't bother reindexing if we've already inserted/updated this object in this run
                if {![info exists seen($object_id)]} {
                    set object_type [acs_object_type $object_id]
                    ns_log debug "\n-----DB-----\n SEARCH INDEX object type = '${object_type}' \n------------\n "
                    if {[callback::impl_exists -callback search::datasource -impl $object_type]
                        || [acs_sc_binding_exists_p FtsContentProvider $object_type]} {

                        array set datasource {mime {} storage_type {} keywords {}}
                        if {[catch {
                            # check if a callback exists, if not fall
                            # back to service contract
                            if {[callback::impl_exists -callback search::datasource -impl $object_type]} {
                                #ns_log notice "\n-----DB-----\n SEARCH INDEX callback datasource exists for object_type '${object_type}'\n------------\n "
                                array set datasource [lindex [callback  -impl $object_type  search::datasource  -object_id $object_id] 0]
                            } else {
                                #ns_log notice "invoke contract [list acs_sc::invoke -contract FtsContentProvider -operation datasource -call_args [list $object_id] -impl $object_type]"
                                array set datasource  [acs_sc::invoke  -contract FtsContentProvider  -operation datasource  -call_args [list $object_id]  -impl $object_type]
                            }

                            search::content_get txt $datasource(content) $datasource(mime)  $datasource(storage_type) $object_id

                            if {[callback::impl_exists -callback search::index -impl $driver]} {
                                if {![info exists datasource(package_id)]} {
                                    set datasource(package_id) ""
                                }

                                if {![info exists datasource(relevant_date)]} {
                                    set datasource(relevant_date) ""
                                }
                                #ns_log notice "callback invoke search::index"
                                callback -impl $driver search::index  -object_id $object_id  -content $txt  -title $datasource(title)  -keywords $datasource(keywords)  -package_id $datasource(package_id)  -community_id $datasource(community_id)  -relevant_date $datasource(relevant_date)  -datasource datasource
                            } else {
                                #ns_log notice "acs_sc::invoke FtsEngineDriver"
                                set r [acs_sc::invoke  -contract FtsEngineDriver  -operation [expr {$event eq "UPDATE" ? "update_index" : "index"}]  -call_args [list $datasource(object_id)  $txt $datasource(title)  $datasource(keywords)]  -impl $driver]
                            }
                        } errMsg]} {
                            ns_log Error "search::indexer: error getting datasource for "  "$object_id $object_type: $errMsg\n[ad_print_stack_trace]"
                        } else {
                            # call the action so other people who do indexey things have a hook
                            callback -catch search::action  -action $event  -object_id $object_id  -datasource datasource  -object_type $object_type

                            # Remember seeing this object so we can avoid reindexing it later
                            set seen($object_id) 1

                            search::dequeue  -object_id $object_id  -event_date $event_date  -event $event
                        }
                    }
                }
            }
            DELETE {
                if {[catch {
                    set r [acs_sc::invoke  -contract FtsEngineDriver  -operation unindex  -call_args [list $object_id]  -impl $driver]
                } errMsg]} {
                    ns_log Error "search::indexer: error unindexing $object_id "  "[acs_object_type $object_id]: $errMsg\n[ad_print_stack_trace]"
                } else {
                    # call the search action callbacks.
                    callback -catch search::action  -action $event  -object_id $object_id  -datasource NONE  -object_type {}

                    search::dequeue  -object_id $object_id  -event_date $event_date  -event $event

                }
                #
                # Unset "seen" element since one could conceivably
                # delete one but then subsequently reinsert it (e.g.
                # when rolling back/forward the live revision).
                #
                if {[info exists seen($object_id)]} {
                    unset seen($object_id)
                }
            }
        }

        # Don't put that dequeue in a default block of the switch above
        # otherwise objects with insert/update and delete operations in the same
        # run would crash and never get dequeued

        search::dequeue -object_id $object_id -event_date $event_date -event $event
    }
    ns_log notice "SEARCH INDEXER END [clock format [clock seconds]]"
XQL Not present:
PostgreSQL
Generic XQL file:
<fullquery name="search::indexer.search_observer_queue_entry">
    <!-- Fetch the 50 oldest pending queue entries, oldest first. -->
    <querytext>
        SELECT object_id, event_date, event
          FROM search_observer_queue
         ORDER BY event_date ASC
         LIMIT 50
    </querytext>
</fullquery>
packages/search/tcl/search-procs.xql

Oracle XQL file:
<fullquery name="search::indexer.search_observer_queue_entry">
    <!--
      Fetch the 99 oldest pending queue entries, oldest first.
      ROWNUM is assigned before ORDER BY is applied, so the ordering
      has to happen in an inline view and the ROWNUM filter outside it;
      filtering and ordering in the same query block would pick an
      arbitrary set of 99 rows and merely sort those.
    -->
    <querytext>
            select object_id, event_date, event
            from (select object_id, event_date, event
                  from search_observer_queue
                  order by event_date asc)
            where rownum < 100
            order by event_date asc
        </querytext>
</fullquery>
packages/search/tcl/search-procs-oracle.xql

[ hide source ] | [ make this the default ]
Show another procedure: