ad_text_to_html (public)
ad_text_to_html [ -no_links ] [ -no_lines ] [ -no_quote ] [ -includes_html ] [ -encode ] text
Defined in packages/acs-tcl/tcl/text-html-procs.tcl
Converts plaintext to HTML. Also translates any recognized email addresses or URLs into hyperlinks.
- Switches:
- -no_links (optional, boolean)
- will prevent it from turning URLs and email addresses into hyperlinks
- -no_lines (optional, boolean)
- will prevent it from converting line breaks into HTML line and paragraph breaks (see the util_convert_line_breaks_to_html call in the source below)
- -no_quote (optional, boolean)
- will prevent it from HTML-quoting the output, so it can be run on semi-HTML input and preserve that formatting. It also prevents spaces/tabs from being replaced with &nbsp; entities, since that could too easily corrupt HTML tags.
- -includes_html (optional, boolean)
- Set this if the text parameter already contains some HTML which should be preserved.
- -encode (optional, boolean)
- This will encode international characters into their HTML entity equivalents, e.g. "ü" into &uuml;
- Parameters:
- text (required)
- Authors:
- Branimir Dolicki <branimir@arsdigita.com>
- Lars Pind <lars@pinds.com>
- Created:
- 19 July 2000
- Testcases:
- ad_text_to_html, xowiki_test_cases, create_form_with_form_instance
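For illustration, a few hypothetical calls (e.g. from the developer-support shell of a running OpenACS instance; the sample strings are invented, and the `--` end-of-options marker is the usual ad_proc convention):

    # Plain text: URLs become anchors, line breaks become HTML.
    ad_text_to_html -- "Read the docs at https://openacs.org/doc/."

    # Input that already contains some HTML which should survive:
    ad_text_to_html -includes_html -no_quote -- "Some <b>bold</b> text"

    # Skip link highlighting and line-break conversion:
    ad_text_to_html -no_links -no_lines -- "one@example.com"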
Source code:

    if { $text eq "" } {
        return ""
    }
    set orig_text $text

    # Convert lines starting with a ">" into blockquotes.
    set text [ad_text_cite_to_blockquote $text]

    if {$includes_html_p} {
        set d [ad_enhanced_text_escape_disallowed $text]
        set text [dict get $d text]
        set tagDict [dict get $d tagDict]
    } else {
        set tagDict ""
    }

    set space_added 0
    set nr_links 0

    if { !$no_links_p } {
        #
        # We start by putting a space in front so our URL/email
        # highlighting will work for URLs/emails right in the
        # beginning of the text.
        #
        set text " $text"
        set space_added 1

        # If something is " http://" or " https://" or "ftp://" we
        # assume it is a link to an outside source.
        #
        # (bd) The only purpose of the markers is to get rid of
        # trailing dots, commas and things like that. Note the code
        # uses utf-8 codes \u0002 (start of text) and \u0003 (end of
        # text) special chars as markers. Previously, we had \x001 and
        # \x002, which do not work reliably (regsub was missing some
        # entries, probably due to a mess-up of the internal
        # representation).
        #
        set nr_links [regsub -nocase -all \
            {([^a-zA-Z0-9]+)((http|https|ftp)://[^\(\)\"<>\s]+)} \
            $text "\\1\u0002\\2\u0003" text]

        # Email links have the form xxx@xxx.xxx
        #
        # JCD: don't treat things =xxx@xxx.xxx as email, since the most
        # common occurrence seems to be in URLs (although VPATH bounce
        # emails like bounce-user=domain.com@sourcehost.com will then
        # not work correctly). Another tricky case is
        # http://www.postgresql.org/message-id/20060329203545.M43728@narrowpathinc.com
        # where we do not want to turn the @ into a mailto.
        incr nr_links [regsub -nocase -all \
            {([^a-zA-Z0-9=/.-]+)(mailto:)?([^=\(\)\s:;,@<>/]+@[^\(\)\s.:;,@<>]+[.][^\(\)\s:;,@<>]+)} \
            $text "\\1\u0002mailto:\\3\u0003" text]

        #
        # Remove markers from URLs that are already HREF=... or SRC=... chunks
        #
        if { $includes_html_p && $nr_links > 0 } {
            regsub -nocase -all \
                {((href|src)\s*=\s*['\"]?)\u0002([^\u0003]*)\u0003} \
                $text {\1\3} text
        }
    }

    # At this point, before inserting some of our own <, >, and "'s
    # we quote the ones entered by the user:
    if { !$no_quote_p } {
        set text [ns_quotehtml $text]
    }

    if { $encode_p } {
        set myChars {
            ª º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ
            Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ
            ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û
            ü ý þ ÿ ¿
        }
        set myHTML {
            &ordf; &ordm; &Agrave; &Aacute; &Acirc; &Atilde; &Auml; &Aring; &AElig; &Ccedil;
            &Egrave; &Eacute; &Ecirc; &Euml; &Igrave; &Iacute; &Icirc; &Iuml; &ETH; &Ntilde;
            &Ograve; &Oacute; &Ocirc; &Otilde; &Ouml; &Oslash; &Ugrave; &Uacute; &Ucirc; &Uuml;
            &Yacute; &THORN; &szlig; &agrave; &aacute; &acirc; &atilde; &auml; &aring; &aelig;
            &ccedil; &egrave; &eacute; &ecirc; &euml; &igrave; &iacute; &icirc; &iuml; &eth;
            &ntilde; &ograve; &oacute; &ocirc; &otilde; &ouml; &oslash; &ugrave; &uacute; &ucirc;
            &uuml; &yacute; &thorn; &yuml; &iquest;
        }
        set map {}
        foreach ch $myChars entity $myHTML {
            lappend map $ch $entity
        }
        set text [string map $map $text]
    }

    # Convert line breaks
    if { !$no_lines_p } {
        if {![info exists tagDict] || ![dict exists $tagDict pre]} {
            set contains_pre ""
        } else {
            set contains_pre "-contains_pre"
        }
        #ns_log notice "... contains_pre <$contains_pre>"
        set text [util_convert_line_breaks_to_html \
            -includes_html=$includes_html_p {*}$contains_pre -- $text]
        #
        # The function strips all leading white space!
        #
        set space_added 0
    }

    if { !$no_quote_p } {
        # Convert every two spaces to an &nbsp;
        regsub -all -- {  } $text "\\&nbsp; " text

        # Convert tabs to four &nbsp;s
        regsub -all -- {\t} $text {\&nbsp;\&nbsp;\&nbsp;\&nbsp;} text
    }

    if { $nr_links > 0 } {
        #
        # Move the end of the link before any punctuation marks at the
        # end of the URL.
        #
        regsub -all -- {([\]!?.:;,<>\(\)\}\"'-]+)(\u0003)} $text {\2\1} text

        #
        # Convert the marked links and emails into "<a href=...>..."
        #
        regsub -all -- {\u0002([^\u0003]+?)\u0003} $text {<a href="\1">\1</a>} text

        set changed_back [regsub -all -- {(\u0002|\u0003)} $text {} text]
        if {$includes_html_p} {
            #
            # All markers should be gone now.
            #
            # In case we changed something back (which means something
            # is broken in our regexps above), log a warning, so we
            # can debug it.
            #
            if {$changed_back > 0} {
                ad_log warning "Replaced spurious magic marker in ad_text_to_html, orig:\n$orig_text"
            }
        }
    }

    if {$space_added} {
        set text [string range $text 1 end]
    }

    if {[info exists tagDict]} {
        set closeTags 0
        set reason ""
        foreach {tag count} $tagDict {
            if {$count > 0} {
                set reason "count of $tag not 0"
                set closeTags 1
                break
            }
        }
        #ns_log notice "closeTags $closeTags tagDict <$tagDict> includes_html_p $includes_html_p"
    } else {
        set reason "no tag dict"
        set closeTags 1
    }
    if {$closeTags} {
        if {[ns_conn isconnected]} {
            append reason " called in [ns_conn url]?[ns_conn query]"
        }
        ns_log notice "early call closeTags, reason: $reason"
        set text [util_close_html_tags_ns_parsehtml $text]
    }
    return $text

XQL Not present: Generic, PostgreSQL, Oracle
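The link-highlighting pass relies only on core Tcl regsub calls, so the marker technique (\u0002/\u0003 as temporary delimiters around each URL) can be tried in a plain tclsh, outside OpenACS. A minimal sketch using the same three regular expressions as the source above; the sample sentence is invented:

    set text " See https://openacs.org/xowiki, for details."

    # Mark each URL with \u0002 ... \u0003.
    regsub -nocase -all {([^a-zA-Z0-9]+)((http|https|ftp)://[^\(\)\"<>\s]+)} \
        $text "\\1\u0002\\2\u0003" text

    # Move trailing punctuation out of the marked region.
    regsub -all -- {([\]!?.:;,<>\(\)\}\"'-]+)(\u0003)} $text {\2\1} text

    # Turn each marked region into an anchor.
    regsub -all -- {\u0002([^\u0003]+?)\u0003} $text {<a href="\1">\1</a>} text

    puts $text
    # => See <a href="https://openacs.org/xowiki">https://openacs.org/xowiki</a>, for details.

Note how the trailing comma, which the first pattern greedily swallows into the URL, is moved back outside the marker by the second regsub before the anchor is built.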