Forum OpenACS Q&A: Re: OpenACS Knoppix CD: Data needed

Collapse
Posted by Frank Bergmann on
Hi Malte,

I had the same problem preparing demo data for Project/Open. As a solution, I created an "Anonymizer" script that "anonymizes" (= replaces strings with random characters) the data from a production system. I don't know if that's of any help to you. I've attached the script below.

Bests,
Frank

# /www/intranet/anonymize.tcl

ad_page_contract {
    Changes all clients, users, prices etc. so that a
    production system can be converted into a demo system.

    @param return_url Optional URL to redirect to once the
                      anonymization has finished.
} {
    { return_url "" }
}

# Safety net: refuse to run unless the TestDemoDevServer parameter
# is set to exactly "true" (it defaults to "false").
# NOTE(review): only a login is required further down; no admin
# permission check is visible here - confirm that this is intended.
set test_demo_dev_server [ad_parameter TestDemoDevServer "" false]
if {[string compare "true" $test_demo_dev_server] != 0} {
    ad_return_complaint 1 "<LI>This is not a Test/Demo/Development server.<BR>
    So you probably don't want to destroy all data, right?!?<br>&nbsp;<br>
    If this IS a Test/Demo/Development server, then check '/parameters/*.ini'
    and set the TestDemoDevServer flag to 'true'."
    return
}

# Make sure the visitor is logged in before touching any data.
set user_id [ad_maybe_redirect_for_registration]

ad_proc anonymize_name { org_string } {
    Anonymizes a name (or any free text) word by word.
    The string is split at single spaces, every piece is passed
    through anonymize_word, and the pieces are glued back together
    with single spaces.

    @param org_string The original string.
    @return The anonymized string.
} {
    set anonymized_words [list]
    foreach piece [split $org_string " "] {
        lappend anonymized_words [anonymize_word $piece]
    }
    return [join $anonymized_words " "]
}

ad_proc anonymize_email { org_email } {
    Replaces an email address with an anonymized version.
    The part before the first "@" and the part after it are
    anonymized separately with anonymize_word, so the "@"
    itself is preserved.

    @param org_email The original email address.
    @return The anonymized address, or the placeholder
            "nobody@nowhere.com" if org_email contains no "@".
} {
    if {![regexp {([^@]*)\@(.*)} $org_email match name domain]} {
        ns_log Notice "bad email: $org_email"
        # Return a plain address: a "mailto:" prefix would make the
        # placeholder a URL rather than an email address.
        # NOTE(review): every malformed address maps to the same
        # placeholder - check for unique constraints on the column.
        return "nobody@nowhere.com"
    }

    set name_mod [anonymize_word $name]
    set domain_mod [anonymize_word $domain]
    return "$name_mod@$domain_mod"
}

ad_proc anonymize_url { org_url } {
    Replaces a URL with an anonymized version by treating the
    whole URL as a single word for anonymize_word.

    NOTE(review): the scheme part (e.g. "http") is passed through
    anonymize_word as well - confirm that this is acceptable.

    @param org_url The original URL.
    @return The anonymized URL string.
} {
    set anonymized_url [anonymize_word $org_url]
    return $anonymized_url
}

ad_proc anonymize_word { org_word } {
    Anonymizes a single word character by character using
    anonymize_char. Characters that anonymize_char leaves
    untouched (such as ".") survive, which preserves the shape
    of "Inc.", "S.L." etc.

    @param org_word The original word.
    @return The anonymized word.
} {
    set anonymized ""
    foreach ch [split $org_word ""] {
        append anonymized [anonymize_char $ch]
    }
    # ns_log Notice "anonymize_word: $org_word => $anonymized"
    return $anonymized
}

ad_proc anonymize_char { org_char } {
    Anonymizes a single character by replacing it with a random
    character from the same class:

        digits   0123456789
        vowels   aeiouy
        plosives bdgkpqt
        sibilant cjsxz
        nasals   mn
        labials  fvw
        liquids  lr

    ASCII upper case letters are mapped within the same classes
    and returned in upper case again. Single and double quotes are
    removed (empty string returned). Everything else, including
    the letter "h", is returned unchanged.

    @param org_char The original character.
    @return The replacement character, or "" for a quote.
} {
    set upper_letters "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    set lower_letters "abcdefghijklmnopqrstuvwxyz"

    # Work on the lower case form and remember whether we have
    # to convert the result back to upper case at the end.
    set upper_idx [string first $org_char $upper_letters]
    set was_upper [expr {$upper_idx >= 0}]
    if {$was_upper} {
        set org_char [string index $lower_letters $upper_idx]
    }

    # Quotes are dropped altogether.
    if {[string first $org_char {'"}] >= 0} {
        return ""
    }

    # The classes are disjoint, so at most one of them matches.
    set res $org_char
    foreach char_class {0123456789 aeiouy bdgkpqt cjsxz mn fvw lr} {
        if {[string first $org_char $char_class] >= 0} {
            set res [pick_char $char_class]
            break
        }
    }

    if {$was_upper} {
        set res [string index $upper_letters [string first $res $lower_letters]]
    }
    return $res
}

ad_proc pick_char { char_set } {
    Picks a random character from char_set, each position being
    equally likely.

    @param char_set String containing the candidate characters.
    @return One character of char_set, or the empty string if
            char_set is empty.
} {
    set len [string length $char_set]

    # Guard against an empty set instead of failing on the index
    # computation below.
    if {$len == 0} {
        return ""
    }

    # rand() is scaled to the set length and truncated, which
    # gives an index between 0 and len-1. The expression is braced
    # so that it is parsed only once.
    set pos [expr {int(rand() * $len)}]
    return [string index $char_set $pos]
}

# ---------------------- im_projects -------------------------------

# Select the free-text columns of every project ...
set im_projects_sql "
select
    group_id,
    description,
    note,
    customer_project_nr,
    final_customer
from
    im_projects"

# ... and write back the anonymized values. The statement uses bind
# variables instead of pasting the values into the SQL text, so it
# is built only once and no value can break the quoting.
# NOTE(review): empty values are now passed as bind variables -
# confirm how the database driver stores them (NULL vs. '').
set im_projects_update_sql "
    update im_projects set
        description = :description,
        note = :note,
        customer_project_nr = :customer_project_nr,
        final_customer = :final_customer
    where group_id = :group_id"

db_foreach im_projects_select $im_projects_sql {

    # Overwrite the column variables of the current row with
    # their anonymized counterparts before binding them.
    set description [anonymize_name $description]
    set note [anonymize_name $note]
    set customer_project_nr [anonymize_name $customer_project_nr]
    set final_customer [anonymize_name $final_customer]

    db_dml im_projects_update $im_projects_update_sql
}

# ---------------------- im_tasks -------------------------------

# Select the free-text columns of every task ...
set im_tasks_sql "
select
    task_id,
    task_name,
    description
from
    im_tasks"

# ... and write back the anonymized values. The statement uses bind
# variables instead of pasting the values into the SQL text, so it
# is built only once and no value can break the quoting.
# NOTE(review): empty values are now passed as bind variables -
# confirm how the database driver stores them (NULL vs. '').
set im_tasks_update_sql "
    update im_tasks set
        task_name = :task_name,
        description = :description
    where task_id = :task_id"

db_foreach im_tasks_select $im_tasks_sql {

    # Overwrite the column variables of the current row with
    # their anonymized counterparts before binding them.
    set task_name [anonymize_name $task_name]
    set description [anonymize_name $description]

    db_dml im_tasks_update $im_tasks_update_sql
}

    set user_password_update_sql "
    update users
    set password='xxx'
    "

    db_dml user_password_update $user_password_update_sql

# [... and so on for all tables with critical data]

# Finish the request: show a confirmation page when no return_url
# was supplied, otherwise send the browser back to return_url.
if {[string equal "" $return_url]} {
    set page_body "<H1>Anonymize</H1>Successfully finished"
    doc_return  200 text/html [im_return_template]
} else {
    ad_return_redirect $return_url
}