github::ActivityMonitor method backfill_repo_history (public)

 <instance of github::ActivityMonitor[i]> backfill_repo_history \
    [ -repo repo ] [ -branch branch ] [ -start_page start_page ] \
    [ -max_pages max_pages ]

Defined in packages/xowiki/tcl/github-activity-monitor-procs.tcl

Backfill the commit history for a single repo/branch using /repos/{repo}/commits. Rows inserted by the backfill use synthetic negative event_id values. Examples: repo "openacs/openacs-core"; branch "main" or "oacs-5-10".

Switches:
-repo (optional)
-branch (optional, defaults to "main")
-start_page (optional, defaults to "1")
-max_pages (optional, defaults to "50")

Testcases:
No testcase defined.
Source code:
#
# Backfill commit history for one repo/branch into github_activity.
#
# Parameters (switches of the method):
#   -repo        repository path used in the API URL, e.g. "openacs/openacs-core"
#   -branch      branch name, passed as the "sha" query parameter
#   -start_page  first result page to request
#   -max_pages   upper bound on the number of pages requested in this call
#
# For every commit listed by /repos/$repo/commits that is not yet in
# github_activity, the full commit is fetched and inserted with a
# synthetic negative event_id.
#
ns_log Notice "GitHub backfill_repo_history: $repo ($branch)"

# Event ids for backfilled rows are assigned from this value downwards.
# NOTE(review): presumably next_backfill_event_id returns the next free
# negative id -- confirm against its definition.
set next_event_id [:next_backfill_event_id]

set page $start_page
set pages_done 0
# "done" is only set by the (currently disabled) early-stop logic below;
# as long as that stays commented out, the loop ends via "break".
set done 0
set count 0

while {!$done} {
    # ">=" so that at most max_pages pages are requested (">" allowed one more).
    if {$pages_done >= $max_pages} {
        ns_log notice "GitHub backfill_repo_history: reached max_pages=$max_pages for $repo ($branch)"
        break
    }

    # List commits, newest first
    set body [:get "/repos/$repo/commits" [list sha $branch page $page per_page 100]]
    set batch [::util::json2dict $body]

    if {[llength $batch] == 0} {
        ns_log notice "GitHub backfill_repo_history: no more commits at page $page for $repo ($branch)"
        break
    }

    # One transaction per page.
    # NOTE(review): the per-commit HTTP fetches below run inside this
    # transaction, so it stays open for the duration of up to 100 requests.
    ::xo::dc transaction {
        foreach summary $batch {
            set sha [dict get $summary sha]

            # Skip commits we already have (from events or a prior backfill)
            set exists [::xo::dc 0or1row exists_commit {
                select 1 from github_activity where repo = :repo and sha = :sha limit 1
            }]

            if {$exists} {
                # Only this commit is skipped; older commits and pages are
                # still examined. To stop at the first known commit instead,
                # re-enable the two commented lines below.
                ns_log notice "GitHub backfill_repo_history: found existing commit $repo $sha, skipping (page $page)"
                #set done 1
                #break
                continue
            }

            incr count

            #
            # Fetch full commit details (stats, files, etc.),
            # similar to summarize_push_event.
            #
            set commit_body   [:get "/repos/$repo/commits/$sha"]
            set c             [::util::json2dict $commit_body]
            set commit        [dict get $c commit]
            set message_full  [dict get $commit message]

            ns_log notice "(page $page, count $count) commit.message($count): $message_full"

            set author_dict   [dict get $commit author]
            set author_name   [dict get $author_dict name]
            set commit_date   [dict get $author_dict date]
            set stats         [dict get $c stats]
            set additions     [dict get $stats additions]
            set deletions     [dict get $stats deletions]
            set files         [dict get $c files]
            set files_changed [llength $files]

            # Title is the first non-blank line of the message. Derive it
            # first and fall back afterwards, so that empty AND
            # whitespace-only messages get the placeholder (the title must
            # not end up NULL, DB wants NOT NULL).
            set title [string trim [lindex [split [string trimleft $message_full] \n] 0]]
            if {$title eq ""} {
                set title "(no commit message)"
            }

            if {[dict exists $c html_url]} {
                set url [dict get $c html_url]
            } else {
                # construct GitHub web URL as a fallback
                set url "https://github.com/$repo/commit/$sha"
            }

            # Synthetic negative event_id
            set event_id $next_event_id
            incr next_event_id -1

            set created_at $commit_date   ;# for backfill, we just reuse commit time

            ::xo::dc dml insert_backfill {
                insert into github_activity (
                                             event_id, repo, branch, sha,
                                             author_name, author_login,
                                             commit_date, created_at,
                                             title, url,
                                             files_changed, additions, deletions,
                                             raw_payload
                                             ) values (
                                                       :event_id, :repo, :branch, :sha,
                                                       :author_name, NULL,
                                                       :commit_date, :created_at,
                                                       :title, :url,
                                                       :files_changed, :additions, :deletions,
                                                       :commit_body
                                                       )
                on conflict (repo, sha) do nothing
            }
        }
    }

    if {$done} {
        break
    }

    incr page
    incr pages_done
}

ns_log notice "GitHub backfill_repo_history: done for $repo ($branch)"
XQL Not present:
Generic, PostgreSQL, Oracle
[ hide source ] | [ make this the default ]
Show another procedure: