Forum OpenACS Development: File Storage/Content Repository Delete Perfomance Upgrade

As you may already know, using loops inside transactions causes significant performance issues.  This has caused issues for us, because we handle a large volume of files, so I've attempted to rewrite the delete functions for PG to use fewer loops in the transaction and improve performance.

This code includes a rewrite of content_item__del and content_folder__del, but it also adds a new content_item__del(integer[]) to accept an array of item_ids that can be used in any(), which is used by the new content_folder__del.  I've also included a function, fs_child_objects which basically just returns all the child objects based on the treesort_key.  This last function will only work in 8.3 or newer because it returns a query() for the setof.  Preliminary testing indicates that this code doubles the speed of the deletes for about 1000 items, but will likely be much more beneficial with higher volumes.

This will probably paste ugly, but the code should work fine.
----- Code Below ----
create or replace function content_item__del (integer)
  returns integer as '
  declare
    delete__item_id                alias for $1;
    v_symlink_val                  record;
    v_revision_val                record;
    v_rel_val                      record;
    v_revision_id          integer;
    v_revision_id_hold          varchar;
    sql                  varchar;
  begin

  -- Delete symlinks
  delete from cr_items where item_id in (select symlink_id from cr_symlinks where target_id = delete__item_id);
  delete from cr_symlinks where symlink_id in (select symlink_id from cr_symlinks where target_id = delete__item_id);
  delete from acs_attribute_values where object_id in (select symlink_id from cr_symlinks where target_id = delete__item_id);
  delete from acs_permissions where object_id in (select symlink_id from cr_symlinks where target_id = delete__item_id) or
  grantee_id in (select symlink_id from cr_symlinks where target_id = delete__item_id);
  delete from acs_objects where context_id in (select symlink_id from cr_symlinks where target_id = delete__item_id);
  delete from acs_objects where object_id in (select symlink_id from cr_symlinks where target_id = delete__item_id);


  delete from cr_release_periods where item_id = delete__item_id;
  delete from cr_item_template_map where item_id = delete__item_id;

  -- Delete Relations
  delete from acs_attribute_values where object_id in (select rel_id from cr_item_rels where item_id = delete__item_id or related_object_id = delete__item_id);
  delete from acs_permissions where object_id in (select rel_id from cr_item_rels where item_id = delete__item_id) or grantee_id in (select rel_id from cr_item_rels where item_id = delete__item_id);
  delete from acs_objects where context_id in (select rel_id from cr_item_rels where item_id = delete__item_id);
  delete from acs_objects where object_id in (select rel_id from cr_item_rels where item_id = delete__item_id);

  delete from acs_attribute_values where object_id in (select rel_id from cr_item_rels where item_id = delete__item_id or related_object_id = delete__item_id);
  delete from acs_permissions where object_id in (select rel_id from cr_item_rels where item_id = delete__item_id) or grantee_id in (select rel_id from cr_item_rels where item_id = delete__item_id);
  delete from acs_objects where context_id in (select rel_id from cr_item_rels where item_id = delete__item_id);
  delete from acs_objects where object_id in (select rel_id from cr_item_rels where item_id = delete__item_id);

  for v_rel_val in select child_id from  cr_child_rels  where  parent_id = delete__item_id
  loop
    perform content_item__del(v_rel_val.child_id);
  end loop;


  -- Delete Revisions
  delete from cr_item_publish_audit where old_revision in (select revision_id from cr_revisions where item_id = delete__item_id) or
  new_revision in (select revision_id from cr_revisions where item_id = delete__item_id);
  update cr_items set live_revision=NULL, latest_revision=NULL where item_id= delete__item_id;
  delete from acs_attribute_values where object_id in (select revision_id from cr_revisions where item_id = delete__item_id);
  delete from acs_permissions where object_id in (select revision_id from cr_revisions where item_id = delete__item_id) or
  grantee_id in (select revision_id from cr_revisions where item_id = delete__item_id);
  select array_to_string((select array(select revision_id from cr_revisions where item_id = delete__item_id)),'','') into v_revision_id_hold;
  if v_revision_id_hold != '''' then
    sql:=''delete from acs_objects where context_id in ('' || v_revision_id_hold || '');'';
    execute sql;
    sql:=''delete from acs_objects where object_id in ('' || v_revision_id_hold || '');'';
    execute sql;
    sql:=''delete from cr_revisions where item_id in ('' || v_revision_id_hold || '');'';
    execute sql;
  end if;


  delete from acs_permissions where object_id = delete__item_id;
  delete from cr_item_keyword_map where item_id = delete__item_id;
  perform journal_entry__delete_for_object(delete__item_id);

  -- Delete item object
  delete from acs_objects where context_id = delete__item_id;
  delete from acs_objects where object_id = delete__item_id;
  delete from acs_attribute_values where object_id = delete__item_id;
  delete from acs_permissions where object_id = delete__item_id or
  grantee_id = delete__item_id;
  delete from acs_objects where context_id = delete__item_id;

  return 0;
end;' language 'plpgsql';

create or replace function content_item__del (integer[])
  returns integer as '
  declare
    delete__item_id                alias for $1;
    v_symlink_val                  record;
    v_revision_val                record;
    v_rel_val                      record;
    v_child_ids                    integer[];
    v_revision_id                  integer;
    v_revision_id_hold            varchar;
    sql                            varchar;
    i                              integer;
    j                              integer;
    journal_rec                    record;
  begin
  if array_lower(delete__item_id,1) is null then
    return 0;
  end if;

  -- Delete symlinks
  delete from cr_items where item_id in (select symlink_id from cr_symlinks where target_id = any(delete__item_id));
  delete from cr_symlinks where symlink_id in (select symlink_id from cr_symlinks where target_id = any(delete__item_id));
  delete from acs_attribute_values where object_id in (select symlink_id from cr_symlinks where target_id = any(delete__item_id));
  delete from acs_permissions where object_id in (select symlink_id from cr_symlinks where target_id = any(delete__item_id)) or
  grantee_id in (select symlink_id from cr_symlinks where target_id = any(delete__item_id));
  delete from acs_objects where context_id in (select symlink_id from cr_symlinks where target_id = any(delete__item_id));
  delete from acs_objects where object_id in (select symlink_id from cr_symlinks where target_id = any(delete__item_id));

  delete from cr_release_periods where item_id = any(delete__item_id);
  delete from cr_item_template_map where item_id = any(delete__item_id);

  -- Delete Relations
  delete from acs_attribute_values where object_id in (select rel_id from cr_item_rels where item_id = any(delete__item_id) or related_object_id = any(delete__item_id));
  delete from acs_permissions where object_id in (select rel_id from cr_item_rels where item_id = any(delete__item_id)) or grantee_id in (select rel_id from cr_item_rels where item_id = any(delete__item_id));
  delete from acs_objects where context_id in (select rel_id from cr_item_rels where item_id = any(delete__item_id));
  delete from acs_objects where object_id in (select rel_id from cr_item_rels where item_id = any(delete__item_id));

  delete from acs_attribute_values where object_id in (select rel_id from cr_item_rels where item_id = any(delete__item_id) or related_object_id = any(delete__item_id));
  delete from acs_permissions where object_id in (select rel_id from cr_item_rels where item_id = any(delete__item_id)) or grantee_id in (select rel_id from cr_item_rels where item_id = any(delete__item_id));
  delete from acs_objects where context_id in (select rel_id from cr_item_rels where item_id = any(delete__item_id));
  delete from acs_objects where object_id in (select rel_id from cr_item_rels where item_id = any(delete__item_id));

  perform content_item__del((select array(select child_id from  cr_child_rels  where parent_id = any(delete__item_id))));


  -- Delete Revisions
  delete from cr_item_publish_audit where old_revision in (select revision_id from cr_revisions where item_id = any(delete__item_id)) or
  new_revision in (select revision_id from cr_revisions where item_id = any(delete__item_id));
  update cr_items set live_revision=NULL, latest_revision=NULL where item_id = any(delete__item_id);
  delete from acs_attribute_values where object_id in (select revision_id from cr_revisions where item_id = any(delete__item_id));
  delete from acs_permissions where object_id in (select revision_id from cr_revisions where item_id = any(delete__item_id)) or
  grantee_id in (select revision_id from cr_revisions where item_id = any(delete__item_id));
  select array_to_string((select array(select revision_id from cr_revisions where item_id = any(delete__item_id))),'','') into v_revision_id_hold;
  if v_revision_id_hold != '''' then
    sql:=''delete from acs_objects where context_id in ('' || v_revision_id_hold || '');'';
    execute sql;
    sql:=''delete from acs_objects where object_id in ('' || v_revision_id_hold || '');'';
    execute sql;
    sql:=''delete from cr_revisions where item_id in ('' || v_revision_id_hold || '');'';
    execute sql;
  end if;


  delete from acs_permissions where object_id = any(delete__item_id);
  delete from cr_item_keyword_map where item_id = any(delete__item_id);
  for journal_rec in select journal_id from journal_entries where object_id = any(delete__item_id)
    loop
      perform journal_entry__delete(journal_rec.journal_id);
    end loop;


  -- Delete item object
  delete from acs_objects where context_id = any(delete__item_id);
  delete from acs_objects where object_id = any(delete__item_id);
  delete from acs_attribute_values where object_id = any(delete__item_id);
  delete from acs_permissions where object_id = any(delete__item_id) or
  grantee_id = any(delete__item_id);
  delete from acs_objects where context_id = any(delete__item_id);

  return 0;
end;' language 'plpgsql';

create or replace function content_folder__del (integer, boolean)
returns integer as '
declare
  delete__folder_id              alias for $1;
  p_cascade_p                    alias for $2; -- default ''f''
  v_count                        integer;
  v_child_row                    record;
  v_path                        varchar;
  v_folder_sortkey              varbit;
begin
  if p_cascade_p = ''f'' then
    select count(*) into v_count from cr_items
    where parent_id = delete__folder_id;
    -- check if the folder contains any items
    if v_count > 0 then
      v_path := content_item__get_path(delete__folder_id, null);
      raise EXCEPTION ''-20000: Folder ID % (%) cannot be deleted because it is not empty.'', delete__folder_id, v_path;
    end if;
  else
  -- delete children
    perform content_item__del((select array((select subtree.object_id
        from acs_objects parent, acs_objects subtree
        where subtree.tree_sortkey between parent.tree_sortkey and tree_right(parent.tree_sortkey)
        and parent.tree_sortkey <> subtree.tree_sortkey
        and parent.object_id = delete__folder_id and subtree.object_type = ''content_item''))));



    select into v_folder_sortkey tree_sortkey
    from cr_items where item_id=delete__folder_id;

    for v_child_row in select subtree.object_id
        from acs_objects parent, acs_objects subtree
        where subtree.tree_sortkey between parent.tree_sortkey and tree_right(parent.tree_sortkey)
        and parent.tree_sortkey <> subtree.tree_sortkey
        and parent.object_id = delete__folder_id and subtree.object_type = ''content_folder''
        order by subtree.tree_sortkey desc
    loop
      perform content_folder__del(v_child_row.object_id,''t'');
    end loop;
  end if;

  perform content_folder__unregister_content_type(
      delete__folder_id,
      ''content_revision'',
      ''t''
  );

  delete from cr_folder_type_map
    where folder_id = delete__folder_id;

  perform content_item__del(delete__folder_id);
  return 0;
end;' language 'plpgsql';

create or replace function fs_child_objects(integer) returns setof integer
    AS '
    declare
    fs__object_id      alias for $1;

    begin
    return query(select subtree.object_id
              from acs_objects parent, acs_objects subtree
              where subtree.tree_sortkey between parent.tree_sortkey and tree_right(parent.tree_sortkey)
              and parent.tree_sortkey <> subtree.tree_sortkey
              and parent.object_id = fs__object_id and subtree.object_type in (''content_item'',''content_folder'',''file_storage_object'')
            );
    end;' language 'plpgsql';

I tried this on a CSV HEAD (5.6.0 development) new install with file storage. I created file storage 131,000 items with the populate package.

The delete took 35 mins 50 seconds which is around 0.16 seconds per item. This is about a 6x improvement from the default openacs functions.