Forum OpenACS Q&A: Forum XML feed
The idea of being able to subscribe to recent posts from the OpenACS forums in my Radio News Aggregator alongside other news feeds seems pretty cool to me.
Any thoughts?
/Simon
-Roberto
I know this isn't the 'right' way to do it, but it worked for me. It's your call whether revising this would be easier than implementing something serverside on openacs.org.
#!/usr/bin/perl
use strict;
use LWP::Simple;
use XML::RSS;
use HTTP::Date;
#use Data::Denter;
#use Carp;
#use File::Slurp;
use HTML::TokeParser;
use URI;
# Feed configuration, declared in a single constant block.
#   IS_CGI      - when true, a Content-type header is printed before the feed
#   COPYRIGHT, DESCRIPTION, TITLE, EMAIL - RSS channel metadata
#   RSS_FILE    - file the generated feed is saved to
#   DATE_FILE   - file holding the page modification time seen on the last run
use constant {
    IS_CGI      => 1,
    COPYRIGHT   => "",
    DESCRIPTION => "OpenACS BBoard",
    TITLE       => "OpenACS BBoard",
    EMAIL       => '',
    RSS_FILE    => 'openacs.rss',
    DATE_FILE   => 'openacs.dat',
};

# Forum index page that is scraped for feed items.
my $URL = 'https://openacs.org/bboard/q-and-a.tcl?topic_id=11&topic=OpenACS';
# ---------------------------------------------------------------------------
# Main script: scrape the forum index page into RSS items, then either replay
# the previously saved feed (when the page is unchanged) or build, print and
# save a fresh RSS 0.91 feed.
# ---------------------------------------------------------------------------

# HEAD request against the forum page.  Only the third value (the page's
# modification time) is used below, for the change check and the feed dates.
my ($content_type, $document_length, $current_build_time, $expires, $server)
    = LWP::Simple::head($URL);
my $RSS_FILE = RSS_FILE;

# Feed items in page order.  Kept as a list of hashes so that two entries
# sharing a title remain two distinct items.
my @items;

# Use a local index.html when one can be read, otherwise download the page.
my $content = read_file('index.html') || get($URL)
    or die "Could not read index.html or fetch $URL\n";

# Create a TokeParser object over the HTML text.
my $stream = HTML::TokeParser->new( \$content )
    or die "Could not create HTML parser\n";

# For every <li> element, take the first link that follows it as the item.
while ( $stream->get_tag("li") ) {

    # No further <a> tag means the document ended; nothing left to collect.
    my $permalink = $stream->get_tag("a") or last;

    my $link = $permalink->[1]{href} || "--";

    # Stop collecting once the first link carrying "category=Development"
    # is reached.
    last if $link =~ /category=Development/;

    # Resolve the (possibly relative) href against the page URL.
    # NOTE(review): the link is stored unescaped; XML::RSS is expected to
    # entity-encode on output -- confirm for the installed version.
    my $url = URI->new_abs( $link, $URL )->as_string;

    # The anchor text is the item title.
    my $title = $stream->get_trimmed_text("/a");
    my $item_title = clean_url($title);

    # Description: the text up to the next <a> tag, a space, then the text
    # up to the following </a>.  The two reads must stay in this order
    # because each one advances the token stream.
    my $before_anchor = $stream->get_trimmed_text('a');
    my $up_to_close   = $stream->get_trimmed_text('/a');

    push @items, {
        title       => $item_title,
        description => $before_anchor . " " . $up_to_close,
        link        => $url,
    };
}

# If a saved feed exists and the page has not changed, replay the saved feed.
# homepage_changed() is only consulted when the saved feed exists.
if ( ( -e $RSS_FILE ) && !homepage_changed($current_build_time) ) {

    # The cached reply needs the same CGI header as a freshly built one.
    print "Content-type: text/xml\n\n" if IS_CGI;
    my $cached = read_file($RSS_FILE);
    print $cached if defined $cached;
    exit;
}

my $rss = XML::RSS->new( version => '0.91' );
my $current_build_date = time2str($current_build_time);

$rss->channel(
    title          => TITLE,
    link           => $URL,
    language       => 'en',
    description    => DESCRIPTION,
    # rating => '(PICS-1.1 "http://www.classify.org/safesur1 r (SS~~000 1))',
    copyright      => COPYRIGHT,
    pubDate        => $current_build_date,
    lastBuildDate  => $current_build_date,
    managingEditor => EMAIL,
    webMaster      => EMAIL,
);

for my $item (@items) {
    $rss->add_item( %{$item} );
}

print "Content-type: text/xml\n\n" if IS_CGI;
print $rss->as_string;

# Keep a copy on disk so an unchanged page can be answered from it next time.
$rss->save($RSS_FILE);
exit;
# Feed-cleaning helper: returns a copy of the given text with every
# occurrence of the phrase "read more" (in any letter case) removed.
# The caller's value is left untouched.
sub clean {
    ( my $stripped = $_[0] ) =~ s/read more//gi;
    return $stripped;
}
# Runs an ampersand substitution on the caller's own variable (through the
# @_ alias) and returns the resulting value.
# NOTE(review): as written the substitution replaces "&" with "&", so the
# text comes back unchanged; presumably the replacement was once an HTML
# entity -- confirm before changing, since the feed writer may already
# escape its output.
sub clean_url {
    for my $aliased ( $_[0] ) {
        $aliased =~ s/&/&/g;
    }
    return $_[0];
}
# Returns true if the homepage has changed since the last time we checked.
#
# Argument: the page's current modification time (may be undef when the
# server did not report one).
# Returns:  1 when the page counts as changed, otherwise nothing (false).
# Side effect: when the page counts as changed, the current time value is
# written to DATE_FILE for the next comparison.
sub homepage_changed {
    my $current_build_time = shift;
    my $date_file = DATE_FILE;

    my $last_build_time = read_file($date_file);

    # An unknown modification time cannot prove the page is unchanged.
    # Compared as strings, undef would equal an empty date file and the
    # saved feed would then be served forever.
    my $time_known = defined $current_build_time && $current_build_time ne '';

    my $unchanged = $time_known
        && ( -e $date_file )
        && defined $last_build_time
        && $current_build_time eq $last_build_time;
    return if $unchanged;

    # Record the time we are about to build for.  A failed write is only
    # reported: the caller should still rebuild the feed.
    if ( open my $out, '>', $date_file ) {
        print {$out} ( $time_known ? $current_build_time : '' );
        close $out or warn "Could not finish writing $date_file: $!\n";
    }
    else {
        warn "Could not write $date_file: $!\n";
    }
    return 1;
}
# Utility function to slurp a file.
#
# Argument: path of the file to read.
# Returns:  the whole file content as one string, or nothing (false) when
#           the file cannot be opened.  Callers use that false value to fall
#           back to another source, so a failed open deliberately does not
#           die.
sub read_file {
    my $file = shift;
    return unless defined $file;

    # Three-argument open takes the name literally: surrounding whitespace
    # and mode characters in $file are not interpreted.
    open my $in, '<', $file or return;

    local $/;    # slurp mode: read everything in one go
    my $text = <$in>;
    close $in;
    return $text;
}
__END__
=pod
=head1 NAME
rss.pl - scrapes the OpenACS bboard Q&A index page and converts it into an RSS file
=head1 DESCRIPTION
This script can be run in several ways, and can also receive its input in several ways:
If it is run from a directory containing index.html, it will use that file as its source.
If it can't locate index.html, it will fetch the page using LWP::Simple and use that as its source.
Once it has the page, it will parse the forum items and transform them into RSS.
It will print the resulting RSS to STDOUT, and also save it to an RSS file (openacs.rss).
It will only regenerate the feed if it determines that the content has changed (it figures this out by performing a HEAD request); otherwise it prints the previously saved RSS file.
=head1 USAGE
perl rss.pl
OR
install it as a CGI and call http://yourserver/cgi-bin/rss.pl
=head1 DEPENDENCIES

=over 4

=item *

LWP::Simple

=item *

XML::RSS

=item *

HTTP::Date

=item *

HTML::TokeParser

=item *

URI

=back

=head1 REFERENCES
http://www.perl.com/pub/a/2001/11/15/creatingrss.html
=head1 AUTHOR
John Sequeira
5/2002
mailto:johnseq@pobox.com
=cut