| 1 | ########################################################################### |
|---|
| 2 | # whatbot/Command/RSS.pm |
|---|
| 3 | ########################################################################### |
|---|
| 4 | # Monitor an RSS feed |
|---|
| 5 | ########################################################################### |
|---|
| 6 | # the whatbot project - http://www.whatbot.org |
|---|
| 7 | ########################################################################### |
|---|
| 8 | |
|---|
| 9 | package whatbot::Command::RSS; |
|---|
| 10 | use Moose; |
|---|
| 11 | BEGIN { extends 'whatbot::Command' } |
|---|
| 12 | |
|---|
| 13 | use Data::Dumper; |
|---|
| 14 | use Digest::MD5 'md5_hex'; |
|---|
| 15 | use LWP::UserAgent (); |
|---|
| 16 | use XML::Simple; |
|---|
| 17 | use namespace::autoclean; |
|---|
| 18 | |
|---|
# HTTP client used to download feeds. Built once per instance with a browser
# style agent string and a timeout value of 10.
has 'ua' => (
    is      => 'ro',
    isa     => 'LWP::UserAgent',
    default => sub {
        return LWP::UserAgent->new( 'agent' => 'Mozilla/5.0', 'timeout' => 10 );
    },
);

# Most recent entry recorded per feed, keyed by the MD5 hex digest of the
# feed URL. Each value is a hash with 'guid', 'text' and 'url' keys.
has 'last_entry' => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { return {} },
);

# Outcome of the latest recorded poll: a hash with 'stamp' and 'status' keys,
# or undef until a poll has recorded something.
has 'last_check' => (
    is => 'rw',
);

# Feed definitions accepted by register(), in configuration order.
has 'feeds' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { return [] },
);
|---|
| 25 | |
|---|
# Set up the command and load the feed list from this command's configuration.
#
# For every configured feed that has both a 'url' and an 'io', the MD5 hex
# digest of the URL is stored on the feed as 'md5', any entry previously saved
# under that digest in the 'Soup' model is restored into last_entry, and the
# feed is appended to $self->feeds. If at least one feed was accepted, the
# first retrieve_rss run is queued on the timer. Problems are reported through
# $self->log and never raised to the caller.
sub register {
    my ( $self ) = @_;

    $self->command_priority('Extension');
    $self->require_direct(0);

    my $config = $self->my_config;
    unless ($config) {
        $self->log->write('RSS: No config found, skipping.');
        return;
    }
    unless ( $config->{'feed'} ) {
        $self->log->write('RSS: Config found, but missing a feed. Skipping.');
        return;
    }

    # A single <feed> node arrives as a bare hash; normalise it to a list.
    my $feeds = $config->{'feed'};
    $feeds = [ $feeds ] unless ( ref($feeds) eq 'ARRAY' );

    foreach my $feed (@$feeds) {
        # Both nodes are required: 'url' is fetched and hashed below, 'io' is
        # where new entries get announced.
        unless ( ref($feed) eq 'HASH' && $feed->{'url'} && $feed->{'io'} ) {
            $self->log->write('RSS: Skipping a feed that is missing its url or io.');
            next;
        }

        my $md5 = md5_hex( $feed->{'url'} );
        $feed->{'md5'} = $md5;

        my $stored = $self->model('Soup')->get($md5);
        if ($stored) {
            # The value was written by retrieve_rss with Data::Dumper, so it
            # looks like "$VAR1 = {...};". Drop everything before the hash.
            $stored =~ s/^.*?\{/\{/;

            # NOTE(review): string eval executes whatever is stored under this
            # key. That is only safe while nothing but this module writes to
            # it; consider moving to a data-only serialisation format.
            my $entry = eval "$stored";
            if ( $@ || ref($entry) ne 'HASH' ) {
                $self->log->write(
                    'RSS: Ignoring unreadable saved entry for ' . $feed->{'url'} . '.'
                );
            }
            else {
                $self->last_entry->{$md5} = $entry;
            }
        }

        push( @{ $self->feeds }, $feed );
    }

    if ( scalar( @{ $self->feeds } ) ) {
        # First poll is queued with a delay value of 10; later polls use the
        # configured interval (see retrieve_rss).
        $self->timer->enqueue( 10, \&retrieve_rss, $self );
    }
    else {
        $self->log->write('RSS: Config found, but no feed has both a url and an io. Skipping.');
    }

    return;
}
|---|
| 60 | |
|---|
# Poll every registered feed, announce entries that come after the last one
# recorded for that feed, and queue the next poll on the timer.
#
# For each feed the item list is reversed before processing (so a feed that
# lists newest entries first is walked oldest to newest). Items are skipped
# until the last recorded guid has been seen; every later item that passes
# the feed's optional 'include' / 'exclude' regexes is sent to the feed's io,
# remembered in last_entry, and saved to the 'Soup' model. On a feed's first
# run the second-newest item is used as the starting point, so only the
# newest entry is announced. The outcome of each feed's poll is stored in
# last_check. Always returns nothing.
sub retrieve_rss : Command {
    my ( $self ) = @_;

    FEED:
    foreach my $feed ( @{ $self->feeds } ) {
        # A misspelled io name would otherwise die on send_message below and
        # stop the timer from being re-armed.
        my $io = $self->ios->{ $feed->{'io'} };
        unless ($io) {
            $self->_record_check( 'Unknown io for feed: ' . $feed->{'io'} );
            next FEED;
        }

        my $response = $self->ua->get( $feed->{'url'} );
        unless ( $response->is_success ) {
            $self->_record_check( 'Error retrieving feed: ' . $response->status_line );
            next FEED;
        }

        my $xml_doc;
        eval {
            $xml_doc = XMLin( $response->content );
        };
        if ($@) {
            $self->_record_check( 'Error parsing feed: ' . $@ );
            next FEED;
        }

        # Only a channel whose 'item' is an array is processed.
        my $channel   = ( ref($xml_doc) eq 'HASH' ? $xml_doc->{'channel'} : undef );
        my $raw_items = ( ref($channel) eq 'HASH' ? $channel->{'item'}    : undef );
        my @items     = ( ref($raw_items) eq 'ARRAY' ? reverse(@$raw_items) : () );
        unless (@items) {
            $self->_record_check( 'No items found in ' . $feed->{'url'} );
            next FEED;
        }

        # Work out which guid marks the point after which entries are new.
        my $known = $self->last_entry->{ $feed->{'md5'} };
        my $last_guid;
        if ( ref($known) eq 'HASH' && $known->{'guid'} ) {
            $last_guid = $known->{'guid'};
        }
        else {
            # First run: start from the second-to-last item so that the final
            # item in the reversed list is announced once.
            my $start = ( scalar(@items) > 1 ? scalar(@items) - 2 : 0 );
            $last_guid = _item_guid( $items[$start] );
        }

        my $seen_last = 0;
        ITEM:
        foreach my $item (@items) {
            my $guid = _item_guid($item);
            if ( defined $guid && defined $last_guid && $guid eq $last_guid ) {
                $seen_last = 1;
                next ITEM;
            }
            next ITEM unless ($seen_last);

            my $entry = {
                'guid' => $guid,
                'text' => ( $item->{'title'} || $item->{'description'} ),
                'url'  => $item->{'link'},
            };
            my $text = ( defined $entry->{'text'} ? $entry->{'text'} : '' );

            # 'include' keeps only matching entries; 'exclude' drops matching
            # entries.
            if ( $feed->{'include'} ) {
                my $include = $feed->{'include'};
                next ITEM unless ( $text =~ /$include/ );
            }
            if ( $feed->{'exclude'} ) {
                my $exclude = $feed->{'exclude'};
                next ITEM if ( $text =~ /$exclude/ );
            }

            $self->last_entry->{ $feed->{'md5'} } = $entry;
            my $message = whatbot::Message->new(
                'to'             => '',
                'from'           => '',
                'content'        => '[RSS] ' . $channel->{'title'} . ': ' . $entry->{'text'} . ' (' . $entry->{'url'} . ')',
                'base_component' => $self->parent->base_component
            );
            $io->send_message($message);

            # Saved on a single line so register() can strip the "$VAR1 = "
            # prefix; 'local' keeps the setting from leaking to other users
            # of Data::Dumper.
            {
                local $Data::Dumper::Indent = 0;
                $self->model('Soup')->set( $feed->{'md5'}, Data::Dumper::Dumper($entry) );
            }
        }

        # Recorded for every successful poll, including ones with nothing new.
        $self->_record_check(
            'Successfully retrieved ' . scalar(@items) . ' from ' . $channel->{'title'}
        );
    }

    $self->timer->enqueue( ( $self->my_config->{'interval'} or 60 ), \&retrieve_rss, $self );
    return;
}

# Store the outcome of a poll in last_check together with the local time.
sub _record_check {
    my ( $self, $status ) = @_;

    $self->last_check({
        'stamp'  => scalar( localtime(time) ),
        'status' => $status
    });
    return;
}

# Return an item's guid; when the parsed guid is a hash (for example because
# the element carried attributes) the value is taken from its 'content' key.
sub _item_guid {
    my ($item) = @_;

    return unless ( ref($item) eq 'HASH' );
    my $guid = $item->{'guid'};
    return ( ref($guid) eq 'HASH' ? $guid->{'content'} : $guid );
}
|---|
| 134 | |
|---|
# Report when the feeds were last checked and how that check went, based on
# the 'stamp' and 'status' values stored in last_check. Returns a fixed
# message when no check has been recorded.
sub status : Command {
    my ( $self, $message ) = @_;

    my $check = $self->last_check;
    return 'No valid check found.' unless ($check);

    return 'Last checked on ' . $check->{'stamp'} . ', status: ' . $check->{'status'};
}
|---|
| 140 | |
|---|
# Report the most recent entry recorded for each registered feed.
#
# Reads last_entry for every feed in $self->feeds and returns one string with
# each entry formatted as "text (url)", joined by ' | ' when several feeds
# have one. Returns a fixed message when nothing has been recorded yet. The
# previous empty body had no return statement and therefore handed back the
# value of its argument assignment.
sub last : Command {
    my ( $self, $message ) = @_;

    my @lines;
    foreach my $feed ( @{ $self->feeds } ) {
        my $entry = $self->last_entry->{ $feed->{'md5'} };
        next unless ( ref($entry) eq 'HASH' && defined $entry->{'text'} );

        my $line = $entry->{'text'};
        $line .= ' (' . $entry->{'url'} . ')' if ( defined $entry->{'url'} );
        push( @lines, $line );
    }

    return 'No entries have been seen yet.' unless (@lines);
    return join( ' | ', @lines );
}
|---|
| 145 | |
|---|
# Return the one-line usage text for this command.
sub help {
    my $usage = 'RSS monitors an RSS feed for new entries. You can retrieve the last entry using "rss last", and get the status of the monitor with "rss status".';
    return $usage;
}
|---|
| 149 | |
|---|
| 150 | __PACKAGE__->meta->make_immutable; |
|---|
| 151 | |
|---|
| 152 | 1; |
|---|
| 153 | |
|---|
| 154 | =pod |
|---|
| 155 | |
|---|
| 156 | =head1 NAME |
|---|
| 157 | |
|---|
| 158 | whatbot::Command::RSS - Monitor RSS feeds |
|---|
| 159 | |
|---|
| 160 | =head1 SYNOPSIS |
|---|
| 161 | |
|---|
| 162 | Config: |
|---|
| 163 | |
|---|
| 164 | <rss> |
|---|
| 165 | <interval>480</interval> |
|---|
| 166 | <feed> |
|---|
| 167 | <url><![CDATA[http://www.whatbot.org/timeline?format=rss]]></url> |
|---|
| 168 | <include>^Changeset</include> |
|---|
| 169 | <io>IRC_irc.exampleserver.com_#example</io> |
|---|
| 170 | </feed> |
|---|
| 171 | </rss> |
|---|
| 172 | |
|---|
| 173 | =head1 DESCRIPTION |
|---|
| 174 | |
|---|
| 175 | whatbot::Command::RSS will monitor one or more RSS feeds for you. Feeds must be |
|---|
| 176 | valid XML, or they won't parse properly. Configure your RSS feeds in |
|---|
| 177 | whatbot.conf under commands -> rss. The 'interval' node is the number of |
|---|
| 178 | seconds between each check, and one or more feed nodes should represent each |
|---|
| 179 | feed to be checked. The feed node requires the 'url' and 'io' nodes, and can |
|---|
| 180 | include 'include' and 'exclude'. url is the full, valid http URL to the feed |
|---|
| 181 | you want to grab, and io is the name of the io node to output the results to. |
|---|
| 182 | If you don't name your IO nodes, the name is auto-generated for you by the IO |
|---|
| 183 | module, so each one would be slightly different. For instance, an IRC channel |
|---|
| 184 | would be IRC_<hostname>_<channel>, like IRC_irc.efnet.org_#whatbot, or AIM |
|---|
| 185 | would be AIM_<screenname>, like AIM_aimwhatbot. The include node is a regex |
|---|
| 186 | to selectively include certain entries based on the title or description. |
|---|
| 187 | The exclude node does the same thing, but filters items out based on the |
|---|
| 188 | regex. In the synopsis, we get the whatbot change log via Trac, but only |
|---|
| 189 | include SVN commits. |
|---|
| 190 | |
|---|
| 191 | =head1 INHERITANCE |
|---|
| 192 | |
|---|
| 193 | =over 4 |
|---|
| 194 | |
|---|
| 195 | =item whatbot::Component |
|---|
| 196 | |
|---|
| 197 | =over 4 |
|---|
| 198 | |
|---|
| 199 | =item whatbot::Command |
|---|
| 200 | |
|---|
| 201 | =over 4 |
|---|
| 202 | |
|---|
| 203 | =item whatbot::Command::RSS |
|---|
| 204 | |
|---|
| 205 | =back |
|---|
| 206 | |
|---|
| 207 | =back |
|---|
| 208 | |
|---|
| 209 | =back |
|---|
| 210 | |
|---|
| 211 | =head1 LICENSE/COPYRIGHT |
|---|
| 212 | |
|---|
| 213 | Be excellent to each other and party on, dudes. |
|---|
| 214 | |
|---|
| 215 | =cut |
|---|