# Blosxom Plugin: atomfeed -*-cperl-*-
# Author(s): Rael Dornfest
# Version: beta-5
# Blosxom Home/Docs/Licensing: http://www.raelity.org/apps/blosxom/
# Blosxom Plugin Docs: http://www.raelity.org/apps/blosxom/plugin.shtml

# Upgrade to Atom 1.0 by Sam Pearson http://sgp.me.uk/
# Incorporates code from the enclosures plugin by Dave Slusher and Keith Irwin
# Please see http://sgp.me.uk/sam/2005/07/20/atom-1-beta for more information

package atomfeed;

# ----- Configurable variables -----

# $default_author and $feed_yr are important and should be set to meaningful
# values.  The other variables can be safely left alone, or changed as
# required.
#
# These are package globals on purpose: the flavour templates refer to them
# as $atomfeed::name.

# Who would you like your feed to credit as the default author of each entry?
# Leave blank and the atomfeed plugin will attempt to use the whoami and
# fauxami plugins
$default_author = '';

# What year was your weblog started?  This will be used
# to form part of your weblog's unique ID.
$feed_yr = '';

# -----

# What is the default author's URI?
# Blank defaults to $blosxom::url
$author_uri = '';

# What is the default author's email address?
# Leave blank to exclude.
$author_email = '';

# Copyright statement:
# leave blank to exclude.
$copyright = '';

# What domain should Blosxom use in ID tags?
# Leave blank if you don't understand or for Blosxom to use the domain in $url.
$id_domain = '';

# Feed url
# Set your feed url here.  Defaults to $blosxom::url/index.atom
$feed_url = '';

# What template placeholder in your flavour template should I replace with
# the feed-level <updated> element?
my $template_placeholder = "{{{updated}}}";

# Enclosures support
# ------------------
#
# You can add enclosures to your atom feed by linking to them in your post
# and giving the anchor tag a class attribute of atom-enclosure.
# Set $use_full_enclosures to 1 if you wish to add length and content-type
# to your enclosures.  This function relies upon your webserver having
# LWP modules installed.
package atomfeed;    # settings and subs below must live in the atomfeed namespace

$use_full_enclosures = '0';

# Name of a file to cache info about your enclosures:
$DataFile = "$blosxom::plugin_state_dir/enclosures.dat";

# ----- END OF CONFIGURABLE VARIABLES -----

use File::stat;          # object-style stat(); used by story() for the mtime
use Fcntl qw(:flock);    # LOCK_SH / LOCK_EX for the enclosure cache

# --- Plug-in package variables -----
#
# Everything here is a package global (no "my") because the flavour
# templates at the bottom of this file read them as $atomfeed::name.

$author   = '';
$T        = 'T';
$colon    = ':';
$zerozero = '00';

# URL where an atom.css might reside
$css_url = "$blosxom::url/atom.css";

# Try to glean the domain from $url (http or https)
unless ($id_domain) {
    ($id_domain) = $blosxom::url =~ m{https?://(?:www\.)?([^/]+)};
}

# Feed url checking:
$feed_url or $feed_url = "$blosxom::url/index.atom";

# Glean year for feed id: now set in configurable variables
#$feed_yr = (gmtime(time))[5] + 1900;

$utc_date      = '';
$feed_utc_date = '';
$category      = '';
$links         = '';

# Wrap the configured values in the Atom elements the templates expect.
$copyright    and $copyright = "<rights>$copyright</rights>";
$author_uri   or  $author_uri = "$blosxom::url";
$author_uri   = "<uri>$author_uri</uri>";
$author_email and $author_email = "\n      <email>$author_email</email>";

# ----- plugin subroutines -----

# start(): plugin entry point.  Loads the built-in 'atom' flavour templates
# when none are present, sets up the optional XML parser and the escaping
# table used by _parse_markup(), and loads the enclosure cache when
# $use_full_enclosures is on.  Always returns 1.
sub start {

    # Check for the existence of already-loaded flavour templates or theme,
    # loading templates if there's nothing:
    $blosxom::template{'atom'}{'head'} or _load_templates();

    # XML::Parser is optional (require, not use).  Without it $parser stays
    # undef and entries are only ever labelled 'text' or 'html', never
    # 'xhtml'.
    $parser = eval { require XML::Parser; XML::Parser->new };

    # Characters that must be entity-escaped when markup is shipped as
    # type="html" content.
    %escape = ('<' => '&lt;', '>' => '&gt;', '&' => '&amp;', '"' => '&quot;');
    $escape_re = join '|', keys %escape;

    # If required, initialise the enclosures data cache:
    $use_full_enclosures and _load_cache();

    1;
}

# head(): prepares the feed-level <title> and <subtitle> and resets the
# running "most recently updated" timestamp for this page.  Always returns 1.
sub head {

    # foot() leaves $feed_utc_date wrapped in <updated>...</updated>; start
    # every page from an empty value so a second page in the same process
    # is not compared against (or nested inside) the previous one.
    $feed_utc_date = '';

    ($blog_title_type, $blog_title) = _parse_markup($blosxom::blog_title);
    ($blog_description_type, $blog_description) =
        _parse_markup($blosxom::blog_description);

    1;
}

# story(): computes the per-entry template variables ($category,
# $published_utc_date, $updated_utc_date, $utc_yr, $author, $links, $title,
# $body and their *_type companions) and tracks the newest entry time in
# $feed_utc_date.  Always returns 1.
sub story {
    my ($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;

    # <category>: reset for every entry so a top-level post does not
    # inherit the category of the previous one.
    $category = $path ? "<category term=\"$path\"/>" : '';

    my $file = "$blosxom::datadir$path/$filename.$blosxom::file_extension";

    # <published>: derive from %blosxom::files
    my $published_epoch = $blosxom::files{$file};
    my @published_utc = gmtime(defined $published_epoch ? $published_epoch : 0);
    $published_utc_date = _utc_timestamp(@published_utc);

    # <updated>: derive by stat()ing the file for its mtime; fall back to
    # the published time if the file cannot be stat()ed.
    my $st = stat($file);
    my @updated_utc = $st ? gmtime($st->mtime) : @published_utc;
    $updated_utc_date = _utc_timestamp(@updated_utc);

    # Date/time of most recently-modified story becomes date/time of the
    # feed.  The timestamps are fixed-width ISO strings, so they must be
    # compared as strings; a numeric comparison only looks at the year.
    $feed_utc_date = $updated_utc_date if $updated_utc_date gt $feed_utc_date;

    # use %blosxom::files for the year component of the entry-level <id>
    # in case the creation time is cached somewhere.
    $utc_yr = $published_utc[5] + 1900;

    # Set authorship if available, falling back to $default_author
    $author = $whoami::fullname || $fauxami::name || $default_author || '';

    # Look through the body for any enclosures or via/related links:
    $links = "\n" . _extract_links($$body_ref);

    # Parse post title and body:
    ($title_type, $title) = _parse_markup($$title_ref);
    ($body_type,  $body)  = _parse_markup($$body_ref);

    1;
}

# foot(): replaces the placeholder left in the head template with the
# feed-level <updated> element.  Always returns 1.
sub foot {
    my ($pkg, $currentdir, $foot_ref) = @_;

    # An empty pattern would silently reuse the last successful regex, so
    # fall back to the stock marker if no placeholder is configured.
    my $placeholder =
        (defined $template_placeholder && length $template_placeholder)
        ? $template_placeholder
        : '{{{updated}}}';

    $feed_utc_date = "<updated>$feed_utc_date</updated>";

    # \Q...\E: the placeholder is literal text, not a pattern (its braces
    # are regex metacharacters).
    $blosxom::output =~ s/\Q$placeholder\E/$feed_utc_date/;

    return 1;
}

# ----- private subroutines -----

# _utc_timestamp(@gmtime_fields): formats a gmtime() list as an Atom date
# with minute resolution (seconds are always written as 00).
sub _utc_timestamp {
    my @t = @_;
    return sprintf("%4d-%02d-%02dT%02d:%02d:00Z",
        $t[5] + 1900, $t[4] + 1, $t[3], $t[2], $t[1]);
}

# _extract_links($html): returns one <link .../> line for every anchor in
# $html whose class is atom-via, atom-enclosure or atom-related.  With
# $use_full_enclosures set, enclosure links also carry the type and length
# recorded in the enclosure cache (fetched on demand via _get_info()).
sub _extract_links {
    my $html = shift;
    return '' unless defined $html;

    my $out = '';

    ANCHOR:
    foreach my $anchor ($html =~ /(<a [^>]+>)/gis) {
        next ANCHOR
            unless $anchor =~ /class\s*=\s*"?\s*atom-(via|enclosure|related)"?/is;
        my $type = lc $1;

        # href may be double-quoted, single-quoted or bare; a bare value
        # must stop at the closing '>' of the tag.
        my $href;
        if    ($anchor =~ /href\s*=\s*"([^"]+)"/is) { $href = $1 }
        elsif ($anchor =~ /href\s*=\s*'([^']+)'/is) { $href = $1 }
        elsif ($anchor =~ /href\s*=\s*([^\s>]+)/is) { $href = $1 }
        next ANCHOR unless $href;
        $href =~ s/\s//g;

        my $attrs = '';
        if ($use_full_enclosures && $type eq 'enclosure') {

            # Look the enclosure up in the cache, fetching it if unknown;
            # if that fails we simply emit a basic link.
            _get_info($href) unless $info->{$href};
            if (my $meta = $info->{$href}) {
                $attrs .= qq{ type="$meta->{type}"}
                    if defined $meta->{type} && length $meta->{type};
                $attrs .= qq{ length="$meta->{length}"}
                    if defined $meta->{length} && length $meta->{length};
            }
        }

        $out .= qq{    <link rel="$type" href="$href"$attrs/>\n};
    }

    return $out;
}

# _parse_markup($text): returns ($type, $text) where $type is 'xhtml',
# 'html' or 'text'.  Well-formed markup is wrapped in an XHTML <div>, other
# markup is entity-escaped, and text without tags is returned untouched.
sub _parse_markup {
    my $text = shift;
    $text = '' unless defined $text;
    my $type;

    # First, check to see if $text appears to contain markup.
    # This regex should match any tag-like string: opening, closing or
    # orphan tags.
    if ($text =~ m!</?[a-zA-Z0-9]+ ?/?>!) {

        # Looks like markup.  Is it well-formed?  (If XML::Parser is not
        # available $parser is undef, the eval fails, and we fall through
        # to 'html'.)
        if (eval { $parser->parse("<div>$text</div>") }) {
            $type = 'xhtml';
            $text = "<div xmlns=\"http://www.w3.org/1999/xhtml\">$text</div>";
        }
        else {
            # Good old tag soup: ship it escaped.
            $type = 'html';
            $text =~ s/($escape_re)/$escape{$1}/g;
        }
    }
    else {
        # We'll assume it's plaintext then.
        $type = 'text';
    }

    return $type, $text;
}

# _load_cache(): (re)initialises $info from $DataFile.  Each line is
# "url length type".  Returns the $info hash ref, which is empty when the
# file is missing or unreadable.
sub _load_cache {
    $info = {};

    return $info unless -e $DataFile;
    open my $fh, '<', $DataFile or return $info;
    flock($fh, LOCK_SH);

    while (my $line = <$fh>) {
        chomp $line;

        # Limit of 3: a content type may itself contain spaces
        # (e.g. "text/html; charset=utf-8").
        my ($url, $size, $type) = split / /, $line, 3;
        next unless defined $url && length $url;
        $info->{$url}->{length} = $size;
        $info->{$url}->{type}   = $type;
    }
    close $fh;

    return $info;
}

# _save_cache(): writes $info out to $DataFile in the format read by
# _load_cache().  Returns 1 on success, 0 if the file could not be written.
sub _save_cache {
    open my $fh, '>', $DataFile or return 0;
    flock($fh, LOCK_EX);

    foreach my $url (keys %{$info}) {
        my $length = defined $info->{$url}->{length} ? $info->{$url}->{length} : '';
        my $type   = defined $info->{$url}->{type}   ? $info->{$url}->{type}   : '';
        print {$fh} "$url $length $type\n";
    }

    # Buffered write errors only surface at close.
    return close($fh) ? 1 : 0;
}

# _get_info($url): issues an HTTP HEAD request via LWP for $url, records
# its content-type and content-length in $info and saves the cache.
# Returns 1 on success, 0 if LWP is unavailable or the request fails.
sub _get_info {
    my $url = shift;

    return 0 unless eval { require LWP::UserAgent; require HTTP::Request; 1 };

    my $ua = LWP::UserAgent->new;
    $ua->agent('BlosxomAtomFeed/0.5');

    my $res = $ua->request(HTTP::Request->new(HEAD => "$url"));
    return 0 unless $res->is_success;

    $info->{$url}->{type}   = $res->header('content-type');
    $info->{$url}->{length} = $res->header('content-length');
    _save_cache();

    return 1;
}

# _load_templates(): installs the built-in 'atom' flavour.  The heredocs
# are single-quoted so the $atomfeed::... and $blosxom::... names reach
# Blosxom's template interpolation untouched.  $atomfeed::colon stands in
# for a literal ':' directly after a variable name.
sub _load_templates {
    $blosxom::template{'atom'}{'content_type'} = 'application/atom+xml';

    $blosxom::template{'atom'}{'date'} = "\n";

    $blosxom::template{'atom'}{'head'} =<<'HEAD';
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title type="$atomfeed::blog_title_type">$atomfeed::blog_title</title>
  <subtitle type="$atomfeed::blog_description_type">$atomfeed::blog_description</subtitle>
  <link rel="self" type="application/atom+xml" href="$atomfeed::feed_url"/>
  <link rel="alternate" type="text/html" href="$blosxom::url"/>
  <id>tag$atomfeed::colon$atomfeed::id_domain,$atomfeed::feed_yr$atomfeed::colon/$blosxom::path_info</id>
  <generator uri="http://www.blosxom.com/" version="$blosxom::version">Blosxom</generator>
  $atomfeed::copyright
  {{{updated}}}
HEAD

    $blosxom::template{'atom'}{'story'} =<<'STORY';
  <entry>
    <id>tag$atomfeed::colon$atomfeed::id_domain,$atomfeed::utc_yr$atomfeed::colon$path/$fn</id>
    <link rel="alternate" type="text/html" href="$blosxom::url$path/$fn.$blosxom::default_flavour"/>$atomfeed::links
    <title type="$atomfeed::title_type">$atomfeed::title</title>
    <published>$atomfeed::published_utc_date</published>
    <updated>$atomfeed::updated_utc_date</updated>
    $atomfeed::category
    <author>
      <name>$atomfeed::author</name>
      $atomfeed::author_uri$atomfeed::author_email
    </author>
    <content type="$atomfeed::body_type">
$atomfeed::body
    </content>
  </entry>

STORY

    $blosxom::template{'atom'}{'foot'} =<<'FOOT';
</feed>
FOOT

    1;
}

1;

__END__

=head1 NAME

Blosxom Plug-in: atomfeed

=head1 SYNOPSIS

Provides an Atom feed of your weblog.

The plugin has all you need right on-board, including the appropriate
flavour template components and a couple-three configuration directives.

Point your browser/Atom feed reader at http://yoururl/index.atom.

=head1 VERSION

2003-12-11

=head1 AUTHOR

Rael Dornfest, http://www.raelity.org/

Sam Ruby, http://www.intertwingly.net/ - contributed the XML::Parser magic

Frank Hecker, http://www.hecker.org/ - contributed patches for Atom 0.3
compliance, UTC date/time fix

=head1 INSTALLATION

Drop atomfeed into your plugins directory.

=head1 CONFIGURATION

Drop atomfeed into your plugins directory.  All other configuration is
optional.

$default_author is where you specify who you'd like to credit as the
default author of each entry.  I say "default" since if you leave it blank
it'll attempt to use the value provided by the whoami or fauxami plugins if
you happen to have them installed.

  e.g. $default_author = 'Rael Dornfest';

Atom associates unique ID tags with the feed itself and individual entries.
By default it'll attempt to glean your domain from the specified or
calculated value of $blosxom::url.  But you can override this by setting
$id_domain in the configuration section above.

  e.g. $id_domain = "raelity.org";

Just be sure it's a domain name sans any http://, slashes, extra path bits,
or what-have-you.

=head1 OTHER PLUGINS

The atomfeed plugin assumes you're not running any fancy interpolation
plugin (e.g. interpolate_fancy) which changes the way variables are
specified in a template (e.g. <$foo /> rather than $foo).

If you are running interpolate_fancy or the like--I am--use the config
plugin and a config.atom file in your blosxom $datadir consisting of:

  $blosxom::plugins{"interpolate_fancy"} = 0;

Where "interpolate_fancy" is the name of the interpolation plugin you're
turning off _just for the atom feed_.

=head1 SEE ALSO

Blosxom Home/Docs/Licensing: http://www.raelity.org/apps/blosxom/

Blosxom Plugin Docs: http://www.raelity.org/apps/blosxom/plugin.shtml

=head1 BUGS

Address bug reports and comments to the Blosxom mailing list
[http://www.yahoogroups.com/group/blosxom].

=head1 LICENSE

Blosxom and this Blosxom Plug-in
Copyright 2003, Rael Dornfest

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.