2009-11-23 12:46:12 +00:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
/*
|
|
|
|
Author: jim teeuwen <jimteeuwen@gmail.com>
|
|
|
|
Dependencies: go-pkg-xmlx (http://github.com/jteeuwen/go-pkg-xmlx)
|
|
|
|
|
|
|
|
This package allows us to fetch Rss and Atom feeds from the internet.
|
|
|
|
They are parsed into an object tree which is a hybrid of both the RSS and Atom
|
|
|
|
standards.
|
|
|
|
|
|
|
|
Supported feeds are:
|
|
|
|
	- Rss v0.91, 0.92 and 2.0
|
|
|
|
- Atom 1.0
|
|
|
|
|
|
|
|
The package allows us to maintain cache timeout management. This prevents us
|
2010-05-23 14:21:30 +00:00
|
|
|
from querying the servers for feed updates too often and risk ip bans. Apart
|
2009-11-23 12:46:12 +00:00
|
|
|
from setting a cache timeout manually, the package also optionally adheres to
|
|
|
|
the TTL, SkipDays and SkipHours values specified in the feeds themselves.
|
|
|
|
|
|
|
|
Note that the TTL, SkipDays and SkipHour fields are only part of the RSS spec.
|
|
|
|
For Atom feeds, we use the CacheTimeout in the Feed struct.
|
|
|
|
|
|
|
|
Because the object structure is a hybrid between both RSS and Atom specs, not
|
|
|
|
all fields will be filled when requesting either an RSS or Atom feed. I have
|
|
|
|
tried to create as many shared fields as possible, but some of them simply do
|
|
|
|
not occur in either the RSS or Atom spec.
|
|
|
|
*/
|
|
|
|
package feeder
|
|
|
|
|
|
|
|
import "os"
|
|
|
|
import "http"
|
|
|
|
import "time"
|
|
|
|
import "xmlx"
|
|
|
|
import "fmt"
|
|
|
|
import "strconv"
|
|
|
|
import "strings"
|
2010-05-23 14:21:30 +00:00
|
|
|
import "io/ioutil"
|
2009-11-23 12:46:12 +00:00
|
|
|
|
|
|
|
// Feed represents one remote feed source plus its fetched content. It is a
// hybrid of the RSS and Atom models, so depending on the feed type not every
// field will be populated.
type Feed struct {
	// Custom cache timeout in minutes.
	CacheTimeout int

	// Make sure we adhere to the cache timeout specified in the feed. If
	// our CacheTimeout is higher than that, we will use that instead.
	EnforceCacheLimit bool

	// Type of feed. Rss, Atom, etc
	Type string

	// Version of the feed. Major and Minor.
	Version [2]int

	// Channels with content.
	Channels []Channel

	// Url from which this feed was created.
	Url string

	// Last time content was fetched (seconds, as set from
	// time.UTC().Seconds() in canUpdate). Used in conjunction with
	// CacheTimeout to ensure we don't get content too often.
	lastupdate int64
}
|
|
|
|
|
|
|
|
func New(cachetimeout int, enforcecachelimit bool) *Feed {
|
|
|
|
return &Feed{
|
2010-05-23 14:21:30 +00:00
|
|
|
CacheTimeout: cachetimeout,
|
2009-11-23 12:46:12 +00:00
|
|
|
EnforceCacheLimit: enforcecachelimit,
|
2010-05-23 14:21:30 +00:00
|
|
|
Type: "none",
|
|
|
|
Version: [2]int{0, 0},
|
|
|
|
Channels: make([]Channel, 0),
|
2009-11-23 12:46:12 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (this *Feed) addChannel(ch Channel) {
|
2010-05-23 14:21:30 +00:00
|
|
|
slice := make([]Channel, len(this.Channels)+1)
|
|
|
|
for i, v := range this.Channels {
|
|
|
|
slice[i] = v
|
2009-11-23 12:46:12 +00:00
|
|
|
}
|
2010-05-23 14:21:30 +00:00
|
|
|
slice[len(slice)-1] = ch
|
|
|
|
this.Channels = slice
|
2009-11-23 12:46:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (this *Feed) Fetch(uri string) (err os.Error) {
|
2010-05-23 14:21:30 +00:00
|
|
|
if !this.canUpdate() {
|
|
|
|
return
|
|
|
|
}
|
2009-11-23 12:46:12 +00:00
|
|
|
|
|
|
|
// Fetch data from remote location.
|
2010-05-23 14:21:30 +00:00
|
|
|
r, _, err := http.Get(uri)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
2009-11-23 12:46:12 +00:00
|
|
|
|
2010-05-23 14:21:30 +00:00
|
|
|
defer r.Body.Close()
|
2009-11-23 12:46:12 +00:00
|
|
|
|
2010-05-23 14:21:30 +00:00
|
|
|
var b []byte
|
|
|
|
if b, err = ioutil.ReadAll(r.Body); err != nil {
|
|
|
|
return
|
|
|
|
}
|
2009-11-23 12:46:12 +00:00
|
|
|
|
2010-05-23 14:21:30 +00:00
|
|
|
this.Url = uri
|
2009-11-23 12:46:12 +00:00
|
|
|
|
|
|
|
// Extract type and version of the feed so we can have the appropriate
|
|
|
|
// function parse it (rss 0.91, rss 0.92, rss 2, atom etc).
|
2010-05-23 14:21:30 +00:00
|
|
|
doc := xmlx.New()
|
|
|
|
if err = doc.LoadString(string(b)); err != nil {
|
|
|
|
return
|
2009-11-23 12:46:12 +00:00
|
|
|
}
|
2010-05-23 14:21:30 +00:00
|
|
|
this.Type, this.Version = this.GetVersionInfo(doc)
|
2009-11-23 12:46:12 +00:00
|
|
|
|
2010-05-23 14:21:30 +00:00
|
|
|
if ok := this.testVersions(); !ok {
|
|
|
|
err = os.NewError(fmt.Sprintf("Unsupported feed: %s, version: %+v", this.Type, this.Version))
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if err = this.buildFeed(doc); err != nil || len(this.Channels) == 0 {
|
|
|
|
return
|
|
|
|
}
|
2009-11-23 12:46:12 +00:00
|
|
|
|
|
|
|
// reset cache timeout values according to feed specified values (TTL)
|
2010-05-23 14:21:30 +00:00
|
|
|
if this.EnforceCacheLimit && this.CacheTimeout < this.Channels[0].TTL {
|
|
|
|
this.CacheTimeout = this.Channels[0].TTL
|
2009-11-23 12:46:12 +00:00
|
|
|
}
|
2010-05-23 14:21:30 +00:00
|
|
|
return
|
2009-11-23 12:46:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (this *Feed) canUpdate() bool {
|
|
|
|
// Make sure we are not within the specified cache-limit.
|
|
|
|
// This ensures we don't request data too often.
|
2010-05-23 14:21:30 +00:00
|
|
|
utc := time.UTC()
|
|
|
|
if utc.Seconds()-this.lastupdate < int64(this.CacheTimeout*60) {
|
2009-11-23 12:46:12 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// If skipDays or skipHours are set in the RSS feed, use these to see if
|
|
|
|
// we can update.
|
|
|
|
if len(this.Channels) == 0 && this.Type == "rss" {
|
|
|
|
if this.EnforceCacheLimit && len(this.Channels[0].SkipDays) > 0 {
|
2010-05-23 14:21:30 +00:00
|
|
|
for _, v := range this.Channels[0].SkipDays {
|
2009-11-23 12:46:12 +00:00
|
|
|
if v == utc.Weekday {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if this.EnforceCacheLimit && len(this.Channels[0].SkipHours) > 0 {
|
2010-05-23 14:21:30 +00:00
|
|
|
for _, v := range this.Channels[0].SkipHours {
|
2009-11-23 12:46:12 +00:00
|
|
|
if v == utc.Hour {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-05-23 14:21:30 +00:00
|
|
|
this.lastupdate = utc.Seconds()
|
2009-11-23 12:46:12 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
func (this *Feed) buildFeed(doc *xmlx.Document) (err os.Error) {
|
|
|
|
switch this.Type {
|
2010-05-23 14:21:30 +00:00
|
|
|
case "rss":
|
|
|
|
err = this.readRss2(doc)
|
|
|
|
case "atom":
|
|
|
|
err = this.readAtom(doc)
|
2009-11-23 12:46:12 +00:00
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (this *Feed) testVersions() bool {
|
|
|
|
switch this.Type {
|
|
|
|
case "rss":
|
|
|
|
if this.Version[0] > 2 || (this.Version[0] == 2 && this.Version[1] > 0) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
case "atom":
|
|
|
|
if this.Version[0] > 1 || (this.Version[0] == 1 && this.Version[1] > 0) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2010-05-23 14:21:30 +00:00
|
|
|
return true
|
2009-11-23 12:46:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (this *Feed) GetVersionInfo(doc *xmlx.Document) (ftype string, fversion [2]int) {
|
2010-05-23 14:21:30 +00:00
|
|
|
node := doc.SelectNode("http://www.w3.org/2005/Atom", "feed")
|
|
|
|
if node == nil {
|
|
|
|
goto rss
|
|
|
|
}
|
|
|
|
ftype = "atom"
|
|
|
|
fversion = [2]int{1, 0}
|
|
|
|
return
|
2009-11-23 12:46:12 +00:00
|
|
|
|
2010-05-23 14:21:30 +00:00
|
|
|
rss:
|
|
|
|
node = doc.SelectNode("", "rss")
|
|
|
|
if node == nil {
|
|
|
|
goto end
|
|
|
|
}
|
|
|
|
ftype = "rss"
|
|
|
|
version := node.GetAttr("", "version")
|
|
|
|
p := strings.Index(version, ".")
|
|
|
|
major, _ := strconv.Atoi(version[0:p])
|
|
|
|
minor, _ := strconv.Atoi(version[p+1 : len(version)])
|
|
|
|
fversion = [2]int{major, minor}
|
|
|
|
return
|
|
|
|
|
|
|
|
end:
|
|
|
|
ftype = "unknown"
|
|
|
|
fversion = [2]int{0, 0}
|
|
|
|
return
|
|
|
|
}
|