commit
269db380a3
47
atom.go
47
atom.go
|
@ -6,32 +6,15 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
|
|||
ns := "http://www.w3.org/2005/Atom"
|
||||
channels := doc.SelectNodes(ns, "feed")
|
||||
|
||||
getChan := func(id, title string) *Channel {
|
||||
for _, c := range this.Channels {
|
||||
switch {
|
||||
case len(id) > 0:
|
||||
if c.Id == id {
|
||||
return c
|
||||
}
|
||||
case len(title) > 0:
|
||||
if c.Title == title {
|
||||
return c
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var foundChannels []*Channel
|
||||
var ch *Channel
|
||||
var i *Item
|
||||
var tn *xmlx.Node
|
||||
var list []*xmlx.Node
|
||||
|
||||
for _, node := range channels {
|
||||
if ch = getChan(node.S(ns, "id"), node.S(ns, "title")); ch == nil {
|
||||
ch = new(Channel)
|
||||
this.Channels = append(this.Channels, ch)
|
||||
}
|
||||
foundChannels = append(foundChannels, ch)
|
||||
|
||||
ch.Title = node.S(ns, "title")
|
||||
ch.LastBuildDate = node.S(ns, "updated")
|
||||
|
@ -67,14 +50,9 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
|
|||
ch.Author.Email = tn.S("", "email")
|
||||
}
|
||||
|
||||
itemcount := len(ch.Items)
|
||||
list = node.SelectNodes(ns, "entry")
|
||||
|
||||
for _, item := range list {
|
||||
if isItemPresent(ch, item.S(ns, "id"), item.S(ns, "title")) {
|
||||
continue
|
||||
}
|
||||
|
||||
i = new(Item)
|
||||
i.Title = item.S(ns, "title")
|
||||
i.Id = item.S(ns, "id")
|
||||
|
@ -120,26 +98,7 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
|
|||
|
||||
ch.Items = append(ch.Items, i)
|
||||
}
|
||||
|
||||
if itemcount != len(ch.Items) && this.itemhandler != nil {
|
||||
this.itemhandler(this, ch, ch.Items[itemcount:])
|
||||
}
|
||||
}
|
||||
this.Channels = foundChannels
|
||||
return
|
||||
}
|
||||
|
||||
func isItemPresent(ch *Channel, id, title string) bool {
|
||||
for _, item := range ch.Items {
|
||||
switch {
|
||||
case len(id) > 0:
|
||||
if item.Id == id {
|
||||
return true
|
||||
}
|
||||
case len(title) > 0:
|
||||
if item.Title == title {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -28,3 +28,12 @@ type Channel struct {
|
|||
Author Author
|
||||
SubTitle SubTitle
|
||||
}
|
||||
|
||||
func (c *Channel) Key() string {
|
||||
switch {
|
||||
case len(c.Id) != 0:
|
||||
return c.Id
|
||||
default:
|
||||
return c.Title
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
Credits go to github.com/SlyMarbo/rss for inspiring this solution.
|
||||
*/
|
||||
package feeder
|
||||
|
||||
type database struct {
|
||||
request chan string
|
||||
response chan bool
|
||||
known map[string]struct{}
|
||||
}
|
||||
|
||||
func (d *database) Run() {
|
||||
d.known = make(map[string]struct{})
|
||||
var s string
|
||||
|
||||
for {
|
||||
s = <-d.request
|
||||
if _, ok := d.known[s]; ok {
|
||||
d.response <- true
|
||||
} else {
|
||||
d.response <- false
|
||||
d.known[s] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func NewDatabase() *database {
|
||||
database := new(database)
|
||||
database.request = make(chan string)
|
||||
database.response = make(chan bool)
|
||||
go database.Run()
|
||||
return database
|
||||
}
|
35
feed.go
35
feed.go
|
@ -58,6 +58,9 @@ type Feed struct {
|
|||
// Url from which this feed was created.
|
||||
Url string
|
||||
|
||||
// Database containing a list of known Items and Channels for this instance
|
||||
database *database
|
||||
|
||||
// A notification function, used to notify the host when a new channel
|
||||
// has been found.
|
||||
chanhandler ChannelHandler
|
||||
|
@ -76,6 +79,7 @@ func New(cachetimeout int, enforcecachelimit bool, ch ChannelHandler, ih ItemHan
|
|||
v.CacheTimeout = cachetimeout
|
||||
v.EnforceCacheLimit = enforcecachelimit
|
||||
v.Type = "none"
|
||||
v.database = NewDatabase()
|
||||
v.chanhandler = ch
|
||||
v.itemhandler = ih
|
||||
return v
|
||||
|
@ -150,24 +154,43 @@ func (this *Feed) makeFeed(doc *xmlx.Document) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
chancount := len(this.Channels)
|
||||
if err = this.buildFeed(doc); err != nil || len(this.Channels) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Notify host of new channels
|
||||
if chancount != len(this.Channels) && this.chanhandler != nil {
|
||||
this.chanhandler(this, this.Channels[chancount:])
|
||||
}
|
||||
|
||||
// reset cache timeout values according to feed specified values (TTL)
|
||||
if this.EnforceCacheLimit && this.CacheTimeout < this.Channels[0].TTL {
|
||||
this.CacheTimeout = this.Channels[0].TTL
|
||||
}
|
||||
|
||||
this.notifyListeners()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (this *Feed) notifyListeners() {
|
||||
var newchannels []*Channel
|
||||
for _, channel := range this.Channels {
|
||||
if this.database.request <- channel.Key(); !<-this.database.response {
|
||||
newchannels = append(newchannels, channel)
|
||||
}
|
||||
|
||||
var newitems []*Item
|
||||
for _, item := range channel.Items {
|
||||
if this.database.request <- item.Key(); !<-this.database.response {
|
||||
newitems = append(newitems, item)
|
||||
}
|
||||
}
|
||||
if len(newitems) > 0 && this.itemhandler != nil {
|
||||
this.itemhandler(this, channel, newitems)
|
||||
}
|
||||
}
|
||||
|
||||
if len(newchannels) > 0 && this.chanhandler != nil {
|
||||
this.chanhandler(this, newchannels)
|
||||
}
|
||||
}
|
||||
|
||||
// This function returns true or false, depending on whether the CacheTimeout
|
||||
// value has expired or not. Additionally, it will ensure that we adhere to the
|
||||
// RSS spec's SkipDays and SkipHours values (if Feed.EnforceCacheLimit is set to
|
||||
|
|
40
feed_test.go
40
feed_test.go
|
@ -7,20 +7,6 @@ import (
|
|||
|
||||
var items []*Item
|
||||
|
||||
func Test_NewItem(t *testing.T) {
|
||||
content, _ := ioutil.ReadFile("testdata/initial.atom")
|
||||
feed := New(1, true, chanHandler, itemHandler)
|
||||
err := feed.FetchBytes("http://example.com", content, nil)
|
||||
if err != nil { t.Error(err) }
|
||||
|
||||
content, _ = ioutil.ReadFile("testdata/initial_plus_one_new.atom")
|
||||
feed.FetchBytes("http://example.com", content, nil)
|
||||
expected := "Second title"
|
||||
if expected != items[0].Title {
|
||||
t.Errorf("Expected %s, got %s", expected, items[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFeed(t *testing.T) {
|
||||
urilist := []string{
|
||||
//"http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml", // Non-utf8 encoding.
|
||||
|
@ -43,6 +29,26 @@ func TestFeed(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func Test_NewItem(t *testing.T) {
|
||||
content, _ := ioutil.ReadFile("testdata/initial.atom")
|
||||
feed := New(1, true, chanHandler, itemHandler)
|
||||
err := feed.FetchBytes("http://example.com", content, nil)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
content, _ = ioutil.ReadFile("testdata/initial_plus_one_new.atom")
|
||||
feed.FetchBytes("http://example.com", content, nil)
|
||||
expected := "Second title"
|
||||
if len(items) != 1 {
|
||||
t.Errorf("Expected %s new item, got %s", 1, len(items))
|
||||
}
|
||||
|
||||
if expected != items[0].Title {
|
||||
t.Errorf("Expected %s, got %s", expected, items[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_AtomAuthor(t *testing.T) {
|
||||
content, err := ioutil.ReadFile("testdata/idownload.atom")
|
||||
if err != nil {
|
||||
|
@ -51,7 +57,7 @@ func Test_AtomAuthor(t *testing.T) {
|
|||
feed := New(1, true, chanHandler, itemHandler)
|
||||
err = feed.FetchBytes("http://example.com", content, nil)
|
||||
|
||||
item := items[0]
|
||||
item := feed.Channels[0].Items[0]
|
||||
expected := "Cody Lee"
|
||||
if item.Author.Name != expected {
|
||||
t.Errorf("Expected author to be %s but found %s", expected, item.Author.Name)
|
||||
|
@ -63,7 +69,7 @@ func Test_RssAuthor(t *testing.T) {
|
|||
feed := New(1, true, chanHandler, itemHandler)
|
||||
feed.FetchBytes("http://example.com", content, nil)
|
||||
|
||||
item := items[0]
|
||||
item := feed.Channels[0].Items[0]
|
||||
expected := "Cory Doctorow"
|
||||
if item.Author.Name != expected {
|
||||
t.Errorf("Expected author to be %s but found %s", expected, item.Author.Name)
|
||||
|
@ -75,7 +81,7 @@ func Test_CData(t *testing.T) {
|
|||
feed := New(1, true, chanHandler, itemHandler)
|
||||
feed.FetchBytes("http://example.com", content, nil)
|
||||
|
||||
item := items[0]
|
||||
item := feed.Channels[0].Items[0]
|
||||
expected := `<p>abc<div>"def"</div>ghi`
|
||||
if item.Description != expected {
|
||||
t.Errorf("Expected item.Description to be [%s] but item.Description=[%s]", expected, item.Description)
|
||||
|
|
20
item.go
20
item.go
|
@ -1,5 +1,10 @@
|
|||
package feeder
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"io"
|
||||
)
|
||||
|
||||
type Item struct {
|
||||
// RSS and Shared fields
|
||||
Title string
|
||||
|
@ -19,3 +24,18 @@ type Item struct {
|
|||
Contributors []string
|
||||
Content *Content
|
||||
}
|
||||
|
||||
func (i *Item) Key() string {
|
||||
switch {
|
||||
case i.Guid != nil && len(*i.Guid) != 0:
|
||||
return *i.Guid
|
||||
case len(i.Id) != 0:
|
||||
return i.Id
|
||||
case len(i.Title) > 0 && len(i.PubDate) > 0:
|
||||
return i.Title + i.PubDate
|
||||
default:
|
||||
h := md5.New()
|
||||
io.WriteString(h, i.Description)
|
||||
return string(h.Sum(nil))
|
||||
}
|
||||
}
|
||||
|
|
46
rss.go
46
rss.go
|
@ -6,32 +6,18 @@ import (
|
|||
xmlx "github.com/jteeuwen/go-pkg-xmlx"
|
||||
)
|
||||
|
||||
var days = map[string]int{
|
||||
"Monday": 1,
|
||||
"Tuesday": 2,
|
||||
"Wednesday": 3,
|
||||
"Thursday": 4,
|
||||
"Friday": 5,
|
||||
"Saturday": 6,
|
||||
"Sunday": 7,
|
||||
}
|
||||
|
||||
func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
|
||||
days := make(map[string]int)
|
||||
days["Monday"] = 1
|
||||
days["Tuesday"] = 2
|
||||
days["Wednesday"] = 3
|
||||
days["Thursday"] = 4
|
||||
days["Friday"] = 5
|
||||
days["Saturday"] = 6
|
||||
days["Sunday"] = 7
|
||||
|
||||
getChan := func(pubdate, title string) *Channel {
|
||||
for _, c := range this.Channels {
|
||||
switch {
|
||||
case len(pubdate) > 0:
|
||||
if c.PubDate == pubdate {
|
||||
return c
|
||||
}
|
||||
case len(title) > 0:
|
||||
if c.Title == title {
|
||||
return c
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var foundChannels []*Channel
|
||||
var ch *Channel
|
||||
var i *Item
|
||||
var n *xmlx.Node
|
||||
|
@ -49,10 +35,8 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
|
|||
|
||||
channels := root.SelectNodes(ns, "channel")
|
||||
for _, node := range channels {
|
||||
if ch = getChan(node.S(ns, "pubDate"), node.S(ns, "title")); ch == nil {
|
||||
ch = new(Channel)
|
||||
this.Channels = append(this.Channels, ch)
|
||||
}
|
||||
foundChannels = append(foundChannels, ch)
|
||||
|
||||
ch.Title = node.S(ns, "title")
|
||||
list = node.SelectNodes(ns, "link")
|
||||
|
@ -125,7 +109,6 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
|
|||
ch.TextInput.Link = n.S(ns, "link")
|
||||
}
|
||||
|
||||
itemcount := len(ch.Items)
|
||||
list = node.SelectNodes(ns, "item")
|
||||
if len(list) == 0 {
|
||||
list = doc.SelectNodes(ns, "item")
|
||||
|
@ -193,10 +176,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
|
|||
|
||||
ch.Items = append(ch.Items, i)
|
||||
}
|
||||
|
||||
if itemcount != len(ch.Items) && this.itemhandler != nil {
|
||||
this.itemhandler(this, ch, ch.Items[itemcount:])
|
||||
}
|
||||
}
|
||||
this.Channels = foundChannels
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue