diff --git a/atom.go b/atom.go index 08808a6..cab3935 100644 --- a/atom.go +++ b/atom.go @@ -6,32 +6,15 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { ns := "http://www.w3.org/2005/Atom" channels := doc.SelectNodes(ns, "feed") - getChan := func(id, title string) *Channel { - for _, c := range this.Channels { - switch { - case len(id) > 0: - if c.Id == id { - return c - } - case len(title) > 0: - if c.Title == title { - return c - } - } - } - return nil - } - + var foundChannels []*Channel var ch *Channel var i *Item var tn *xmlx.Node var list []*xmlx.Node for _, node := range channels { - if ch = getChan(node.S(ns, "id"), node.S(ns, "title")); ch == nil { - ch = new(Channel) - this.Channels = append(this.Channels, ch) - } + ch = new(Channel) + foundChannels = append(foundChannels, ch) ch.Title = node.S(ns, "title") ch.LastBuildDate = node.S(ns, "updated") @@ -67,14 +50,9 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { ch.Author.Email = tn.S("", "email") } - itemcount := len(ch.Items) list = node.SelectNodes(ns, "entry") for _, item := range list { - if isItemPresent(ch, item.S(ns, "id"), item.S(ns, "title")) { - continue - } - i = new(Item) i.Title = item.S(ns, "title") i.Id = item.S(ns, "id") @@ -120,26 +98,7 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { ch.Items = append(ch.Items, i) } - - if itemcount != len(ch.Items) && this.itemhandler != nil { - this.itemhandler(this, ch, ch.Items[itemcount:]) - } } + this.Channels = foundChannels return } - -func isItemPresent(ch *Channel, id, title string) bool { - for _, item := range ch.Items { - switch { - case len(id) > 0: - if item.Id == id { - return true - } - case len(title) > 0: - if item.Title == title { - return true - } - } - } - return false -} diff --git a/channel.go b/channel.go index f602675..fe3318b 100644 --- a/channel.go +++ b/channel.go @@ -28,3 +28,12 @@ type Channel struct { Author Author SubTitle SubTitle } + +func (c *Channel) Key() string { + switch { + case len(c.Id) != 0: + return c.Id + default: + return c.Title + } +} diff --git a/database.go b/database.go new file mode 100644 index 0000000..9b5a867 --- /dev/null +++ b/database.go @@ -0,0 +1,33 @@ +/* +Credits go to github.com/SlyMarbo/rss for inspiring this solution. +*/ +package feeder + +type database struct { + request chan string + response chan bool + known map[string]struct{} +} + +func (d *database) Run() { + d.known = make(map[string]struct{}) + var s string + + for { + s = <-d.request + if _, ok := d.known[s]; ok { + d.response <- true + } else { + d.response <- false + d.known[s] = struct{}{} + } + } +} + +func NewDatabase() *database { + database := new(database) + database.request = make(chan string) + database.response = make(chan bool) + go database.Run() + return database +} diff --git a/feed.go b/feed.go index 4a68c9a..8046520 100644 --- a/feed.go +++ b/feed.go @@ -58,6 +58,9 @@ type Feed struct { // Url from which this feed was created. Url string + // Database containing a list of known Items and Channels for this instance + database *database + // A notification function, used to notify the host when a new channel // has been found. chanhandler ChannelHandler @@ -76,6 +79,7 @@ func New(cachetimeout int, enforcecachelimit bool, ch ChannelHandler, ih ItemHan v.CacheTimeout = cachetimeout v.EnforceCacheLimit = enforcecachelimit v.Type = "none" + v.database = NewDatabase() v.chanhandler = ch v.itemhandler = ih return v @@ -150,24 +154,43 @@ func (this *Feed) makeFeed(doc *xmlx.Document) (err error) { return } - chancount := len(this.Channels) if err = this.buildFeed(doc); err != nil || len(this.Channels) == 0 { return } - // Notify host of new channels - if chancount != len(this.Channels) && this.chanhandler != nil { - this.chanhandler(this, this.Channels[chancount:]) - } - // reset cache timeout values according to feed specified values (TTL) if this.EnforceCacheLimit && this.CacheTimeout < this.Channels[0].TTL { this.CacheTimeout = this.Channels[0].TTL } + this.notifyListeners() + return } +func (this *Feed) notifyListeners() { + var newchannels []*Channel + for _, channel := range this.Channels { + if this.database.request <- channel.Key(); !<-this.database.response { + newchannels = append(newchannels, channel) + } + + var newitems []*Item + for _, item := range channel.Items { + if this.database.request <- item.Key(); !<-this.database.response { + newitems = append(newitems, item) + } + } + if len(newitems) > 0 && this.itemhandler != nil { + this.itemhandler(this, channel, newitems) + } + } + + if len(newchannels) > 0 && this.chanhandler != nil { + this.chanhandler(this, newchannels) + } +} + // This function returns true or false, depending on whether the CacheTimeout // value has expired or not. Additionally, it will ensure that we adhere to the // RSS spec's SkipDays and SkipHours values (if Feed.EnforceCacheLimit is set to diff --git a/feed_test.go b/feed_test.go index c069660..dca55b3 100644 --- a/feed_test.go +++ b/feed_test.go @@ -7,20 +7,6 @@ import ( var items []*Item -func Test_NewItem(t *testing.T) { - content, _ := ioutil.ReadFile("testdata/initial.atom") - feed := New(1, true, chanHandler, itemHandler) - err := feed.FetchBytes("http://example.com", content, nil) - if err != nil { t.Error(err) } - - content, _ = ioutil.ReadFile("testdata/initial_plus_one_new.atom") - feed.FetchBytes("http://example.com", content, nil) - expected := "Second title" - if expected != items[0].Title { - t.Errorf("Expected %s, got %s", expected, items[0].Title) - } -} - func TestFeed(t *testing.T) { urilist := []string{ //"http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml", // Non-utf8 encoding. @@ -43,6 +29,26 @@ func TestFeed(t *testing.T) { } } +func Test_NewItem(t *testing.T) { + content, _ := ioutil.ReadFile("testdata/initial.atom") + feed := New(1, true, chanHandler, itemHandler) + err := feed.FetchBytes("http://example.com", content, nil) + if err != nil { + t.Error(err) + } + + content, _ = ioutil.ReadFile("testdata/initial_plus_one_new.atom") + feed.FetchBytes("http://example.com", content, nil) + expected := "Second title" + if len(items) != 1 { + t.Errorf("Expected %s new item, got %s", 1, len(items)) + } + + if expected != items[0].Title { + t.Errorf("Expected %s, got %s", expected, items[0].Title) + } +} + func Test_AtomAuthor(t *testing.T) { content, err := ioutil.ReadFile("testdata/idownload.atom") if err != nil { @@ -51,7 +57,7 @@ func Test_AtomAuthor(t *testing.T) { feed := New(1, true, chanHandler, itemHandler) err = feed.FetchBytes("http://example.com", content, nil) - item := items[0] + item := feed.Channels[0].Items[0] expected := "Cody Lee" if item.Author.Name != expected { t.Errorf("Expected author to be %s but found %s", expected, item.Author.Name) @@ -63,7 +69,7 @@ func Test_RssAuthor(t *testing.T) { feed := New(1, true, chanHandler, itemHandler) feed.FetchBytes("http://example.com", content, nil) - item := items[0] + item := feed.Channels[0].Items[0] expected := "Cory Doctorow" if item.Author.Name != expected { t.Errorf("Expected author to be %s but found %s", expected, item.Author.Name) @@ -75,7 +81,7 @@ func Test_CData(t *testing.T) { feed := New(1, true, chanHandler, itemHandler) feed.FetchBytes("http://example.com", content, nil) - item := items[0] + item := feed.Channels[0].Items[0] expected := `

abc

"def"
ghi` if item.Description != expected { t.Errorf("Expected item.Description to be [%s] but item.Description=[%s]", expected, item.Description) diff --git a/item.go b/item.go index c5e65fb..ea13f85 100644 --- a/item.go +++ b/item.go @@ -1,5 +1,10 @@ package feeder +import ( + "crypto/md5" + "io" +) + type Item struct { // RSS and Shared fields Title string @@ -19,3 +24,18 @@ type Item struct { Contributors []string Content *Content } + +func (i *Item) Key() string { + switch { + case i.Guid != nil && len(*i.Guid) != 0: + return *i.Guid + case len(i.Id) != 0: + return i.Id + case len(i.Title) > 0 && len(i.PubDate) > 0: + return i.Title + i.PubDate + default: + h := md5.New() + io.WriteString(h, i.Description) + return string(h.Sum(nil)) + } +} diff --git a/rss.go b/rss.go index 4ff720f..b6c6d73 100644 --- a/rss.go +++ b/rss.go @@ -6,32 +6,18 @@ import ( xmlx "github.com/jteeuwen/go-pkg-xmlx" ) +var days = map[string]int{ + "Monday": 1, + "Tuesday": 2, + "Wednesday": 3, + "Thursday": 4, + "Friday": 5, + "Saturday": 6, + "Sunday": 7, +} + func (this *Feed) readRss2(doc *xmlx.Document) (err error) { - days := make(map[string]int) - days["Monday"] = 1 - days["Tuesday"] = 2 - days["Wednesday"] = 3 - days["Thursday"] = 4 - days["Friday"] = 5 - days["Saturday"] = 6 - days["Sunday"] = 7 - - getChan := func(pubdate, title string) *Channel { - for _, c := range this.Channels { - switch { - case len(pubdate) > 0: - if c.PubDate == pubdate { - return c - } - case len(title) > 0: - if c.Title == title { - return c - } - } - } - return nil - } - + var foundChannels []*Channel var ch *Channel var i *Item var n *xmlx.Node @@ -49,10 +35,8 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { channels := root.SelectNodes(ns, "channel") for _, node := range channels { - if ch = getChan(node.S(ns, "pubDate"), node.S(ns, "title")); ch == nil { - ch = new(Channel) - this.Channels = append(this.Channels, ch) - } + ch = new(Channel) + foundChannels = append(foundChannels, ch) ch.Title = node.S(ns, "title") list = node.SelectNodes(ns, "link") @@ -125,7 +109,6 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { ch.TextInput.Link = n.S(ns, "link") } - itemcount := len(ch.Items) list = node.SelectNodes(ns, "item") if len(list) == 0 { list = doc.SelectNodes(ns, "item") @@ -193,10 +176,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { ch.Items = append(ch.Items, i) } - - if itemcount != len(ch.Items) && this.itemhandler != nil { - this.itemhandler(this, ch, ch.Items[itemcount:]) - } } + this.Channels = foundChannels return }