From cc25852f3019da703b09b2b41cc5922e472b838f Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 12:55:59 +0100 Subject: [PATCH 01/16] Revert "Implements check if item is already present" This reverts commit d79101645df6026cdbe48b07307fe101413a7729. --- atom.go | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/atom.go b/atom.go index 08808a6..ccbe786 100644 --- a/atom.go +++ b/atom.go @@ -71,10 +71,6 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { list = node.SelectNodes(ns, "entry") for _, item := range list { - if isItemPresent(ch, item.S(ns, "id"), item.S(ns, "title")) { - continue - } - i = new(Item) i.Title = item.S(ns, "title") i.Id = item.S(ns, "id") @@ -127,19 +123,3 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { } return } - -func isItemPresent(ch *Channel, id, title string) bool { - for _, item := range ch.Items { - switch { - case len(id) > 0: - if item.Id == id { - return true - } - case len(title) > 0: - if item.Title == title { - return true - } - } - } - return false -} From 902e92886352ba8e409e084c6f3d3276e9fdf987 Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 12:56:00 +0100 Subject: [PATCH 02/16] Revert "Adds test" This reverts commit b311fe3f8dab10a6c8811dcc2204b7c156d8b537. 
--- feed_test.go | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/feed_test.go b/feed_test.go index c069660..fad7b7f 100644 --- a/feed_test.go +++ b/feed_test.go @@ -7,20 +7,6 @@ import ( var items []*Item -func Test_NewItem(t *testing.T) { - content, _ := ioutil.ReadFile("testdata/initial.atom") - feed := New(1, true, chanHandler, itemHandler) - err := feed.FetchBytes("http://example.com", content, nil) - if err != nil { t.Error(err) } - - content, _ = ioutil.ReadFile("testdata/initial_plus_one_new.atom") - feed.FetchBytes("http://example.com", content, nil) - expected := "Second title" - if expected != items[0].Title { - t.Errorf("Expected %s, got %s", expected, items[0].Title) - } -} - func TestFeed(t *testing.T) { urilist := []string{ //"http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml", // Non-utf8 encoding. From 79d418e00a22c053393cb4536e457d6e36494202 Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 12:56:02 +0100 Subject: [PATCH 03/16] Revert "Adds new test data" This reverts commit 997070656fc003f824aefa3f3f2840dd39c8318f. 
--- testdata/initial.atom | 8 -------- testdata/initial_plus_one_new.atom | 12 ------------ 2 files changed, 20 deletions(-) delete mode 100644 testdata/initial.atom delete mode 100644 testdata/initial_plus_one_new.atom diff --git a/testdata/initial.atom b/testdata/initial.atom deleted file mode 100644 index b086591..0000000 --- a/testdata/initial.atom +++ /dev/null @@ -1,8 +0,0 @@ - - Some title - http://www.example.com/feed/atom/ - - First title - 1 - - diff --git a/testdata/initial_plus_one_new.atom b/testdata/initial_plus_one_new.atom deleted file mode 100644 index 2461a66..0000000 --- a/testdata/initial_plus_one_new.atom +++ /dev/null @@ -1,12 +0,0 @@ - - Some title - http://www.example.com/feed/atom/ - - First title - 1 - - - Second title - 2 - - From 4fa6c97010b302f1bfcd87e8aeffa5c7ba80044b Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 13:53:57 +0100 Subject: [PATCH 04/16] Makes days map a global lookup table --- rss.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/rss.go b/rss.go index 4ff720f..c50a3a7 100644 --- a/rss.go +++ b/rss.go @@ -6,15 +6,17 @@ import ( xmlx "github.com/jteeuwen/go-pkg-xmlx" ) +var days = map[string]int{ + "Monday": 1, + "Tuesday": 2, + "Wednesday": 3, + "Thursday": 4, + "Friday": 5, + "Saturday": 6, + "Sunday": 7, +} + func (this *Feed) readRss2(doc *xmlx.Document) (err error) { - days := make(map[string]int) - days["Monday"] = 1 - days["Tuesday"] = 2 - days["Wednesday"] = 3 - days["Thursday"] = 4 - days["Friday"] = 5 - days["Saturday"] = 6 - days["Sunday"] = 7 getChan := func(pubdate, title string) *Channel { for _, c := range this.Channels { From 6ef84d35e2d7d6b93974c643a57e4d9d5726133f Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 14:05:24 +0100 Subject: [PATCH 05/16] Makes rss replace channels and items, thus making it stateless --- rss.go | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/rss.go b/rss.go 
index c50a3a7..b6c6d73 100644 --- a/rss.go +++ b/rss.go @@ -17,23 +17,7 @@ var days = map[string]int{ } func (this *Feed) readRss2(doc *xmlx.Document) (err error) { - - getChan := func(pubdate, title string) *Channel { - for _, c := range this.Channels { - switch { - case len(pubdate) > 0: - if c.PubDate == pubdate { - return c - } - case len(title) > 0: - if c.Title == title { - return c - } - } - } - return nil - } - + var foundChannels []*Channel var ch *Channel var i *Item var n *xmlx.Node @@ -51,10 +35,8 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { channels := root.SelectNodes(ns, "channel") for _, node := range channels { - if ch = getChan(node.S(ns, "pubDate"), node.S(ns, "title")); ch == nil { - ch = new(Channel) - this.Channels = append(this.Channels, ch) - } + ch = new(Channel) + foundChannels = append(foundChannels, ch) ch.Title = node.S(ns, "title") list = node.SelectNodes(ns, "link") @@ -127,7 +109,6 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { ch.TextInput.Link = n.S(ns, "link") } - itemcount := len(ch.Items) list = node.SelectNodes(ns, "item") if len(list) == 0 { list = doc.SelectNodes(ns, "item") @@ -195,10 +176,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { ch.Items = append(ch.Items, i) } - - if itemcount != len(ch.Items) && this.itemhandler != nil { - this.itemhandler(this, ch, ch.Items[itemcount:]) - } } + this.Channels = foundChannels return } From 73442f806a9cf07e9e44b4332d1ce8cc6138b5e6 Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 14:08:41 +0100 Subject: [PATCH 06/16] Makes atom replace channels and items, thus making it stateless --- atom.go | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/atom.go b/atom.go index ccbe786..4982030 100644 --- a/atom.go +++ b/atom.go @@ -6,32 +6,15 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { ns := "http://www.w3.org/2005/Atom" channels := doc.SelectNodes(ns, "feed") - 
getChan := func(id, title string) *Channel { - for _, c := range this.Channels { - switch { - case len(id) > 0: - if c.Id == id { - return c - } - case len(title) > 0: - if c.Title == title { - return c - } - } - } - return nil - } - + var foundChannels []*Channel var ch *Channel var i *Item var tn *xmlx.Node var list []*xmlx.Node for _, node := range channels { - if ch = getChan(node.S(ns, "id"), node.S(ns, "title")); ch == nil { - ch = new(Channel) - this.Channels = append(this.Channels, ch) - } + ch = new(Channel) + foundChannels = append(foundChannels, ch) ch.Title = node.S(ns, "title") ch.LastBuildDate = node.S(ns, "updated") @@ -121,5 +104,6 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { this.itemhandler(this, ch, ch.Items[itemcount:]) } } + this.Channels = foundChannels return } From d75037c0ab97904084b63d0c5e2cb4164261a541 Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 14:10:22 +0100 Subject: [PATCH 07/16] Removes notification of new channels This is going to be moved to the stateful Feed, which is yet to be implemented. 
--- feed.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/feed.go b/feed.go index 4a68c9a..6b9fe33 100644 --- a/feed.go +++ b/feed.go @@ -150,16 +150,10 @@ func (this *Feed) makeFeed(doc *xmlx.Document) (err error) { return } - chancount := len(this.Channels) if err = this.buildFeed(doc); err != nil || len(this.Channels) == 0 { return } - // Notify host of new channels - if chancount != len(this.Channels) && this.chanhandler != nil { - this.chanhandler(this, this.Channels[chancount:]) - } - // reset cache timeout values according to feed specified values (TTL) if this.EnforceCacheLimit && this.CacheTimeout < this.Channels[0].TTL { this.CacheTimeout = this.Channels[0].TTL From e345751986fed2f336d2b257de997077e27295af Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 14:21:32 +0100 Subject: [PATCH 08/16] Removes new Item notification --- atom.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/atom.go b/atom.go index 4982030..cab3935 100644 --- a/atom.go +++ b/atom.go @@ -50,7 +50,6 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { ch.Author.Email = tn.S("", "email") } - itemcount := len(ch.Items) list = node.SelectNodes(ns, "entry") for _, item := range list { @@ -99,10 +98,6 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { ch.Items = append(ch.Items, i) } - - if itemcount != len(ch.Items) && this.itemhandler != nil { - this.itemhandler(this, ch, ch.Items[itemcount:]) - } } this.Channels = foundChannels return From bfbe2dd3be47b65dec62e31c166e0fb578c76cfe Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 14:22:03 +0100 Subject: [PATCH 09/16] Fixes the tests The tests now reference the item directly through the channel, as the notification functionality has been removed. 
--- feed_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/feed_test.go b/feed_test.go index fad7b7f..60bad51 100644 --- a/feed_test.go +++ b/feed_test.go @@ -37,7 +37,7 @@ func Test_AtomAuthor(t *testing.T) { feed := New(1, true, chanHandler, itemHandler) err = feed.FetchBytes("http://example.com", content, nil) - item := items[0] + item := feed.Channels[0].Items[0] expected := "Cody Lee" if item.Author.Name != expected { t.Errorf("Expected author to be %s but found %s", expected, item.Author.Name) @@ -49,7 +49,7 @@ func Test_RssAuthor(t *testing.T) { feed := New(1, true, chanHandler, itemHandler) feed.FetchBytes("http://example.com", content, nil) - item := items[0] + item := feed.Channels[0].Items[0] expected := "Cory Doctorow" if item.Author.Name != expected { t.Errorf("Expected author to be %s but found %s", expected, item.Author.Name) @@ -61,7 +61,7 @@ func Test_CData(t *testing.T) { feed := New(1, true, chanHandler, itemHandler) feed.FetchBytes("http://example.com", content, nil) - item := items[0] + item := feed.Channels[0].Items[0] expected := `

abc

"def"
ghi` if item.Description != expected { t.Errorf("Expected item.Description to be [%s] but item.Description=[%s]", expected, item.Description) From 90c93b8fa4a94a8f3b0e5095afe48185994a89cf Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 15:30:26 +0100 Subject: [PATCH 10/16] Adds a database. This is a really simple map. A call to it checks whether the key exists and returns a bool; if the key did not exist (false), the key is added. The key is just a string which might or might not be sufficient. --- database.go | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 database.go diff --git a/database.go b/database.go new file mode 100644 index 0000000..9b5a867 --- /dev/null +++ b/database.go @@ -0,0 +1,33 @@ +/* +Credits go to github.com/SlyMarbo/rss for inspiring this solution. +*/ +package feeder + +type database struct { + request chan string + response chan bool + known map[string]struct{} +} + +func (d *database) Run() { + d.known = make(map[string]struct{}) + var s string + + for { + s = <-d.request + if _, ok := d.known[s]; ok { + d.response <- true + } else { + d.response <- false + d.known[s] = struct{}{} + } + } +} + +func NewDatabase() *database { + database := new(database) + database.request = make(chan string) + database.response = make(chan bool) + go database.Run() + return database +} From 3b336dc54bdd1c0b798a2e01fe21c29804fc5512 Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 15:31:49 +0100 Subject: [PATCH 11/16] Adds the database for checking new channels/items. In this iteration the key passed to the database is the Title which is obviously silly. I'm still looking for a configurable way of generating the unique key. --- feed.go | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/feed.go b/feed.go index 6b9fe33..05c6e52 100644 --- a/feed.go +++ b/feed.go @@ -58,6 +58,9 @@ type Feed struct { // Url from which this feed was created. 
Url string + // Database containing a list of known Items and Channels for this instance + database *database + // A notification function, used to notify the host when a new channel // has been found. chanhandler ChannelHandler @@ -76,6 +79,7 @@ func New(cachetimeout int, enforcecachelimit bool, ch ChannelHandler, ih ItemHan v.CacheTimeout = cachetimeout v.EnforceCacheLimit = enforcecachelimit v.Type = "none" + v.database = NewDatabase() v.chanhandler = ch v.itemhandler = ih return v @@ -159,9 +163,34 @@ func (this *Feed) makeFeed(doc *xmlx.Document) (err error) { this.CacheTimeout = this.Channels[0].TTL } + this.notifyListeners() + return } +func (this *Feed) notifyListeners() { + var newchannels []*Channel + for _, channel := range this.Channels { + if this.database.request <- channel.Title; <-this.database.response { + newchannels = append(newchannels, channel) + } + + var newitems []*Item + for _, item := range channel.Items { + if this.database.request <- item.Title; <-this.database.response { + newitems = append(newitems, item) + } + } + if len(newitems) > 0 { + this.itemhandler(this, channel, newitems) + } + } + + if len(newchannels) > 0 { + this.chanhandler(this, newchannels) + } +} + // This function returns true or false, depending on whether the CacheTimeout // value has expired or not. 
Additionally, it will ensure that we adhere to the // RSS spec's SkipDays and SkipHours values (if Feed.EnforceCacheLimit is set to From 6b6086e389ec379b665d08263c84b16a2bdfbb44 Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 16:04:00 +0100 Subject: [PATCH 12/16] Adds Key() --- channel.go | 9 +++++++++ feed.go | 4 ++-- item.go | 11 +++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/channel.go b/channel.go index f602675..fe3318b 100644 --- a/channel.go +++ b/channel.go @@ -28,3 +28,12 @@ type Channel struct { Author Author SubTitle SubTitle } + +func (c *Channel) Key() string { + switch { + case len(c.Id) != 0: + return c.Id + default: + return c.Title + } +} diff --git a/feed.go b/feed.go index 05c6e52..c0db240 100644 --- a/feed.go +++ b/feed.go @@ -171,13 +171,13 @@ func (this *Feed) makeFeed(doc *xmlx.Document) (err error) { func (this *Feed) notifyListeners() { var newchannels []*Channel for _, channel := range this.Channels { - if this.database.request <- channel.Title; <-this.database.response { + if this.database.request <- channel.Key(); !<-this.database.response { newchannels = append(newchannels, channel) } var newitems []*Item for _, item := range channel.Items { - if this.database.request <- item.Title; <-this.database.response { + if this.database.request <- item.Key(); !<-this.database.response { newitems = append(newitems, item) } } diff --git a/item.go b/item.go index c5e65fb..86b1cad 100644 --- a/item.go +++ b/item.go @@ -19,3 +19,14 @@ type Item struct { Contributors []string Content *Content } + +func (i *Item) Key() string { + switch { + case i.Guid != nil && len(*i.Guid) != 0: + return *i.Guid + case len(i.Id) != 0: + return i.Id + default: + return i.Title + i.PubDate + } +} From b4a80d771a89ed4f7131fc27f5cde164858d9600 Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 16:18:13 +0100 Subject: [PATCH 13/16] Adds a fallback ID function. 
--- item.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/item.go b/item.go index 86b1cad..ea13f85 100644 --- a/item.go +++ b/item.go @@ -1,5 +1,10 @@ package feeder +import ( + "crypto/md5" + "io" +) + type Item struct { // RSS and Shared fields Title string @@ -26,7 +31,11 @@ func (i *Item) Key() string { return *i.Guid case len(i.Id) != 0: return i.Id - default: + case len(i.Title) > 0 && len(i.PubDate) > 0: return i.Title + i.PubDate + default: + h := md5.New() + io.WriteString(h, i.Description) + return string(h.Sum(nil)) } } From 0aaac62e64f4a688a66373fdbd80428ac8512442 Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 16:23:32 +0100 Subject: [PATCH 14/16] Add check if the handler exists. --- feed.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/feed.go b/feed.go index c0db240..8046520 100644 --- a/feed.go +++ b/feed.go @@ -181,12 +181,12 @@ func (this *Feed) notifyListeners() { newitems = append(newitems, item) } } - if len(newitems) > 0 { + if len(newitems) > 0 && this.itemhandler != nil { this.itemhandler(this, channel, newitems) } } - if len(newchannels) > 0 { + if len(newchannels) > 0 && this.chanhandler != nil { this.chanhandler(this, newchannels) } } From 5a2d6dbcdc9e0f5c88f295970988a83115ef2034 Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 16:24:48 +0100 Subject: [PATCH 15/16] Revert "Revert "Adds new test data"" This reverts commit 79d418e00a22c053393cb4536e457d6e36494202. 
--- testdata/initial.atom | 8 ++++++++ testdata/initial_plus_one_new.atom | 12 ++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 testdata/initial.atom create mode 100644 testdata/initial_plus_one_new.atom diff --git a/testdata/initial.atom b/testdata/initial.atom new file mode 100644 index 0000000..b086591 --- /dev/null +++ b/testdata/initial.atom @@ -0,0 +1,8 @@ + + Some title + http://www.example.com/feed/atom/ + + First title + 1 + + diff --git a/testdata/initial_plus_one_new.atom b/testdata/initial_plus_one_new.atom new file mode 100644 index 0000000..2461a66 --- /dev/null +++ b/testdata/initial_plus_one_new.atom @@ -0,0 +1,12 @@ + + Some title + http://www.example.com/feed/atom/ + + First title + 1 + + + Second title + 2 + + From 49e91ffeee298b0655d148389bc4c7f32e8fc7a7 Mon Sep 17 00:00:00 2001 From: Harm Aarts Date: Thu, 5 Dec 2013 16:29:55 +0100 Subject: [PATCH 16/16] Adds test for the new items --- feed_test.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/feed_test.go b/feed_test.go index 60bad51..dca55b3 100644 --- a/feed_test.go +++ b/feed_test.go @@ -29,6 +29,26 @@ func TestFeed(t *testing.T) { } } +func Test_NewItem(t *testing.T) { + content, _ := ioutil.ReadFile("testdata/initial.atom") + feed := New(1, true, chanHandler, itemHandler) + err := feed.FetchBytes("http://example.com", content, nil) + if err != nil { + t.Error(err) + } + + content, _ = ioutil.ReadFile("testdata/initial_plus_one_new.atom") + feed.FetchBytes("http://example.com", content, nil) + expected := "Second title" + if len(items) != 1 { + t.Errorf("Expected %s new item, got %s", 1, len(items)) + } + + if expected != items[0].Title { + t.Errorf("Expected %s, got %s", expected, items[0].Title) + } +} + func Test_AtomAuthor(t *testing.T) { content, err := ioutil.ReadFile("testdata/idownload.atom") if err != nil {