Merge pull request #27 from haarts/master

Resolve uniqueness confusion
This commit is contained in:
jimt 2013-12-06 02:08:15 -08:00
commit 269db380a3
7 changed files with 132 additions and 102 deletions

47
atom.go
View File

@ -6,32 +6,15 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
ns := "http://www.w3.org/2005/Atom"
channels := doc.SelectNodes(ns, "feed")
getChan := func(id, title string) *Channel {
for _, c := range this.Channels {
switch {
case len(id) > 0:
if c.Id == id {
return c
}
case len(title) > 0:
if c.Title == title {
return c
}
}
}
return nil
}
var foundChannels []*Channel
var ch *Channel
var i *Item
var tn *xmlx.Node
var list []*xmlx.Node
for _, node := range channels {
if ch = getChan(node.S(ns, "id"), node.S(ns, "title")); ch == nil {
ch = new(Channel)
this.Channels = append(this.Channels, ch)
}
foundChannels = append(foundChannels, ch)
ch.Title = node.S(ns, "title")
ch.LastBuildDate = node.S(ns, "updated")
@ -67,14 +50,9 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
ch.Author.Email = tn.S("", "email")
}
itemcount := len(ch.Items)
list = node.SelectNodes(ns, "entry")
for _, item := range list {
if isItemPresent(ch, item.S(ns, "id"), item.S(ns, "title")) {
continue
}
i = new(Item)
i.Title = item.S(ns, "title")
i.Id = item.S(ns, "id")
@ -120,26 +98,7 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
ch.Items = append(ch.Items, i)
}
if itemcount != len(ch.Items) && this.itemhandler != nil {
this.itemhandler(this, ch, ch.Items[itemcount:])
}
}
this.Channels = foundChannels
return
}
// isItemPresent reports whether ch already contains an item matching the
// given id. When id is empty, the title is used as the fallback key; when
// both keys are empty, no item can match.
func isItemPresent(ch *Channel, id, title string) bool {
	for _, item := range ch.Items {
		if len(id) > 0 {
			// An id is authoritative: compare it and nothing else.
			if item.Id == id {
				return true
			}
			continue
		}
		if len(title) > 0 && item.Title == title {
			return true
		}
	}
	return false
}

View File

@ -28,3 +28,12 @@ type Channel struct {
Author Author
SubTitle SubTitle
}
// Key returns the value used to deduplicate this channel: the feed-supplied
// Id when present, otherwise the channel title.
func (c *Channel) Key() string {
	if len(c.Id) > 0 {
		return c.Id
	}
	return c.Title
}

33
database.go Normal file
View File

@ -0,0 +1,33 @@
/*
Credits go to github.com/SlyMarbo/rss for inspiring this solution.
*/
package feeder
// database remembers which channel/item keys have been seen by one Feed.
// All state is owned by the goroutine running Run; callers interact with it
// only through the request/response channel pair, so no locking is needed.
// NOTE(review): each request must be paired with a response read by the same
// caller — interleaved use from multiple goroutines could mismatch replies.
type database struct {
	request  chan string // keys to look up (and record when new)
	response chan bool   // true when the key was already known
	known    map[string]struct{}
}

// Run services lookup requests forever: for each key received on request it
// reports on response whether that key was seen before, recording unseen
// keys as known. The request channel is never closed, so the loop does not
// terminate and the goroutine lives for the lifetime of the process.
func (d *database) Run() {
	d.known = make(map[string]struct{})
	for key := range d.request {
		_, seen := d.known[key]
		d.response <- seen
		if !seen {
			d.known[key] = struct{}{}
		}
	}
}

// NewDatabase builds a database with unbuffered channels and starts its Run
// loop in a background goroutine, ready to answer lookups.
func NewDatabase() *database {
	d := &database{
		request:  make(chan string),
		response: make(chan bool),
	}
	go d.Run()
	return d
}

35
feed.go
View File

@ -58,6 +58,9 @@ type Feed struct {
// Url from which this feed was created.
Url string
// Database containing a list of known Items and Channels for this instance
database *database
// A notification function, used to notify the host when a new channel
// has been found.
chanhandler ChannelHandler
@ -76,6 +79,7 @@ func New(cachetimeout int, enforcecachelimit bool, ch ChannelHandler, ih ItemHan
v.CacheTimeout = cachetimeout
v.EnforceCacheLimit = enforcecachelimit
v.Type = "none"
v.database = NewDatabase()
v.chanhandler = ch
v.itemhandler = ih
return v
@ -150,24 +154,43 @@ func (this *Feed) makeFeed(doc *xmlx.Document) (err error) {
return
}
chancount := len(this.Channels)
if err = this.buildFeed(doc); err != nil || len(this.Channels) == 0 {
return
}
// Notify host of new channels
if chancount != len(this.Channels) && this.chanhandler != nil {
this.chanhandler(this, this.Channels[chancount:])
}
// reset cache timeout values according to feed specified values (TTL)
if this.EnforceCacheLimit && this.CacheTimeout < this.Channels[0].TTL {
this.CacheTimeout = this.Channels[0].TTL
}
this.notifyListeners()
return
}
// notifyListeners walks every channel and item currently on the feed, asks
// the feed's database which keys have not been seen before, and invokes the
// registered handlers with only the new entries: the item handler once per
// channel that gained items, and the channel handler once with all new
// channels. Keys come from Channel.Key and Item.Key.
func (this *Feed) notifyListeners() {
	var newchannels []*Channel
	for _, channel := range this.Channels {
		this.database.request <- channel.Key()
		if known := <-this.database.response; !known {
			newchannels = append(newchannels, channel)
		}

		var newitems []*Item
		for _, item := range channel.Items {
			this.database.request <- item.Key()
			if known := <-this.database.response; !known {
				newitems = append(newitems, item)
			}
		}
		if len(newitems) > 0 && this.itemhandler != nil {
			this.itemhandler(this, channel, newitems)
		}
	}
	if len(newchannels) > 0 && this.chanhandler != nil {
		this.chanhandler(this, newchannels)
	}
}
// This function returns true or false, depending on whether the CacheTimeout
// value has expired or not. Additionally, it will ensure that we adhere to the
// RSS spec's SkipDays and SkipHours values (if Feed.EnforceCacheLimit is set to

View File

@ -7,20 +7,6 @@ import (
var items []*Item
func Test_NewItem(t *testing.T) {
content, _ := ioutil.ReadFile("testdata/initial.atom")
feed := New(1, true, chanHandler, itemHandler)
err := feed.FetchBytes("http://example.com", content, nil)
if err != nil { t.Error(err) }
content, _ = ioutil.ReadFile("testdata/initial_plus_one_new.atom")
feed.FetchBytes("http://example.com", content, nil)
expected := "Second title"
if expected != items[0].Title {
t.Errorf("Expected %s, got %s", expected, items[0].Title)
}
}
func TestFeed(t *testing.T) {
urilist := []string{
//"http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml", // Non-utf8 encoding.
@ -43,6 +29,26 @@ func TestFeed(t *testing.T) {
}
}
// Test_NewItem verifies deduplication across fetches: after loading
// initial.atom and then initial_plus_one_new.atom, exactly one new item
// ("Second title") should have reached the item handler.
func Test_NewItem(t *testing.T) {
	content, _ := ioutil.ReadFile("testdata/initial.atom")
	feed := New(1, true, chanHandler, itemHandler)
	err := feed.FetchBytes("http://example.com", content, nil)
	if err != nil {
		t.Error(err)
	}
	content, _ = ioutil.ReadFile("testdata/initial_plus_one_new.atom")
	// The second fetch's error was silently discarded before.
	if err = feed.FetchBytes("http://example.com", content, nil); err != nil {
		t.Error(err)
	}
	expected := "Second title"
	// %d, not %s: both arguments are ints (the old format printed
	// "%!s(int=1)"). Fatalf stops the test here so items[0] below cannot
	// panic with an index-out-of-range when no new item arrived.
	if len(items) != 1 {
		t.Fatalf("Expected %d new item, got %d", 1, len(items))
	}
	if expected != items[0].Title {
		t.Errorf("Expected %s, got %s", expected, items[0].Title)
	}
}
func Test_AtomAuthor(t *testing.T) {
content, err := ioutil.ReadFile("testdata/idownload.atom")
if err != nil {
@ -51,7 +57,7 @@ func Test_AtomAuthor(t *testing.T) {
feed := New(1, true, chanHandler, itemHandler)
err = feed.FetchBytes("http://example.com", content, nil)
item := items[0]
item := feed.Channels[0].Items[0]
expected := "Cody Lee"
if item.Author.Name != expected {
t.Errorf("Expected author to be %s but found %s", expected, item.Author.Name)
@ -63,7 +69,7 @@ func Test_RssAuthor(t *testing.T) {
feed := New(1, true, chanHandler, itemHandler)
feed.FetchBytes("http://example.com", content, nil)
item := items[0]
item := feed.Channels[0].Items[0]
expected := "Cory Doctorow"
if item.Author.Name != expected {
t.Errorf("Expected author to be %s but found %s", expected, item.Author.Name)
@ -75,7 +81,7 @@ func Test_CData(t *testing.T) {
feed := New(1, true, chanHandler, itemHandler)
feed.FetchBytes("http://example.com", content, nil)
item := items[0]
item := feed.Channels[0].Items[0]
expected := `<p>abc<div>"def"</div>ghi`
if item.Description != expected {
t.Errorf("Expected item.Description to be [%s] but item.Description=[%s]", expected, item.Description)

20
item.go
View File

@ -1,5 +1,10 @@
package feeder
import (
"crypto/md5"
"io"
)
type Item struct {
// RSS and Shared fields
Title string
@ -19,3 +24,18 @@ type Item struct {
Contributors []string
Content *Content
}
// Key returns the most specific identifier available for this item, used to
// deduplicate items across fetches. Preference order: RSS guid, Atom id,
// title concatenated with publication date, and finally an MD5 digest of
// the description (returned as raw digest bytes, not hex — the key is only
// used as an in-memory map key).
func (i *Item) Key() string {
	if i.Guid != nil && len(*i.Guid) > 0 {
		return *i.Guid
	}
	if len(i.Id) > 0 {
		return i.Id
	}
	if len(i.Title) > 0 && len(i.PubDate) > 0 {
		return i.Title + i.PubDate
	}
	h := md5.New()
	io.WriteString(h, i.Description)
	return string(h.Sum(nil))
}

46
rss.go
View File

@ -6,32 +6,18 @@ import (
xmlx "github.com/jteeuwen/go-pkg-xmlx"
)
// days maps English weekday names to numeric values (Monday = 1 through
// Sunday = 7).
// NOTE(review): presumably consumed by the RSS <skipDays>/cache-limit
// handling mentioned elsewhere in this change — the use site is not
// visible in this hunk; confirm against readRss2 / feed.go.
var days = map[string]int{
"Monday": 1,
"Tuesday": 2,
"Wednesday": 3,
"Thursday": 4,
"Friday": 5,
"Saturday": 6,
"Sunday": 7,
}
func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
days := make(map[string]int)
days["Monday"] = 1
days["Tuesday"] = 2
days["Wednesday"] = 3
days["Thursday"] = 4
days["Friday"] = 5
days["Saturday"] = 6
days["Sunday"] = 7
getChan := func(pubdate, title string) *Channel {
for _, c := range this.Channels {
switch {
case len(pubdate) > 0:
if c.PubDate == pubdate {
return c
}
case len(title) > 0:
if c.Title == title {
return c
}
}
}
return nil
}
var foundChannels []*Channel
var ch *Channel
var i *Item
var n *xmlx.Node
@ -49,10 +35,8 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
channels := root.SelectNodes(ns, "channel")
for _, node := range channels {
if ch = getChan(node.S(ns, "pubDate"), node.S(ns, "title")); ch == nil {
ch = new(Channel)
this.Channels = append(this.Channels, ch)
}
foundChannels = append(foundChannels, ch)
ch.Title = node.S(ns, "title")
list = node.SelectNodes(ns, "link")
@ -125,7 +109,6 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
ch.TextInput.Link = n.S(ns, "link")
}
itemcount := len(ch.Items)
list = node.SelectNodes(ns, "item")
if len(list) == 0 {
list = doc.SelectNodes(ns, "item")
@ -193,10 +176,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
ch.Items = append(ch.Items, i)
}
if itemcount != len(ch.Items) && this.itemhandler != nil {
this.itemhandler(this, ch, ch.Items[itemcount:])
}
}
this.Channels = foundChannels
return
}