From 20d050f9088a9e9b9dc760fea447e9448722bf73 Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Mon, 30 Sep 2013 01:23:51 -0400 Subject: [PATCH 1/2] Fix Atom/RSS feed parsing due to changes in jteeuwen/go-pkg-xmlx. Due to recent changes involving how values are dealt with in xmlx, update the RSS/Atom parsing. Instead of using the Value property of an xmlx Node, use the new GetValue function offered in PR jteeuwen/go-pkg-xmlx#15. --- atom.go | 6 +++--- rss.go | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/atom.go b/atom.go index 208e54a..d46cb35 100644 --- a/atom.go +++ b/atom.go @@ -50,14 +50,14 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { if tn = node.SelectNode(ns, "subtitle"); tn != nil { ch.SubTitle = SubTitle{} ch.SubTitle.Type = tn.As("", "type") - ch.SubTitle.Text = tn.Value + ch.SubTitle.Text = tn.GetValue() } if tn = node.SelectNode(ns, "generator"); tn != nil { ch.Generator = Generator{} ch.Generator.Uri = tn.As("", "uri") ch.Generator.Version = tn.As("", "version") - ch.Generator.Text = tn.Value + ch.Generator.Text = tn.GetValue() } if tn = node.SelectNode(ns, "author"); tn != nil { @@ -104,7 +104,7 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { i.Content.Type = tn.As("", "type") i.Content.Lang = tn.S("xml", "lang") i.Content.Base = tn.S("xml", "base") - i.Content.Text = tn.Value + i.Content.Text = tn.GetValue() } if tn = item.SelectNode(ns, "author"); tn != nil { diff --git a/rss.go b/rss.go index e43fbfe..eb8d359 100644 --- a/rss.go +++ b/rss.go @@ -46,7 +46,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { ch.Links = make([]Link, len(list)) for i, v := range list { - ch.Links[i].Href = v.Value + ch.Links[i].Href = v.GetValue() } ch.Description = node.S(ns, "description") @@ -63,12 +63,12 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { for i, v := range list { ch.Categories[i] = new(Category) ch.Categories[i].Domain = v.As(ns, "domain") - 
ch.Categories[i].Text = v.Value + ch.Categories[i].Text = v.GetValue() } if n = node.SelectNode(ns, "generator"); n != nil { ch.Generator = Generator{} - ch.Generator.Text = n.Value + ch.Generator.Text = n.GetValue() } ch.TTL = node.I(ns, "ttl") @@ -83,7 +83,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { list = node.SelectNodes(ns, "days") ch.SkipDays = make([]int, len(list)) for i, v := range list { - ch.SkipDays[i] = days[v.Value] + ch.SkipDays[i] = days[v.GetValue()] } if n = node.SelectNode(ns, "image"); n != nil { @@ -126,16 +126,16 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { tl = item.SelectNodes(ns, "link") for _, v := range tl { lnk := new(Link) - lnk.Href = v.Value + lnk.Href = v.GetValue() i.Links = append(i.Links, lnk) } if n = item.SelectNode(ns, "author"); n != nil { i.Author = Author{} - i.Author.Name = n.Value + i.Author.Name = n.GetValue() } if n = item.SelectNode(ns, "creator"); n != nil { - i.Author = Author{ Name: n.Value } + i.Author = Author{ Name: n.GetValue() } } i.Comments = item.S(ns, "comments") @@ -146,7 +146,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { for _, lv := range tl { cat := new(Category) cat.Domain = lv.As(ns, "domain") - cat.Text = lv.Value + cat.Text = lv.GetValue() i.Categories = append(i.Categories, cat) } @@ -162,7 +162,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { if src := item.SelectNode(ns, "source"); src != nil { i.Source = new(Source) i.Source.Url = src.As(ns, "url") - i.Source.Text = src.Value + i.Source.Text = src.GetValue() } tl = item.SelectNodes("http://purl.org/rss/1.0/modules/content/", "*") From 66eea2e6af57b2abcbb481f4f85f9e50ec66a54b Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Fri, 25 Oct 2013 23:26:15 -0400 Subject: [PATCH 2/2] Fix RSS feed parsing due to changes in go-pkg-xmlx. Due to the PR jteeuwen/go-pkg-xmlx#16, when using SelectNodes there is no longer any hidden recursion. 
Because of RSS's structure, the channels sit under a root document node (rss or RDF) rather than directly under the document. Together, these two facts break the RSS parsing, since selecting "channel" directly on the document no longer reaches them. Fix by first selecting the root document node, and then selecting the channels from it for parsing. --- rss.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/rss.go b/rss.go index eb8d359..763593e 100644 --- a/rss.go +++ b/rss.go @@ -1,6 +1,10 @@ package feeder -import xmlx "github.com/jteeuwen/go-pkg-xmlx" +import ( + "errors" + + xmlx "github.com/jteeuwen/go-pkg-xmlx" +) func (this *Feed) readRss2(doc *xmlx.Document) (err error) { days := make(map[string]int) @@ -34,7 +38,16 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { var list, tl []*xmlx.Node const ns = "*" - channels := doc.SelectNodes(ns, "channel") + root := doc.SelectNode(ns, "rss") + if root == nil { + root = doc.SelectNode(ns, "RDF") + } + + if root == nil { + return errors.New("Failed to find rss/rdf node in XML.") + } + + channels := root.SelectNodes(ns, "channel") for _, node := range channels { if ch = getChan(node.S(ns, "pubDate"), node.S(ns, "title")); ch == nil { ch = new(Channel)