Adopt the go-pkg-xmlx change that allows a wildcard ("*") namespace name in node and attribute lookups.

This commit is contained in:
jim teeuwen 2011-02-01 15:30:39 +01:00
parent 1a851548db
commit 6c6b659e88
3 changed files with 92 additions and 91 deletions

33
feed.go
View File

@ -25,12 +25,14 @@
*/ */
package feeder package feeder
import "os" import (
import "time" "os"
import xmlx "github.com/jteeuwen/go-pkg-xmlx" "time"
import "fmt" xmlx "github.com/jteeuwen/go-pkg-xmlx"
import "strconv" "fmt"
import "strings" "strconv"
"strings"
)
type ChannelHandler func(f *Feed, newchannels []*Channel) type ChannelHandler func(f *Feed, newchannels []*Channel)
type ItemHandler func(f *Feed, ch *Channel, newitems []*Item) type ItemHandler func(f *Feed, ch *Channel, newitems []*Item)
@ -187,19 +189,18 @@ func (this *Feed) testVersions() bool {
} }
func (this *Feed) GetVersionInfo(doc *xmlx.Document) (ftype string, fversion [2]int) { func (this *Feed) GetVersionInfo(doc *xmlx.Document) (ftype string, fversion [2]int) {
node := doc.SelectNode("http://www.w3.org/2005/Atom", "feed") var node *xmlx.Node
if node == nil {
if node = doc.SelectNode("http://www.w3.org/2005/Atom", "feed"); node == nil {
goto rss goto rss
} }
ftype = "atom" ftype = "atom"
fversion = [2]int{1, 0} fversion = [2]int{1, 0}
return return
rss: rss:
node = doc.SelectNode("", "rss") if node = doc.SelectNode("", "rss"); node != nil {
if node == nil {
goto end
}
ftype = "rss" ftype = "rss"
version := node.GetAttr("", "version") version := node.GetAttr("", "version")
p := strings.Index(version, ".") p := strings.Index(version, ".")
@ -207,6 +208,14 @@ rss:
minor, _ := strconv.Atoi(version[p+1 : len(version)]) minor, _ := strconv.Atoi(version[p+1 : len(version)])
fversion = [2]int{major, minor} fversion = [2]int{major, minor}
return return
}
// issue#5: Some documents have an RDF root node instead of rss.
if node = doc.SelectNode("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "RDF"); node != nil {
ftype = "rss"
fversion = [2]int{1, 1}
return
}
end: end:
ftype = "unknown" ftype = "unknown"

View File

@ -5,6 +5,7 @@ import "os"
func TestFeed(t *testing.T) { func TestFeed(t *testing.T) {
urilist := []string{ urilist := []string{
//"http://store.steampowered.com/feeds/news.xml", // This feed violates the rss spec.
"http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml", "http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml",
"http://cyber.law.harvard.edu/rss/examples/sampleRss092.xml", "http://cyber.law.harvard.edu/rss/examples/sampleRss092.xml",
"http://cyber.law.harvard.edu/rss/examples/rss2sample.xml", "http://cyber.law.harvard.edu/rss/examples/rss2sample.xml",
@ -19,27 +20,15 @@ func TestFeed(t *testing.T) {
if err = feed.Fetch(uri); err != nil { if err = feed.Fetch(uri); err != nil {
t.Errorf("%s >>> %s", uri, err) t.Errorf("%s >>> %s", uri, err)
return
} }
} }
/*
Output of handlers:
6 new item(s) in WriteTheWeb of http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml
1 new channel(s) in http://cyber.law.harvard.edu/rss/examples/sampleRss091.xml
21 new item(s) in Dave Winer: Grateful Dead of http://cyber.law.harvard.edu/rss/examples/sampleRss092.xml
1 new channel(s) in http://cyber.law.harvard.edu/rss/examples/sampleRss092.xml
4 new item(s) in Liftoff News of http://cyber.law.harvard.edu/rss/examples/rss2sample.xml
1 new channel(s) in http://cyber.law.harvard.edu/rss/examples/rss2sample.xml
15 new item(s) in Blog@Case of http://blog.case.edu/news/feed.atom
1 new channel(s) in http://blog.case.edu/news/feed.atom
*/
} }
func chanHandler(feed *Feed, newchannels []*Channel) { func chanHandler(feed *Feed, newchannels []*Channel) {
//println(len(newchannels), "new channel(s) in", feed.Url) println(len(newchannels), "new channel(s) in", feed.Url)
} }
func itemHandler(feed *Feed, ch *Channel, newitems []*Item) { func itemHandler(feed *Feed, ch *Channel, newitems []*Item) {
//println(len(newitems), "new item(s) in", ch.Title, "of", feed.Url) println(len(newitems), "new item(s) in", ch.Title, "of", feed.Url)
} }

119
rss.go
View File

@ -1,7 +1,9 @@
package feeder package feeder
import "os" import (
import xmlx "github.com/jteeuwen/go-pkg-xmlx" "os"
xmlx "github.com/jteeuwen/go-pkg-xmlx"
)
func (this *Feed) readRss2(doc *xmlx.Document) (err os.Error) { func (this *Feed) readRss2(doc *xmlx.Document) (err os.Error) {
days := make(map[string]int) days := make(map[string]int)
@ -53,134 +55,135 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err os.Error) {
var i *Item var i *Item
var n *xmlx.Node var n *xmlx.Node
var list, tl []*xmlx.Node var list, tl []*xmlx.Node
const ns = "*"
channels := doc.SelectNodes("", "channel") channels := doc.SelectNodes(ns, "channel")
for _, node := range channels { for _, node := range channels {
if ch = getChan(node.GetValue("", "pubDate"), node.GetValue("", "title")); ch == nil { if ch = getChan(node.GetValue(ns, "pubDate"), node.GetValue(ns, "title")); ch == nil {
ch = new(Channel) ch = new(Channel)
this.Channels = append(this.Channels, ch) this.Channels = append(this.Channels, ch)
} }
ch.Title = node.GetValue("", "title") ch.Title = node.GetValue(ns, "title")
list = node.SelectNodes("", "link") list = node.SelectNodes(ns, "link")
ch.Links = make([]Link, len(list)) ch.Links = make([]Link, len(list))
for i, v := range list { for i, v := range list {
ch.Links[i].Href = v.Value ch.Links[i].Href = v.Value
} }
ch.Description = node.GetValue("", "description") ch.Description = node.GetValue(ns, "description")
ch.Language = node.GetValue("", "language") ch.Language = node.GetValue(ns, "language")
ch.Copyright = node.GetValue("", "copyright") ch.Copyright = node.GetValue(ns, "copyright")
ch.ManagingEditor = node.GetValue("", "managingEditor") ch.ManagingEditor = node.GetValue(ns, "managingEditor")
ch.WebMaster = node.GetValue("", "webMaster") ch.WebMaster = node.GetValue(ns, "webMaster")
ch.PubDate = node.GetValue("", "pubDate") ch.PubDate = node.GetValue(ns, "pubDate")
ch.LastBuildDate = node.GetValue("", "lastBuildDate") ch.LastBuildDate = node.GetValue(ns, "lastBuildDate")
ch.Docs = node.GetValue("", "docs") ch.Docs = node.GetValue(ns, "docs")
list = node.SelectNodes("", "category") list = node.SelectNodes(ns, "category")
ch.Categories = make([]*Category, len(list)) ch.Categories = make([]*Category, len(list))
for i, v := range list { for i, v := range list {
ch.Categories[i] = new(Category) ch.Categories[i] = new(Category)
ch.Categories[i].Domain = v.GetAttr("", "domain") ch.Categories[i].Domain = v.GetAttr(ns, "domain")
ch.Categories[i].Text = v.Value ch.Categories[i].Text = v.Value
} }
if n = node.SelectNode("", "generator"); n != nil { if n = node.SelectNode(ns, "generator"); n != nil {
ch.Generator = Generator{} ch.Generator = Generator{}
ch.Generator.Text = n.Value ch.Generator.Text = n.Value
} }
ch.TTL = node.GetValuei("", "ttl") ch.TTL = node.GetValuei(ns, "ttl")
ch.Rating = node.GetValue("", "rating") ch.Rating = node.GetValue(ns, "rating")
list = node.SelectNodes("", "hour") list = node.SelectNodes(ns, "hour")
ch.SkipHours = make([]int, len(list)) ch.SkipHours = make([]int, len(list))
for i, v := range list { for i, v := range list {
ch.SkipHours[i] = int(v.GetValuei("", "hour")) ch.SkipHours[i] = int(v.GetValuei(ns, "hour"))
} }
list = node.SelectNodes("", "days") list = node.SelectNodes(ns, "days")
ch.SkipDays = make([]int, len(list)) ch.SkipDays = make([]int, len(list))
for i, v := range list { for i, v := range list {
ch.SkipDays[i] = days[v.Value] ch.SkipDays[i] = days[v.Value]
} }
if n = node.SelectNode("", "image"); n != nil { if n = node.SelectNode(ns, "image"); n != nil {
ch.Image.Title = n.GetValue("", "title") ch.Image.Title = n.GetValue(ns, "title")
ch.Image.Url = n.GetValue("", "url") ch.Image.Url = n.GetValue(ns, "url")
ch.Image.Link = n.GetValue("", "link") ch.Image.Link = n.GetValue(ns, "link")
ch.Image.Width = n.GetValuei("", "width") ch.Image.Width = n.GetValuei(ns, "width")
ch.Image.Height = n.GetValuei("", "height") ch.Image.Height = n.GetValuei(ns, "height")
ch.Image.Description = n.GetValue("", "description") ch.Image.Description = n.GetValue(ns, "description")
} }
if n = node.SelectNode("", "cloud"); n != nil { if n = node.SelectNode(ns, "cloud"); n != nil {
ch.Cloud = Cloud{} ch.Cloud = Cloud{}
ch.Cloud.Domain = n.GetAttr("", "domain") ch.Cloud.Domain = n.GetAttr(ns, "domain")
ch.Cloud.Port = n.GetAttri("", "port") ch.Cloud.Port = n.GetAttri(ns, "port")
ch.Cloud.Path = n.GetAttr("", "path") ch.Cloud.Path = n.GetAttr(ns, "path")
ch.Cloud.RegisterProcedure = n.GetAttr("", "registerProcedure") ch.Cloud.RegisterProcedure = n.GetAttr(ns, "registerProcedure")
ch.Cloud.Protocol = n.GetAttr("", "protocol") ch.Cloud.Protocol = n.GetAttr(ns, "protocol")
} }
if n = node.SelectNode("", "textInput"); n != nil { if n = node.SelectNode(ns, "textInput"); n != nil {
ch.TextInput = Input{} ch.TextInput = Input{}
ch.TextInput.Title = n.GetValue("", "title") ch.TextInput.Title = n.GetValue(ns, "title")
ch.TextInput.Description = n.GetValue("", "description") ch.TextInput.Description = n.GetValue(ns, "description")
ch.TextInput.Name = n.GetValue("", "name") ch.TextInput.Name = n.GetValue(ns, "name")
ch.TextInput.Link = n.GetValue("", "link") ch.TextInput.Link = n.GetValue(ns, "link")
} }
itemcount := len(ch.Items) itemcount := len(ch.Items)
list = node.SelectNodes("", "item") list = node.SelectNodes(ns, "item")
for _, item := range list { for _, item := range list {
if haveItem(ch, item.GetValue("", "pubDate"), if haveItem(ch, item.GetValue(ns, "pubDate"),
item.GetValue("", "title"), item.GetValue("", "description")) { item.GetValue(ns, "title"), item.GetValue(ns, "description")) {
continue continue
} }
i = new(Item) i = new(Item)
i.Title = item.GetValue("", "title") i.Title = item.GetValue(ns, "title")
i.Description = item.GetValue("", "description") i.Description = item.GetValue(ns, "description")
tl = node.SelectNodes("", "link") tl = node.SelectNodes(ns, "link")
for _, v := range tl { for _, v := range tl {
lnk := new(Link) lnk := new(Link)
lnk.Href = v.Value lnk.Href = v.Value
i.Links = append(i.Links, lnk) i.Links = append(i.Links, lnk)
} }
if n = item.SelectNode("", "author"); n != nil { if n = item.SelectNode(ns, "author"); n != nil {
i.Author = Author{} i.Author = Author{}
i.Author.Name = n.Value i.Author.Name = n.Value
} }
i.Comments = item.GetValue("", "comments") i.Comments = item.GetValue(ns, "comments")
i.Guid = item.GetValue("", "guid") i.Guid = item.GetValue(ns, "guid")
i.PubDate = item.GetValue("", "pubDate") i.PubDate = item.GetValue(ns, "pubDate")
tl = item.SelectNodes("", "category") tl = item.SelectNodes(ns, "category")
for _, lv := range tl { for _, lv := range tl {
cat := new(Category) cat := new(Category)
cat.Domain = lv.GetAttr("", "domain") cat.Domain = lv.GetAttr(ns, "domain")
cat.Text = lv.Value cat.Text = lv.Value
i.Categories = append(i.Categories, cat) i.Categories = append(i.Categories, cat)
} }
tl = item.SelectNodes("", "enclosure") tl = item.SelectNodes(ns, "enclosure")
for _, lv := range tl { for _, lv := range tl {
enc := new(Enclosure) enc := new(Enclosure)
enc.Url = lv.GetAttr("", "url") enc.Url = lv.GetAttr(ns, "url")
enc.Length = lv.GetAttri64("", "length") enc.Length = lv.GetAttri64(ns, "length")
enc.Type = lv.GetAttr("", "type") enc.Type = lv.GetAttr(ns, "type")
i.Enclosures = append(i.Enclosures, enc) i.Enclosures = append(i.Enclosures, enc)
} }
if src := item.SelectNode("", "source"); src != nil { if src := item.SelectNode(ns, "source"); src != nil {
i.Source = new(Source) i.Source = new(Source)
i.Source.Url = src.GetAttr("", "url") i.Source.Url = src.GetAttr(ns, "url")
i.Source.Text = src.Value i.Source.Text = src.Value
} }