Time parsing.
Rather than just using a string for PubDate, we attempt to parse it. This includes a couple of crazy non-standard time formats that I've seen in the wild. Breaking change: Item.PubDate is no longer a string; it is now a time.Time.
This commit is contained in:
parent
2b6dc03ede
commit
2c67b94a04
2
atom.go
2
atom.go
|
@ -56,7 +56,7 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) {
|
||||||
i = new(Item)
|
i = new(Item)
|
||||||
i.Title = item.S(ns, "title")
|
i.Title = item.S(ns, "title")
|
||||||
i.Id = item.S(ns, "id")
|
i.Id = item.S(ns, "id")
|
||||||
i.PubDate = item.S(ns, "updated")
|
i.PubDate, _ = parseTime(item.S(ns, "updated"))
|
||||||
i.Description = item.S(ns, "summary")
|
i.Description = item.S(ns, "summary")
|
||||||
|
|
||||||
links := item.SelectNodes(ns, "link")
|
links := item.SelectNodes(ns, "link")
|
||||||
|
|
7
item.go
7
item.go
|
@ -3,6 +3,7 @@ package feeder
|
||||||
import (
|
import (
|
||||||
"crypto/md5"
|
"crypto/md5"
|
||||||
"io"
|
"io"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Item struct {
|
type Item struct {
|
||||||
|
@ -15,7 +16,7 @@ type Item struct {
|
||||||
Comments string
|
Comments string
|
||||||
Enclosures []*Enclosure
|
Enclosures []*Enclosure
|
||||||
Guid *string
|
Guid *string
|
||||||
PubDate string
|
PubDate time.Time
|
||||||
Source *Source
|
Source *Source
|
||||||
|
|
||||||
// Atom specific fields
|
// Atom specific fields
|
||||||
|
@ -33,8 +34,8 @@ func (i *Item) Key() string {
|
||||||
return *i.Guid
|
return *i.Guid
|
||||||
case len(i.Id) != 0:
|
case len(i.Id) != 0:
|
||||||
return i.Id
|
return i.Id
|
||||||
case len(i.Title) > 0 && len(i.PubDate) > 0:
|
case len(i.Title) > 0 && !i.PubDate.IsZero():
|
||||||
return i.Title + i.PubDate
|
return i.Title + i.PubDate.String()
|
||||||
default:
|
default:
|
||||||
h := md5.New()
|
h := md5.New()
|
||||||
io.WriteString(h, i.Description)
|
io.WriteString(h, i.Description)
|
||||||
|
|
2
rss.go
2
rss.go
|
@ -162,7 +162,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) {
|
||||||
i.Guid = &guid
|
i.Guid = &guid
|
||||||
}
|
}
|
||||||
|
|
||||||
i.PubDate = item.S(ns, "pubDate")
|
i.PubDate, _ = parseTime(item.S(ns, "pubDate"))
|
||||||
|
|
||||||
tl = item.SelectNodes(ns, "category")
|
tl = item.SelectNodes(ns, "category")
|
||||||
for _, lv := range tl {
|
for _, lv := range tl {
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
package feeder
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// parseTime tries to interpret formatted as a timestamp using a fixed list of
// layouts: the RFC-style layouts from the time package plus a few
// non-standard variants. Leading and trailing whitespace is ignored.
//
// The layouts are tried in order and the result of the first one that parses
// without error is returned with a nil error. If no layout matches, the zero
// time.Time is returned together with the error reported for the last layout
// attempted.
func parseTime(formatted string) (time.Time, error) {
	layouts := [...]string{
		"Mon, _2 Jan 2006 15:04:05 MST",
		"Mon, _2 Jan 2006 15:04:05 -0700",
		time.ANSIC,
		time.UnixDate,
		time.RubyDate,
		time.RFC822,
		time.RFC822Z,
		time.RFC850,
		time.RFC1123,
		time.RFC1123Z,
		time.RFC3339,
		time.RFC3339Nano,
		"Mon, 2, Jan 2006 15:4",
		"02 Jan 2006 15:04:05 MST",
	}

	formatted = strings.TrimSpace(formatted)

	var lastErr error
	for _, layout := range layouts {
		t, err := time.Parse(layout, formatted)
		// Success is decided by the error, not by the parsed value: a valid
		// timestamp may legitimately be the zero instant.
		if err == nil {
			return t, nil
		}
		lastErr = err
	}
	return time.Time{}, lastErr
}
|
|
@ -0,0 +1,94 @@
|
||||||
|
package feeder
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Test_InvalidDate(t *testing.T) {
|
||||||
|
date, err := parseTime("invalid")
|
||||||
|
if !date.IsZero() {
|
||||||
|
t.Errorf("Invalid date should parse to zero")
|
||||||
|
}
|
||||||
|
if err == nil {
|
||||||
|
t.Errorf("error should not be nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_ParseLayout0(t *testing.T) {
|
||||||
|
date, err := parseTime("2014-03-07T05:38:00-05:00")
|
||||||
|
expected := time.Date(2014, time.March, 7, 5, 38, 0, 0, time.FixedZone("-0500", -18000))
|
||||||
|
assertEqualTime(t, expected, date)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("err should be nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_ParseLayout1(t *testing.T) {
|
||||||
|
date, err := parseTime("Fri, 07 Mar 2014 17:42:51 GMT")
|
||||||
|
expected := time.Date(2014, time.March, 7, 17, 42, 51, 0, time.UTC)
|
||||||
|
assertEqualTime(t, expected, date)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("err should be nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_ParseLayout2(t *testing.T) {
|
||||||
|
date, err := parseTime("2014-02-05T23:33:34Z")
|
||||||
|
expected := time.Date(2014, time.February, 5, 23, 33, 34, 0, time.UTC)
|
||||||
|
assertEqualTime(t, expected, date)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("err should be nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_ParseLayout3(t *testing.T) {
|
||||||
|
date, err := parseTime("Mon, 03 Mar 2014 02:12:25 +0000")
|
||||||
|
expected := time.Date(2014, time.March, 3, 2, 12, 25, 0, time.UTC)
|
||||||
|
assertEqualTime(t, expected, date)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("err should be nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_ParseLayout4(t *testing.T) {
|
||||||
|
date, err := parseTime("Fri, 21, Mar 2014 10:41")
|
||||||
|
expected := time.Date(2014, time.March, 21, 10, 41, 0, 0, time.UTC)
|
||||||
|
assertEqualTime(t, expected, date)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("err should be nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_ParseLayout4_1(t *testing.T) {
|
||||||
|
date, err := parseTime("Fri, 17, Jan 2014 11:1")
|
||||||
|
expected := time.Date(2014, time.January, 17, 11, 1, 0, 0, time.UTC)
|
||||||
|
assertEqualTime(t, expected, date)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("err should be nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_ParseLayout4_2(t *testing.T) {
|
||||||
|
date, err := parseTime("Thu, 9, Jan 2014 10:19")
|
||||||
|
expected := time.Date(2014, time.January, 9, 10, 19, 0, 0, time.UTC)
|
||||||
|
assertEqualTime(t, expected, date)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("err should be nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_ParseLayout5(t *testing.T) {
|
||||||
|
date, err := parseTime("22 Jul 2013 14:55:01 EST")
|
||||||
|
expected := time.Date(2013, time.July, 22, 14, 55, 1, 0, time.FixedZone("EST", -18000))
|
||||||
|
assertEqualTime(t, expected, date)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("err should be nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// assertEqualTime reports a test error on t when expected and actual do not
// denote the same instant. The comparison uses time.Time.Equal, so two values
// in different locations that represent the same instant are considered equal.
func assertEqualTime(t *testing.T, expected, actual time.Time) {
	// Mark this function as a helper so a failure is attributed to the line of
	// the calling test rather than to this function.
	t.Helper()

	if expected.Equal(actual) {
		return
	}
	t.Errorf("expected %v but was %v", expected, actual)
}
|
Loading…
Reference in New Issue