From 2c67b94a0407a3f6cf82331374c0a8f7c05c5c47 Mon Sep 17 00:00:00 2001 From: Sean Schulte Date: Mon, 24 Mar 2014 21:54:15 -0500 Subject: [PATCH 1/3] Time parsing. Rather than just using a string for PubDate, we attempt to parse it. This includes a couple of crazy non-standard time formats that I've seen in the wild. Breaking change: Item.PubDate is no longer a string, it is time.Time. --- atom.go | 2 +- item.go | 7 ++-- rss.go | 2 +- timeparser.go | 35 +++++++++++++++++ timeparser_test.go | 94 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 135 insertions(+), 5 deletions(-) create mode 100644 timeparser.go create mode 100644 timeparser_test.go diff --git a/atom.go b/atom.go index cab3935..37f568c 100644 --- a/atom.go +++ b/atom.go @@ -56,7 +56,7 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { i = new(Item) i.Title = item.S(ns, "title") i.Id = item.S(ns, "id") - i.PubDate = item.S(ns, "updated") + i.PubDate, _ = parseTime(item.S(ns, "updated")) i.Description = item.S(ns, "summary") links := item.SelectNodes(ns, "link") diff --git a/item.go b/item.go index 74045e4..5c002f7 100644 --- a/item.go +++ b/item.go @@ -3,6 +3,7 @@ package feeder import ( "crypto/md5" "io" + "time" ) type Item struct { @@ -15,7 +16,7 @@ type Item struct { Comments string Enclosures []*Enclosure Guid *string - PubDate string + PubDate time.Time Source *Source // Atom specific fields @@ -33,8 +34,8 @@ func (i *Item) Key() string { return *i.Guid case len(i.Id) != 0: return i.Id - case len(i.Title) > 0 && len(i.PubDate) > 0: - return i.Title + i.PubDate + case len(i.Title) > 0 && !i.PubDate.IsZero(): + return i.Title + i.PubDate.String() default: h := md5.New() io.WriteString(h, i.Description) diff --git a/rss.go b/rss.go index 1c347ca..e5e749f 100644 --- a/rss.go +++ b/rss.go @@ -162,7 +162,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { i.Guid = &guid } - i.PubDate = item.S(ns, "pubDate") + i.PubDate, _ = parseTime(item.S(ns, "pubDate")) tl = item.SelectNodes(ns, "category") for _, lv := range tl { diff --git a/timeparser.go b/timeparser.go new file mode 100644 index 0000000..5498052 --- /dev/null +++ b/timeparser.go @@ -0,0 +1,35 @@ +package feeder + +import ( + "strings" + "time" +) + +func parseTime(formatted string) (time.Time, error) { + var layouts = [...]string{ + "Mon, _2 Jan 2006 15:04:05 MST", + "Mon, _2 Jan 2006 15:04:05 -0700", + time.ANSIC, + time.UnixDate, + time.RubyDate, + time.RFC822, + time.RFC822Z, + time.RFC850, + time.RFC1123, + time.RFC1123Z, + time.RFC3339, + time.RFC3339Nano, + "Mon, 2, Jan 2006 15:4", + "02 Jan 2006 15:04:05 MST", + } + var t time.Time + var err error + formatted = strings.TrimSpace(formatted) + for _, layout := range layouts { + t, err = time.Parse(layout, formatted) + if !t.IsZero() { + break + } + } + return t, err +} diff --git a/timeparser_test.go b/timeparser_test.go new file mode 100644 index 0000000..fc95d22 --- /dev/null +++ b/timeparser_test.go @@ -0,0 +1,94 @@ +package feeder + +import ( + "time" + "testing" +) + +func Test_InvalidDate(t *testing.T) { + date, err := parseTime("invalid") + if !date.IsZero() { + t.Errorf("Invalid date should parse to zero") + } + if err == nil { + t.Errorf("error should not be nil") + } +} + +func Test_ParseLayout0(t *testing.T) { + date, err := parseTime("2014-03-07T05:38:00-05:00") + expected := time.Date(2014, time.March, 7, 5, 38, 0, 0, time.FixedZone("-0500", -18000)) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } +} + +func Test_ParseLayout1(t *testing.T) { + date, err := parseTime("Fri, 07 Mar 2014 17:42:51 GMT") + expected := time.Date(2014, time.March, 7, 17, 42, 51, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } +} + +func Test_ParseLayout2(t *testing.T) { + date, err := parseTime("2014-02-05T23:33:34Z") + expected := time.Date(2014, time.February, 5, 23, 33, 34, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } +} + +func Test_ParseLayout3(t *testing.T) { + date, err := parseTime("Mon, 03 Mar 2014 02:12:25 +0000") + expected := time.Date(2014, time.March, 3, 2, 12, 25, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } +} + +func Test_ParseLayout4(t *testing.T) { + date, err := parseTime("Fri, 21, Mar 2014 10:41") + expected := time.Date(2014, time.March, 21, 10, 41, 0, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } +} + +func Test_ParseLayout4_1(t *testing.T) { + date, err := parseTime("Fri, 17, Jan 2014 11:1") + expected := time.Date(2014, time.January, 17, 11, 1, 0, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } +} + +func Test_ParseLayout4_2(t *testing.T) { + date, err := parseTime("Thu, 9, Jan 2014 10:19") + expected := time.Date(2014, time.January, 9, 10, 19, 0, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } +} + +func Test_ParseLayout5(t *testing.T) { + date, err := parseTime("22 Jul 2013 14:55:01 EST") + expected := time.Date(2013, time.July, 22, 14, 55, 1, 0, time.FixedZone("EST", -18000)) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } +} + +func assertEqualTime(t *testing.T, expected, actual time.Time) { + if !expected.Equal(actual) { + t.Errorf("expected %v but was %v", expected, actual) + } +} From c6a78164358f4d6fe84e89a338395887a45c454f Mon Sep 17 00:00:00 2001 From: Sean Schulte Date: Mon, 24 Mar 2014 21:58:36 -0500 Subject: [PATCH 2/3] Back to previous API. (IE, the previous breaking changes are unbroken.) --- atom.go | 2 +- item.go | 10 +++++++--- rss.go | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/atom.go b/atom.go index 37f568c..cab3935 100644 --- a/atom.go +++ b/atom.go @@ -56,7 +56,7 @@ func (this *Feed) readAtom(doc *xmlx.Document) (err error) { i = new(Item) i.Title = item.S(ns, "title") i.Id = item.S(ns, "id") - i.PubDate, _ = parseTime(item.S(ns, "updated")) + i.PubDate = item.S(ns, "updated") i.Description = item.S(ns, "summary") links := item.SelectNodes(ns, "link") diff --git a/item.go b/item.go index 5c002f7..f572a75 100644 --- a/item.go +++ b/item.go @@ -16,7 +16,7 @@ type Item struct { Comments string Enclosures []*Enclosure Guid *string - PubDate time.Time + PubDate string Source *Source // Atom specific fields @@ -28,14 +28,18 @@ type Item struct { Extensions map[string]map[string][]Extension } +func (i *Item) ParsedPubDate() (time.Time, error) { + return parseTime(i.PubDate) +} + func (i *Item) Key() string { switch { case i.Guid != nil && len(*i.Guid) != 0: return *i.Guid case len(i.Id) != 0: return i.Id - case len(i.Title) > 0 && !i.PubDate.IsZero(): - return i.Title + i.PubDate.String() + case len(i.Title) > 0 && len(i.PubDate) > 0: + return i.Title + i.PubDate default: h := md5.New() io.WriteString(h, i.Description) diff --git a/rss.go b/rss.go index e5e749f..1c347ca 100644 --- a/rss.go +++ b/rss.go @@ -162,7 +162,7 @@ func (this *Feed) readRss2(doc *xmlx.Document) (err error) { i.Guid = &guid } - i.PubDate, _ = parseTime(item.S(ns, "pubDate")) + i.PubDate = item.S(ns, "pubDate") tl = item.SelectNodes(ns, "category") for _, lv := range tl { From a93420eed32f3324de281b14984db3f73207d175 Mon Sep 17 00:00:00 2001 From: Sean Schulte Date: Mon, 24 Mar 2014 22:01:26 -0500 Subject: [PATCH 3/3] go fmt --- item.go | 4 +- timeparser.go | 56 ++++++++++----------- timeparser_test.go | 120 ++++++++++++++++++++++----------------------- 3 files changed, 90 insertions(+), 90 deletions(-) diff --git a/item.go b/item.go index f572a75..905d868 100644 --- a/item.go +++ b/item.go @@ -3,7 +3,7 @@ package feeder import ( "crypto/md5" "io" - "time" + "time" ) type Item struct { @@ -29,7 +29,7 @@ type Item struct { } func (i *Item) ParsedPubDate() (time.Time, error) { - return parseTime(i.PubDate) + return parseTime(i.PubDate) } func (i *Item) Key() string { diff --git a/timeparser.go b/timeparser.go index 5498052..5a106e9 100644 --- a/timeparser.go +++ b/timeparser.go @@ -1,35 +1,35 @@ package feeder import ( - "strings" - "time" + "strings" + "time" ) func parseTime(formatted string) (time.Time, error) { - var layouts = [...]string{ - "Mon, _2 Jan 2006 15:04:05 MST", - "Mon, _2 Jan 2006 15:04:05 -0700", - time.ANSIC, - time.UnixDate, - time.RubyDate, - time.RFC822, - time.RFC822Z, - time.RFC850, - time.RFC1123, - time.RFC1123Z, - time.RFC3339, - time.RFC3339Nano, - "Mon, 2, Jan 2006 15:4", - "02 Jan 2006 15:04:05 MST", - } - var t time.Time - var err error - formatted = strings.TrimSpace(formatted) - for _, layout := range layouts { - t, err = time.Parse(layout, formatted) - if !t.IsZero() { - break - } - } - return t, err + var layouts = [...]string{ + "Mon, _2 Jan 2006 15:04:05 MST", + "Mon, _2 Jan 2006 15:04:05 -0700", + time.ANSIC, + time.UnixDate, + time.RubyDate, + time.RFC822, + time.RFC822Z, + time.RFC850, + time.RFC1123, + time.RFC1123Z, + time.RFC3339, + time.RFC3339Nano, + "Mon, 2, Jan 2006 15:4", + "02 Jan 2006 15:04:05 MST", + } + var t time.Time + var err error + formatted = strings.TrimSpace(formatted) + for _, layout := range layouts { + t, err = time.Parse(layout, formatted) + if !t.IsZero() { + break + } + } + return t, err } diff --git a/timeparser_test.go b/timeparser_test.go index fc95d22..2a490e4 100644 --- a/timeparser_test.go +++ b/timeparser_test.go @@ -1,94 +1,94 @@ package feeder import ( - "time" - "testing" + "testing" + "time" ) func Test_InvalidDate(t *testing.T) { - date, err := parseTime("invalid") - if !date.IsZero() { - t.Errorf("Invalid date should parse to zero") - } - if err == nil { - t.Errorf("error should not be nil") - } + date, err := parseTime("invalid") + if !date.IsZero() { + t.Errorf("Invalid date should parse to zero") + } + if err == nil { + t.Errorf("error should not be nil") + } } func Test_ParseLayout0(t *testing.T) { - date, err := parseTime("2014-03-07T05:38:00-05:00") - expected := time.Date(2014, time.March, 7, 5, 38, 0, 0, time.FixedZone("-0500", -18000)) - assertEqualTime(t, expected, date) - if err != nil { - t.Errorf("err should be nil") - } + date, err := parseTime("2014-03-07T05:38:00-05:00") + expected := time.Date(2014, time.March, 7, 5, 38, 0, 0, time.FixedZone("-0500", -18000)) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } } func Test_ParseLayout1(t *testing.T) { - date, err := parseTime("Fri, 07 Mar 2014 17:42:51 GMT") - expected := time.Date(2014, time.March, 7, 17, 42, 51, 0, time.UTC) - assertEqualTime(t, expected, date) - if err != nil { - t.Errorf("err should be nil") - } + date, err := parseTime("Fri, 07 Mar 2014 17:42:51 GMT") + expected := time.Date(2014, time.March, 7, 17, 42, 51, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } } func Test_ParseLayout2(t *testing.T) { - date, err := parseTime("2014-02-05T23:33:34Z") - expected := time.Date(2014, time.February, 5, 23, 33, 34, 0, time.UTC) - assertEqualTime(t, expected, date) - if err != nil { - t.Errorf("err should be nil") - } + date, err := parseTime("2014-02-05T23:33:34Z") + expected := time.Date(2014, time.February, 5, 23, 33, 34, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } } func Test_ParseLayout3(t *testing.T) { - date, err := parseTime("Mon, 03 Mar 2014 02:12:25 +0000") - expected := time.Date(2014, time.March, 3, 2, 12, 25, 0, time.UTC) - assertEqualTime(t, expected, date) - if err != nil { - t.Errorf("err should be nil") - } + date, err := parseTime("Mon, 03 Mar 2014 02:12:25 +0000") + expected := time.Date(2014, time.March, 3, 2, 12, 25, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } } func Test_ParseLayout4(t *testing.T) { - date, err := parseTime("Fri, 21, Mar 2014 10:41") - expected := time.Date(2014, time.March, 21, 10, 41, 0, 0, time.UTC) - assertEqualTime(t, expected, date) - if err != nil { - t.Errorf("err should be nil") - } + date, err := parseTime("Fri, 21, Mar 2014 10:41") + expected := time.Date(2014, time.March, 21, 10, 41, 0, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } } func Test_ParseLayout4_1(t *testing.T) { - date, err := parseTime("Fri, 17, Jan 2014 11:1") - expected := time.Date(2014, time.January, 17, 11, 1, 0, 0, time.UTC) - assertEqualTime(t, expected, date) - if err != nil { - t.Errorf("err should be nil") - } + date, err := parseTime("Fri, 17, Jan 2014 11:1") + expected := time.Date(2014, time.January, 17, 11, 1, 0, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } } func Test_ParseLayout4_2(t *testing.T) { - date, err := parseTime("Thu, 9, Jan 2014 10:19") - expected := time.Date(2014, time.January, 9, 10, 19, 0, 0, time.UTC) - assertEqualTime(t, expected, date) - if err != nil { - t.Errorf("err should be nil") - } + date, err := parseTime("Thu, 9, Jan 2014 10:19") + expected := time.Date(2014, time.January, 9, 10, 19, 0, 0, time.UTC) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } } func Test_ParseLayout5(t *testing.T) { - date, err := parseTime("22 Jul 2013 14:55:01 EST") - expected := time.Date(2013, time.July, 22, 14, 55, 1, 0, time.FixedZone("EST", -18000)) - assertEqualTime(t, expected, date) - if err != nil { - t.Errorf("err should be nil") - } + date, err := parseTime("22 Jul 2013 14:55:01 EST") + expected := time.Date(2013, time.July, 22, 14, 55, 1, 0, time.FixedZone("EST", -18000)) + assertEqualTime(t, expected, date) + if err != nil { + t.Errorf("err should be nil") + } } func assertEqualTime(t *testing.T, expected, actual time.Time) { - if !expected.Equal(actual) { - t.Errorf("expected %v but was %v", expected, actual) - } + if !expected.Equal(actual) { + t.Errorf("expected %v but was %v", expected, actual) + } }