Fix issue 1: Added dependency on go-iconv. This lib is needed to ensure we pass valid UTF-8 encoded data to the XML tokenizer.
This commit is contained in:
parent
bacbff0e71
commit
02d19ed0bd
2
LICENSE
2
LICENSE
|
@ -1,5 +1,5 @@
|
|||
|
||||
Copyright (c) 2010, Jim Teeuwen.
|
||||
Copyright (c) 2010-2011, Jim Teeuwen.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
|
|
6
README
6
README
|
@ -8,6 +8,12 @@
|
|||
Nodes now simply become collections and don't require you to read them in the
|
||||
order in which the xml.Parser finds them.
|
||||
|
||||
================================================================================
|
||||
DEPENDENCIES
|
||||
================================================================================
|
||||
|
||||
go-iconv: https://github.com/sloonz/go-iconv
|
||||
|
||||
================================================================================
|
||||
USAGE
|
||||
================================================================================
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
include $(GOROOT)/src/Make.inc
|
||||
|
||||
TARG = xmlx
|
||||
GOFILES = document.go node.go io.go entitymap.go\
|
||||
GOFILES = document.go node.go entitymap.go
|
||||
|
||||
include $(GOROOT)/src/Make.pkg
|
||||
|
|
|
@ -1,41 +1,40 @@
|
|||
// Copyright (c) 2010, Jim Teeuwen. All rights reserved.
|
||||
// This code is subject to a 1-clause BSD license.
|
||||
// The contents of which can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
Copyright (c) 2010, Jim Teeuwen.
|
||||
All rights reserved.
|
||||
|
||||
This code is subject to a 1-clause BSD license.
|
||||
The contents of which can be found in the LICENSE file.
|
||||
|
||||
|
||||
This package wraps the standard XML library and uses it to build a node tree of
|
||||
any document you load. This allows you to look up nodes forwards and backwards,
|
||||
as well as perform search queries (no xpath support yet).
|
||||
as well as perform simple search queries.
|
||||
|
||||
Nodes now simply become collections and don't require you to read them in the
|
||||
order in which the xml.Parser finds them.
|
||||
|
||||
The Document currently implements 2 simple search functions which allow you to
|
||||
The Document currently implements 2 search functions which allow you to
|
||||
look for specific nodes.
|
||||
|
||||
Document.SelectNode(namespace, name string) *Node;
|
||||
Document.SelectNodes(namespace, name string) []*Node;
|
||||
*xmlx.Document.SelectNode(namespace, name string) *Node;
|
||||
*xmlx.Document.SelectNodes(namespace, name string) []*Node;
|
||||
|
||||
SelectNode() returns the first, single node it finds matching the given name
|
||||
and namespace. SelectNodes() returns a slice containing all the matching nodes.
|
||||
|
||||
Note that these search functions can be invoked on individual nodes as well.
|
||||
This allows you to search only a subset of the entire document.
|
||||
|
||||
*/
|
||||
package xmlx
|
||||
|
||||
import "os"
|
||||
import "io"
|
||||
import "io/ioutil"
|
||||
import "path"
|
||||
import "strings"
|
||||
import "xml"
|
||||
import "fmt"
|
||||
import "http"
|
||||
import (
|
||||
"os"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"path"
|
||||
"strings"
|
||||
"xml"
|
||||
"fmt"
|
||||
"http"
|
||||
"iconv"
|
||||
)
|
||||
|
||||
type Document struct {
|
||||
Version string
|
||||
|
@ -82,6 +81,12 @@ func (this *Document) SelectNodes(namespace, name string) []*Node {
|
|||
// *** Satisfy ILoader interface
|
||||
// *****************************************************************************
|
||||
func (this *Document) LoadString(s string) (err os.Error) {
|
||||
// Ensure we are passing UTF-8 encoding content to the XML tokenizer.
|
||||
if s, err = this.correctEncoding(s); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// tokenize data
|
||||
xp := xml.NewParser(strings.NewReader(s))
|
||||
xp.Entity = this.Entity
|
||||
|
||||
|
@ -218,3 +223,48 @@ func (this *Document) SaveStream(w io.Writer) (err os.Error) {
|
|||
_, err = w.Write([]byte(s))
|
||||
return
|
||||
}
|
||||
|
||||
// Use libiconv to ensure we get UTF-8 encoded data. The Go Xml tokenizer will
|
||||
// throw a tantrum if we give it anything else.
|
||||
func (this *Document) correctEncoding(data string) (ret string, err os.Error) {
|
||||
var cd *iconv.Iconv
|
||||
var tok xml.Token
|
||||
|
||||
enc := "utf-8"
|
||||
xp := xml.NewParser(strings.NewReader(data))
|
||||
xp.Entity = this.Entity
|
||||
|
||||
loop:
|
||||
for {
|
||||
if tok, err = xp.Token(); err != nil {
|
||||
if err == os.EOF {
|
||||
break loop
|
||||
}
|
||||
return "", err
|
||||
}
|
||||
|
||||
switch tt := tok.(type) {
|
||||
case xml.ProcInst:
|
||||
if tt.Target == "xml" { // xml doctype
|
||||
enc = strings.ToLower(string(tt.Inst))
|
||||
if i := strings.Index(enc, `encoding="`); i > -1 {
|
||||
enc = enc[i+len(`encoding="`):]
|
||||
i = strings.Index(enc, `"`)
|
||||
enc = enc[:i]
|
||||
break loop
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if enc == "utf-8" {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
if cd, err = iconv.Open("utf-8", enc); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
defer cd.Close()
|
||||
return cd.Conv(data)
|
||||
}
|
||||
|
|
|
@ -1,11 +1,6 @@
|
|||
/*
|
||||
Copyright (c) 2010, Jim Teeuwen.
|
||||
All rights reserved.
|
||||
|
||||
This code is subject to a 1-clause BSD license.
|
||||
The contents of which can be found in the LICENSE file.
|
||||
*/
|
||||
|
||||
// Copyright (c) 2010, Jim Teeuwen. All rights reserved.
|
||||
// This code is subject to a 1-clause BSD license.
|
||||
// The contents of which can be found in the LICENSE file.
|
||||
package xmlx
|
||||
|
||||
/*
|
||||
|
@ -20,11 +15,13 @@ package xmlx
|
|||
"&acirc;" (â) is not the same as "&Acirc;" (Â).
|
||||
*/
|
||||
|
||||
import "os"
|
||||
import "fmt"
|
||||
import "utf8"
|
||||
import "regexp"
|
||||
import "strconv"
|
||||
import (
|
||||
"os"
|
||||
"fmt"
|
||||
"utf8"
|
||||
"regexp"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
var reg_entnumeric = regexp.MustCompile("^&#[0-9]+;$")
|
||||
var reg_entnamed = regexp.MustCompile("^&[a-zA-Z]+;$")
|
||||
|
|
30
xmlx/io.go
30
xmlx/io.go
|
@ -1,30 +0,0 @@
|
|||
/*
|
||||
Copyright (c) 2010, Jim Teeuwen.
|
||||
All rights reserved.
|
||||
|
||||
This code is subject to a 1-clause BSD license.
|
||||
The contents of which can be found in the LICENSE file.
|
||||
*/
|
||||
|
||||
package xmlx
|
||||
|
||||
import "os"
|
||||
import "io"
|
||||
|
||||
type ILoader interface {
|
||||
LoadUrl(string) os.Error
|
||||
LoadFile(string) os.Error
|
||||
LoadString(string) os.Error
|
||||
LoadStream(*io.Reader) os.Error
|
||||
}
|
||||
|
||||
type ISaver interface {
|
||||
SaveFile(string) os.Error
|
||||
SaveString(string) (string, os.Error)
|
||||
SaveStream(*io.Writer) os.Error
|
||||
}
|
||||
|
||||
type ILoaderSaver interface {
|
||||
ILoader
|
||||
ISaver
|
||||
}
|
14
xmlx/node.go
14
xmlx/node.go
|
@ -8,12 +8,14 @@ The contents of which can be found in the LICENSE file.
|
|||
|
||||
package xmlx
|
||||
|
||||
import "os"
|
||||
import "strings"
|
||||
import "xml"
|
||||
import "bytes"
|
||||
import "fmt"
|
||||
import "strconv"
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
"xml"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
const (
|
||||
NT_ROOT = iota
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
<!DOCTYPE xml>
|
||||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="0.91">
|
||||
<channel>
|
||||
<title>WriteTheWeb</title>
|
||||
|
|
|
@ -14,7 +14,7 @@ func TestLoadLocal(t *testing.T) {
|
|||
doc := New()
|
||||
|
||||
if err := doc.LoadFile("test.xml"); err != nil {
|
||||
t.Errorf("%s", err)
|
||||
t.Error(err.String())
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -24,11 +24,11 @@ func TestLoadLocal(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func _TestLoadRemote(t *testing.T) {
|
||||
func TestLoadRemote(t *testing.T) {
|
||||
doc := New()
|
||||
|
||||
if err := doc.LoadUri("http://www.w3schools.com/xml/plant_catalog.xml"); err != nil {
|
||||
t.Errorf("%s", err)
|
||||
if err := doc.LoadUri("http://rss.cnn.com/rss/cnn_latest.rss"); err != nil {
|
||||
t.Error(err.String())
|
||||
return
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue