2009-11-23 04:16:27 +00:00
|
|
|
/*
|
2010-09-26 20:59:14 +00:00
|
|
|
Copyright (c) 2010, Jim Teeuwen.
|
|
|
|
All rights reserved.
|
|
|
|
|
|
|
|
This code is subject to a 1-clause BSD license.
|
|
|
|
The contents of which can be found in the LICENSE file.
|
|
|
|
|
2009-11-23 04:16:27 +00:00
|
|
|
|
|
|
|
This package wraps the standard XML library and uses it to build a node tree of
|
|
|
|
any document you load. This allows you to look up nodes forwards and backwards,
|
|
|
|
as well as perform search queries (no xpath support yet).
|
|
|
|
|
|
|
|
Nodes now simply become collections and don't require you to read them in the
|
|
|
|
order in which the xml.Parser finds them.
|
|
|
|
|
|
|
|
The Document currently implements 2 simple search functions which allow you to
|
|
|
|
look for specific nodes.
|
2010-05-06 03:36:48 +00:00
|
|
|
|
2009-11-23 04:16:27 +00:00
|
|
|
Document.SelectNode(namespace, name string) *Node;
|
|
|
|
Document.SelectNodes(namespace, name string) []*Node;
|
2010-05-06 03:36:48 +00:00
|
|
|
|
2009-11-23 04:16:27 +00:00
|
|
|
SelectNode() returns the first, single node it finds matching the given name
|
|
|
|
and namespace. SelectNodes() returns a slice containing all the matching nodes.
|
2010-05-06 03:36:48 +00:00
|
|
|
|
2009-11-23 04:16:27 +00:00
|
|
|
Note that these search functions can be invoked on individual nodes as well.
|
|
|
|
This allows you to search only a subset of the entire document.
|
|
|
|
|
|
|
|
*/
|
|
|
|
package xmlx
|
|
|
|
|
|
|
|
import "os"
|
|
|
|
import "io"
|
2010-08-22 03:07:38 +00:00
|
|
|
import "bytes"
|
2010-05-26 00:24:44 +00:00
|
|
|
import "io/ioutil"
|
|
|
|
import "path"
|
2009-11-23 04:16:27 +00:00
|
|
|
import "strings"
|
|
|
|
import "xml"
|
|
|
|
import "fmt"
|
2009-11-25 01:50:06 +00:00
|
|
|
import "http"
|
2009-11-23 04:16:27 +00:00
|
|
|
|
|
|
|
type Document struct {
|
2010-05-06 03:36:48 +00:00
|
|
|
Version string
|
|
|
|
Encoding string
|
|
|
|
StandAlone string
|
|
|
|
SaveDocType bool
|
|
|
|
Root *Node
|
|
|
|
Entity map[string]string
|
|
|
|
Verbose bool
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func New() *Document {
|
|
|
|
return &Document{
|
2010-05-06 03:36:48 +00:00
|
|
|
Version: "1.0",
|
|
|
|
Encoding: "utf-8",
|
|
|
|
StandAlone: "yes",
|
2009-11-23 04:16:27 +00:00
|
|
|
SaveDocType: true,
|
2010-05-06 03:36:48 +00:00
|
|
|
Entity: make(map[string]string),
|
|
|
|
Verbose: false,
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-11-23 17:28:44 +00:00
|
|
|
// This loads a rather massive table of non-conventional xml escape sequences.
|
|
|
|
// Needed to make the parser map them to characters properly. It is advised to
|
|
|
|
// set only those entities needed manually using the document.Entity map, but
|
|
|
|
// if need be, this method can be called to fill the map with the entire set
|
|
|
|
// defined on http://www.w3.org/TR/html4/sgml/entities.html
|
2010-08-22 03:07:38 +00:00
|
|
|
func (this *Document) LoadExtendedEntityMap() { loadNonStandardEntities(this.Entity) }
|
2009-11-23 17:28:44 +00:00
|
|
|
|
2009-11-23 04:16:27 +00:00
|
|
|
func (this *Document) String() string {
|
2010-05-06 03:36:48 +00:00
|
|
|
s, _ := this.SaveString()
|
|
|
|
return s
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (this *Document) SelectNode(namespace, name string) *Node {
|
2010-05-06 03:36:48 +00:00
|
|
|
return this.Root.SelectNode(namespace, name)
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (this *Document) SelectNodes(namespace, name string) []*Node {
|
2010-05-06 03:36:48 +00:00
|
|
|
return this.Root.SelectNodes(namespace, name)
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// *****************************************************************************
|
|
|
|
// *** Satisfy ILoader interface
|
|
|
|
// *****************************************************************************
|
|
|
|
func (this *Document) LoadString(s string) (err os.Error) {
|
2010-05-06 03:36:48 +00:00
|
|
|
xp := xml.NewParser(strings.NewReader(s))
|
|
|
|
xp.Entity = this.Entity
|
2009-11-23 16:50:29 +00:00
|
|
|
|
2010-05-06 03:36:48 +00:00
|
|
|
this.Root = NewNode(NT_ROOT)
|
|
|
|
ct := this.Root
|
2009-11-23 04:16:27 +00:00
|
|
|
|
2010-05-26 00:24:44 +00:00
|
|
|
var tok xml.Token
|
2010-08-22 03:07:38 +00:00
|
|
|
var t *Node
|
|
|
|
var i int
|
|
|
|
var doctype string
|
|
|
|
var v xml.Attr
|
|
|
|
|
2009-11-23 04:16:27 +00:00
|
|
|
for {
|
2010-05-26 00:24:44 +00:00
|
|
|
if tok, err = xp.Token(); err != nil {
|
|
|
|
if err == os.EOF {
|
|
|
|
return nil
|
2009-11-24 13:37:17 +00:00
|
|
|
}
|
2009-11-23 04:16:27 +00:00
|
|
|
|
2010-05-26 00:24:44 +00:00
|
|
|
if this.Verbose {
|
|
|
|
fmt.Fprintf(os.Stderr, "Xml Error: %s\n", err)
|
|
|
|
}
|
|
|
|
return err
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
2010-05-26 00:24:44 +00:00
|
|
|
switch tt := tok.(type) {
|
|
|
|
case xml.SyntaxError:
|
|
|
|
return os.NewError(tt.String())
|
|
|
|
case xml.CharData:
|
2010-08-22 03:07:38 +00:00
|
|
|
ct.Value = strings.TrimSpace(string([]byte(tt)))
|
2010-05-26 00:24:44 +00:00
|
|
|
case xml.Comment:
|
2010-05-06 03:36:48 +00:00
|
|
|
t := NewNode(NT_COMMENT)
|
2010-08-22 03:07:38 +00:00
|
|
|
t.Value = strings.TrimSpace(string([]byte(tt)))
|
2010-05-06 03:36:48 +00:00
|
|
|
ct.AddChild(t)
|
2010-05-26 00:24:44 +00:00
|
|
|
case xml.Directive:
|
2010-08-22 03:07:38 +00:00
|
|
|
t = NewNode(NT_DIRECTIVE)
|
|
|
|
t.Value = strings.TrimSpace(string([]byte(tt)))
|
2010-05-06 03:36:48 +00:00
|
|
|
ct.AddChild(t)
|
2010-05-26 00:24:44 +00:00
|
|
|
case xml.StartElement:
|
2010-08-22 03:07:38 +00:00
|
|
|
t = NewNode(NT_ELEMENT)
|
2010-05-26 00:24:44 +00:00
|
|
|
t.Name = tt.Name
|
2010-08-22 03:07:38 +00:00
|
|
|
t.Attributes = make([]*Attr, len(tt.Attr))
|
|
|
|
for i, v = range tt.Attr {
|
|
|
|
t.Attributes[i] = new(Attr)
|
2010-05-06 03:36:48 +00:00
|
|
|
t.Attributes[i].Name = v.Name
|
|
|
|
t.Attributes[i].Value = v.Value
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
2010-05-06 03:36:48 +00:00
|
|
|
ct.AddChild(t)
|
|
|
|
ct = t
|
2010-05-26 00:24:44 +00:00
|
|
|
case xml.ProcInst:
|
|
|
|
if tt.Target == "xml" { // xml doctype
|
2010-08-22 03:07:38 +00:00
|
|
|
doctype = strings.TrimSpace(string(tt.Inst))
|
|
|
|
if i = strings.Index(doctype, `standalone="`); i > -1 {
|
|
|
|
this.StandAlone = doctype[i+len(`standalone="`) : len(doctype)]
|
|
|
|
i = strings.Index(this.StandAlone, `"`)
|
|
|
|
this.StandAlone = this.StandAlone[0:i]
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
2010-05-26 00:24:44 +00:00
|
|
|
} else {
|
2010-08-22 03:07:38 +00:00
|
|
|
t = NewNode(NT_PROCINST)
|
2010-05-26 00:24:44 +00:00
|
|
|
t.Target = strings.TrimSpace(tt.Target)
|
|
|
|
t.Value = strings.TrimSpace(string(tt.Inst))
|
2010-05-06 03:36:48 +00:00
|
|
|
ct.AddChild(t)
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
2010-05-26 00:24:44 +00:00
|
|
|
case xml.EndElement:
|
|
|
|
if ct = ct.Parent; ct == nil {
|
|
|
|
return
|
|
|
|
}
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-05-06 03:36:48 +00:00
|
|
|
return
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
2010-05-26 00:24:44 +00:00
|
|
|
func (this *Document) LoadFile(filename string) (err os.Error) {
|
|
|
|
var data []byte
|
2009-11-23 04:16:27 +00:00
|
|
|
|
2010-05-26 00:24:44 +00:00
|
|
|
if data, err = ioutil.ReadFile(path.Clean(filename)); err != nil {
|
|
|
|
return
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
2010-05-26 00:24:44 +00:00
|
|
|
return this.LoadString(string(data))
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
2009-11-25 01:50:06 +00:00
|
|
|
func (this *Document) LoadUri(uri string) (err os.Error) {
|
2010-05-06 03:36:48 +00:00
|
|
|
r, _, err := http.Get(uri)
|
2009-11-25 01:50:06 +00:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2010-05-06 03:36:48 +00:00
|
|
|
defer r.Body.Close()
|
2009-11-25 01:50:06 +00:00
|
|
|
|
2010-05-26 00:24:44 +00:00
|
|
|
var b []byte
|
|
|
|
if b, err = ioutil.ReadAll(r.Body); err != nil {
|
|
|
|
return
|
2009-11-25 01:50:06 +00:00
|
|
|
}
|
|
|
|
|
2010-05-26 00:24:44 +00:00
|
|
|
err = this.LoadString(string(b))
|
2010-05-06 03:36:48 +00:00
|
|
|
return
|
2009-11-25 01:50:06 +00:00
|
|
|
}
|
|
|
|
|
2010-10-10 18:04:58 +00:00
|
|
|
func (this *Document) LoadStream(r io.Reader) (err os.Error) {
|
2010-11-05 00:26:35 +00:00
|
|
|
var buf bytes.Buffer
|
2010-08-22 03:07:38 +00:00
|
|
|
s := make([]byte, 1024)
|
|
|
|
|
2009-11-23 04:16:27 +00:00
|
|
|
for {
|
2010-11-05 00:26:35 +00:00
|
|
|
if _, err = r.Read(s); err != nil {
|
2009-11-23 04:16:27 +00:00
|
|
|
break
|
|
|
|
}
|
2010-11-05 00:26:35 +00:00
|
|
|
buf.Write(s)
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
2010-11-05 00:26:35 +00:00
|
|
|
err = this.LoadString(buf.String())
|
2010-05-06 03:36:48 +00:00
|
|
|
return
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// *****************************************************************************
|
|
|
|
// *** Satisfy ISaver interface
|
|
|
|
// *****************************************************************************
|
|
|
|
func (this *Document) SaveFile(path string) (err os.Error) {
|
2010-08-22 03:07:38 +00:00
|
|
|
var data string
|
|
|
|
if data, err = this.SaveString(); err != nil {
|
2009-11-23 04:16:27 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2010-08-22 03:07:38 +00:00
|
|
|
return ioutil.WriteFile(path, []byte(data), 0600)
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (this *Document) SaveString() (s string, err os.Error) {
|
|
|
|
if this.SaveDocType {
|
|
|
|
s = fmt.Sprintf(`<?xml version="%s" encoding="%s" standalone="%s"?>`,
|
|
|
|
this.Version, this.Encoding, this.StandAlone)
|
|
|
|
}
|
|
|
|
|
2010-05-06 03:36:48 +00:00
|
|
|
s += this.Root.String()
|
|
|
|
return
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|
|
|
|
|
2010-10-10 18:04:58 +00:00
|
|
|
func (this *Document) SaveStream(w io.Writer) (err os.Error) {
|
2010-05-06 03:36:48 +00:00
|
|
|
s, err := this.SaveString()
|
2009-11-23 04:16:27 +00:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
2010-05-06 03:36:48 +00:00
|
|
|
w.Write([]byte(s))
|
|
|
|
return
|
2009-11-23 04:16:27 +00:00
|
|
|
}
|