From 0a655c27565df840c3e9368b46c55c514b781f66 Mon Sep 17 00:00:00 2001 From: jim teeuwen Date: Mon, 23 Nov 2009 05:16:27 +0100 Subject: [PATCH] new file: README new file: src/Makefile new file: src/document.go new file: src/io.go new file: src/node.go new file: src/test.xml new file: src/xmlx_test.go --- README | 38 +++++++ src/Makefile | 8 ++ src/document.go | 271 +++++++++++++++++++++++++++++++++++++++++++++++ src/io.go | 21 ++++ src/node.go | 181 +++++++++++++++++++++++++++++++ src/test.xml | 51 +++++++++ src/xmlx_test.go | 57 ++++++++++ 7 files changed, 627 insertions(+) create mode 100644 README create mode 100644 src/Makefile create mode 100644 src/document.go create mode 100644 src/io.go create mode 100644 src/node.go create mode 100644 src/test.xml create mode 100644 src/xmlx_test.go diff --git a/README b/README new file mode 100644 index 0000000..13ac910 --- /dev/null +++ b/README @@ -0,0 +1,38 @@ + + Author: Jim Teeuwen + + This package wraps the standard XML library and uses it to build a node tree of + any document you load. This allows you to look up nodes forwards and backwards, + as well as perform search queries (no xpath support yet). + + Nodes now simply become collections and don't require you to read them in the + order in which the xml.Parser finds them. + + xmlx.Document implements both these interfaces: + + type ILoader interface { + LoadFile(string) os.Error; + LoadString(string) os.Error; + LoadStream(*io.Reader) os.Error; + } + + type ISaver interface { + SaveFile(string) os.Error; + SaveString(string) (string, os.Error); + SaveStream(*io.Writer) os.Error; + } + + This allows you to load/save xml data to and from pretty much any source. + + The Document currently implements 2 simple search functions which allow you to + look for specific nodes. 
+ + Document.SelectNode(namespace, name string) *Node; + Document.SelectNodes(namespace, name string) []*Node; + + SelectNode() returns the first, single node it finds matching the given name + and namespace. SelectNodes() returns a slice containing all the matching nodes. + + Note that these search functions can be invoked on individual nodes as well. + This allows you to search only a subset of the entire document. + diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..277d60d --- /dev/null +++ b/src/Makefile @@ -0,0 +1,8 @@ + +include $(GOROOT)/src/Make.$(GOARCH) + +TARG=xmlx +GOFILES=document.go node.go io.go\ + + +include $(GOROOT)/src/Make.pkg diff --git a/src/document.go b/src/document.go new file mode 100644 index 0000000..beb0101 --- /dev/null +++ b/src/document.go @@ -0,0 +1,271 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + Author: Jim Teeuwen + + This package wraps the standard XML library and uses it to build a node tree of + any document you load. This allows you to look up nodes forwards and backwards, + as well as perform search queries (no xpath support yet). + + Nodes now simply become collections and don't require you to read them in the + order in which the xml.Parser finds them. + + xmlx.Document implements both these interfaces: + + type ILoader interface { + LoadFile(string) os.Error; + LoadString(string) os.Error; + LoadStream(*io.Reader) os.Error; + } + + type ISaver interface { + SaveFile(string) os.Error; + SaveString(string) (string, os.Error); + SaveStream(*io.Writer) os.Error; + } + + This allows you to load/save xml data to and from pretty much any source. + + The Document currently implements 2 simple search functions which allow you to + look for specific nodes. 
+ + Document.SelectNode(namespace, name string) *Node; + Document.SelectNodes(namespace, name string) []*Node; + + SelectNode() returns the first, single node it finds matching the given name + and namespace. SelectNodes() returns a slice containing all the matching nodes. + + Note that these search functions can be invoked on individual nodes as well. + This allows you to search only a subset of the entire document. + +*/ +package xmlx + +import "os" +import "io" +import "strings" +import "xml" +import "fmt" + +type Document struct { + Version string; + Encoding string; + StandAlone string; + SaveDocType bool; + Root *Node; +} + +func New() *Document { + return &Document{ + Version: "1.0", + Encoding: "utf-8", + StandAlone: "yes", + SaveDocType: true, + } +} + +func (this *Document) String() string { + s, _ := this.SaveString(); + return s; +} + + +func (this *Document) SelectNode(namespace, name string) *Node { + return this.Root.SelectNode(namespace, name); +} + +func (this *Document) SelectNodes(namespace, name string) []*Node { + return this.Root.SelectNodes(namespace, name); +} + +// ***************************************************************************** +// *** Satisfy ILoader interface +// ***************************************************************************** +func (this *Document) LoadString(s string) (err os.Error) { + xp := xml.NewParser(strings.NewReader(s)); + this.Root = NewNode(NT_ROOT); + ct := this.Root; + + for { + tok, err := xp.Token(); + if err != nil { + return + } + + t1, ok := tok.(xml.SyntaxError); + if ok { + err = os.NewError(t1.String()); + return; + } + + t2, ok := tok.(xml.CharData); + if ok { + if ct != nil { + ct.Value = strings.TrimSpace(string(t2)) + } + continue + } + + t3, ok := tok.(xml.Comment); + if ok { + t := NewNode(NT_COMMENT); + t.Value = strings.TrimSpace(string(t3)); + if ct != nil { + ct.AddChild(t) + } + continue + } + + t4, ok := tok.(xml.Directive); + if ok { + t := NewNode(NT_DIRECTIVE); + t.Value = 
strings.TrimSpace(string(t4)); + if ct != nil { + ct.AddChild(t) + } + continue + } + + t5, ok := tok.(xml.StartElement); + if ok { + t := NewNode(NT_ELEMENT); + t.Name = t5.Name; + t.Attributes = make([]Attr, len(t5.Attr)); + for i, v := range t5.Attr { + t.Attributes[i].Name = v.Name; + t.Attributes[i].Value = v.Value; + } + if ct != nil { + ct.AddChild(t) + } + ct = t; + continue + } + + t6, ok := tok.(xml.ProcInst); + if ok { + if t6.Target == "xml" { // xml doctype + doctype := strings.TrimSpace(string(t6.Inst)); + + /* // Not needed. There is only xml version 1.0 + pos := strings.Index(doctype, `version="`); + if pos > -1 { + this.Version = doctype[pos+len(`version="`) : len(doctype)]; + pos = strings.Index(this.Version, `"`); + this.Version = this.Version[0:pos]; + } + */ + + /* // Not needed. Any string we handle in Go is UTF8 + // encoded. This means we will save UTF8 data as well. + pos = strings.Index(doctype, `encoding="`); + if pos > -1 { + this.Encoding = doctype[pos+len(`encoding="`) : len(doctype)]; + pos = strings.Index(this.Encoding, `"`); + this.Encoding = this.Encoding[0:pos]; + } + */ + + pos := strings.Index(doctype, `standalone="`); + if pos > -1 { + this.StandAlone = doctype[pos+len(`standalone="`) : len(doctype)]; + pos = strings.Index(this.StandAlone, `"`); + this.StandAlone = this.StandAlone[0:pos]; + } + } else { + t := NewNode(NT_PROCINST); + t.Target = strings.TrimSpace(t6.Target); + t.Value = strings.TrimSpace(string(t6.Inst)); + if ct != nil { + ct.AddChild(t) + } + } + continue + } + + _, ok = tok.(xml.EndElement); + if ok { + ct = ct.Parent; + continue + } + } + + return; +} + +func (this *Document) LoadFile(path string) (err os.Error) { + file, err := os.Open(path, os.O_RDONLY, 0600); + if err != nil { + return + } + defer file.Close(); + + content := ""; + buff := make([]byte, 256); + for { + _, err := file.Read(buff); + if err != nil { + break + } + content += string(buff); + } + + err = this.LoadString(content); + return; +} + 
+// LoadStream reads everything from *r and parses it with LoadString.
+// It returns an error for a nil r, the first read error other than os.EOF,
+// or the error reported by LoadString.
+func (this *Document) LoadStream(r *io.Reader) (err os.Error) {
+	if r == nil {
+		return os.NewError("xmlx: LoadStream called with a nil reader")
+	}
+
+	content := "";
+	buff := make([]byte, 256);
+	for {
+		// r points at an interface value, so it is dereferenced
+		// explicitly before the method call.
+		n, rerr := (*r).Read(buff);
+		// Keep only the n bytes this call produced; the rest of buff
+		// still holds data from earlier reads.
+		content += string(buff[0:n]);
+		if rerr == os.EOF {
+			break
+		}
+		if rerr != nil {
+			return rerr
+		}
+	}
+
+	return this.LoadString(content);
+}
+
+// *****************************************************************************
+// *** Satisfy ISaver interface
+// *****************************************************************************
+
+// SaveFile writes the output of SaveString to the file at path, creating it
+// with mode 0600 when it does not exist. An existing file is truncated first
+// so that no bytes of a longer, older version remain after the new content.
+func (this *Document) SaveFile(path string) (err os.Error) {
+	file, err := os.Open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0600);
+	if err != nil {
+		return
+	}
+	defer file.Close();
+
+	content, err := this.SaveString();
+	if err != nil {
+		return
+	}
+
+	_, err = file.Write(strings.Bytes(content));
+	return
+}
+
+// SaveString returns the document as text: the xml declaration when
+// SaveDocType is true, followed by the output of Root.String(). A document
+// without a Root contributes nothing after the declaration. The error result
+// is always nil.
+func (this *Document) SaveString() (s string, err os.Error) {
+	if this.SaveDocType {
+		// NOTE(review): the format string was empty in the reviewed text,
+		// apparently lost together with the other markup in this file. It
+		// is rebuilt here from the three arguments and the `standalone="`
+		// attribute that LoadString parses - confirm against upstream.
+		s = fmt.Sprintf(`<?xml version="%s" encoding="%s" standalone="%s"?>`,
+			this.Version, this.Encoding, this.StandAlone)
+	}
+
+	if this.Root != nil {
+		s += this.Root.String()
+	}
+	return;
+}
+
+// SaveStream writes the output of SaveString to *w. It returns an error for
+// a nil w, or the error reported by SaveString or by the write.
+func (this *Document) SaveStream(w *io.Writer) (err os.Error) {
+	if w == nil {
+		return os.NewError("xmlx: SaveStream called with a nil writer")
+	}
+
+	s, err := this.SaveString();
+	if err != nil {
+		return
+	}
+
+	// w points at an interface value, so it is dereferenced explicitly.
+	_, err = (*w).Write(strings.Bytes(s));
+	return;
+}
+
diff --git a/src/io.go b/src/io.go
new file mode 100644
index 0000000..37ecb80
--- /dev/null
+++ b/src/io.go
@@ -0,0 +1,21 @@
+package xmlx
+
+import "os"
+import "io"
+
+// ILoader is implemented by types that can fill themselves from a file
+// path, a string or a stream.
+type ILoader interface {
+	LoadFile(string) os.Error;
+	LoadString(string) os.Error;
+	LoadStream(*io.Reader) os.Error;
+}
+
+// ISaver is implemented by types that can write themselves to a file path,
+// a string or a stream. SaveString takes no argument, matching
+// Document.SaveString; declared with a string parameter, Document would not
+// satisfy this interface.
+type ISaver interface {
+	SaveFile(string) os.Error;
+	SaveString() (string, os.Error);
+	SaveStream(*io.Writer) os.Error;
+}
+
+// ILoaderSaver combines ILoader and ISaver.
+type ILoaderSaver interface {
+	ILoader;
+	ISaver;
+}
diff --git a/src/node.go b/src/node.go
new file mode 100644
index 0000000..d513163
--- /dev/null
+++ b/src/node.go
@@ -0,0 +1,181 @@
+package xmlx
+
+import "xml"
+import "fmt"
+
+// Node types, stored in Node.Type.
+const (
+	NT_ROOT = 0x00;
+	NT_DIRECTIVE = 0x01;
+	NT_PROCINST = 0x02;
+	NT_COMMENT = 0x03;
+	NT_ELEMENT = 0x04;
+)
+
+// Attr is a single attribute of an element node.
+type Attr struct {
+	Name	xml.Name;
+	Value	string;
+}
+
+// Node is one entry in the document tree. Type is one of the NT_ constants.
+// Name and Attributes are filled in for elements; Value holds the element
+// text, comment text, directive text or processing-instruction data.
+type Node struct {
+	Type		byte;
+	Name		xml.Name;
+	Children	[]*Node;
+	Attributes	[]Attr;
+	Parent		*Node;
+	Value		string;
+
+	// procinst field
+	Target	string;
+}
+
+// NewNode returns a node of type tid with no parent and no children.
+func NewNode(tid byte) *Node { return &Node{Type: tid} }
+
+// appendNode returns list with n added at the end. The builtin toolbox of
+// this Go version has no append, so the backing array is grown by hand, by
+// doubling, which keeps repeated additions from copying the list every time.
+func appendNode(list []*Node, n *Node) []*Node {
+	size := len(list);
+	if size == cap(list) {
+		grown := make([]*Node, size, 2*size+1);
+		for i, v := range list {
+			grown[i] = v
+		}
+		list = grown;
+	}
+	list = list[0 : size+1];
+	list[size] = n;
+	return list;
+}
+
+// SelectNode returns the first node, searching this node and then its
+// descendants depth-first, whose name matches namespace and name. It returns
+// nil when nothing matches.
+func (this *Node) SelectNode(namespace, name string) *Node {
+	return rec_SelectNode(this, namespace, name);
+}
+
+// rec_SelectNode is the recursive worker behind SelectNode. A nil cn matches
+// nothing.
+func rec_SelectNode(cn *Node, namespace, name string) *Node {
+	if cn == nil {
+		return nil
+	}
+	if cn.Name.Space == namespace && cn.Name.Local == name {
+		return cn;
+	}
+
+	for _, v := range cn.Children {
+		tn := rec_SelectNode(v, namespace, name);
+		if tn != nil { return tn }
+	}
+	return nil;
+}
+
+// SelectNodes returns every node, among this node and its descendants, whose
+// name matches namespace and name. A matching node is listed before the
+// matches found below it. The result is empty, never nil, when nothing
+// matches.
+func (this *Node) SelectNodes(namespace, name string) []*Node {
+	list := make([]*Node, 0);
+	rec_SelectNodes(this, namespace, name, &list);
+	return list;
+}
+
+// rec_SelectNodes is the recursive worker behind SelectNodes; matches are
+// added to *list. The search continues below a matching node so that nested
+// matches are reported as well.
+func rec_SelectNodes(cn *Node, namespace, name string, list *[]*Node) {
+	if cn == nil {
+		return
+	}
+	if cn.Name.Space == namespace && cn.Name.Local == name {
+		*list = appendNode(*list, cn)
+	}
+
+	for _, v := range cn.Children {
+		rec_SelectNodes(v, namespace, name, list);
+	}
+}
+
+// String returns the textual form of this node and everything below it,
+// chosen by Type. An unknown Type yields the empty string.
+func (this *Node) String() (s string) {
+	switch this.Type {
+	case NT_PROCINST:
+		s = this.printProcInst()
+	case NT_COMMENT:
+		s = this.printComment()
+	case NT_DIRECTIVE:
+		s = this.printDirective()
+	case NT_ELEMENT:
+		s = this.printElement()
+	case NT_ROOT:
+		s = this.printRoot()
+	}
+	return;
+}
+
+// printRoot concatenates the output of every child; the root itself has no
+// markup of its own.
+func (this *Node) printRoot() (s string) {
+	for _, v := range this.Children {
+		s += v.String()
+	}
+	return;
+}
+
+// NOTE(review): the three printers below returned "" in the reviewed text,
+// apparently because their markup literals were lost together with the other
+// markup in this file. The delimiters are rebuilt from standard XML syntax -
+// confirm the exact spacing against upstream.
+
+// printProcInst renders a processing instruction from Target and Value.
+func (this *Node) printProcInst() (s string) {
+	s = "<?" + this.Target + " " + this.Value + "?>";
+	return;
+}
+
+// printComment renders a comment from Value.
+func (this *Node) printComment() (s string) {
+	s = "<!-- " + this.Value + " -->";
+	return;
+}
+
+// printDirective renders a directive from Value.
+func (this *Node) printDirective() (s string) {
+	s = "<!" + this.Value + ">";
+	return;
+}
+
+// printElement renders an element: start tag with attributes, then the
+// children, then Value, then the end tag. An element with neither children
+// nor Value is written in the self-closing form.
+// NOTE(review): Value and attribute values are written as stored, without
+// escaping of & < > " - confirm whether callers are expected to escape.
+func (this *Node) printElement() (s string) {
+	name := this.Name.Local;
+	if len(this.Name.Space) > 0 {
+		name = this.Name.Space + ":" + name
+	}
+	s = "<" + name;
+
+	for _, v := range this.Attributes {
+		if len(v.Name.Space) > 0 {
+			s += fmt.Sprintf(` %s:%s="%s"`, v.Name.Space, v.Name.Local, v.Value)
+		} else {
+			s += fmt.Sprintf(` %s="%s"`, v.Name.Local, v.Value)
+		}
+	}
+
+	if len(this.Children) == 0 && len(this.Value) == 0 {
+		s += " />";
+		return;
+	}
+
+	s += ">";
+
+	for _, v := range this.Children {
+		s += v.String()
+	}
+
+	s += this.Value;
+	// The end tag repeats the qualified name used in the start tag (this
+	// literal was also empty in the reviewed text).
+	s += "</" + name + ">";
+	return;
+}
+
+// AddChild makes t the last child of this node. If t already has a parent it
+// is removed from that parent first. A nil t is ignored.
+func (this *Node) AddChild(t *Node) {
+	if t == nil {
+		return
+	}
+	if t.Parent != nil {
+		t.Parent.RemoveChild(t)
+	}
+	t.Parent = this;
+	this.Children = appendNode(this.Children, t);
+}
+
+// RemoveChild detaches t from this node and clears t.Parent. Nothing happens
+// when t is not one of this node's children.
+func (this *Node) RemoveChild(t *Node) {
+	pos := -1;
+	for i, v := range this.Children {
+		if v == t {
+			pos = i;
+			break;
+		}
+	}
+
+	if pos == -1 {
+		return
+	}
+	slice := make([]*Node, len(this.Children)-1);
+
+	idx := 0;
+	for i, v := range this.Children {
+		if i != pos {
+			slice[idx] = v;
+			idx++;
+		}
+	}
+
+	// Install the shortened list; without this assignment the child would
+	// stay in Children while its Parent was already cleared.
+	this.Children = slice;
+	t.Parent = nil;
+}
diff --git a/src/test.xml b/src/test.xml
new file mode 100644
index 0000000..01ef1ee
--- /dev/null
+++ b/src/test.xml
@@ -0,0 +1,51 @@
+
+
+
+ WriteTheWeb
+ http://writetheweb.com
+ News for web users that write back
+ en-us
+ Copyright 2000, WriteTheWeb team.
+ editor@writetheweb.com
+ webmaster@writetheweb.com
+
+ WriteTheWeb
+ http://writetheweb.com/images/mynetscape88.gif
+ http://writetheweb.com
+ 88
+ 31
+ News for web users that write back
+
+
+ Giving the world a pluggable Gnutella
+ http://writetheweb.com/read.php?item=24
+ WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing.
+
+
+
+ Syndication discussions hot up
+ http://writetheweb.com/read.php?item=23
+ After a period of dormancy, the Syndication mailing list has become active again, with contributions from leaders in traditional media and Web syndication.
+ + + Personal web server integrates file sharing and messaging + http://writetheweb.com/read.php?item=22 + The Magi Project is an innovative project to create a combined personal web server and messaging system that enables the sharing and synchronization of information across desktop, laptop and palmtop devices. + + + Syndication and Metadata + http://writetheweb.com/read.php?item=21 + RSS is probably the best known metadata format around. RDF is probably one of the least understood. In this essay, published on my O'Reilly Network weblog, I argue that the next generation of RSS should be based on RDF. + + + UK bloggers get organised + http://writetheweb.com/read.php?item=20 + Looks like the weblogs scene is gathering pace beyond the shores of the US. There's now a UK-specific page on weblogs.com, and a mailing list at egroups. + + + Yournamehere.com more important than anything + http://writetheweb.com/read.php?item=19 + Whatever you're publishing on the web, your site name is the most valuable asset you have, according to Carl Steadman. 
+
+
+
diff --git a/src/xmlx_test.go b/src/xmlx_test.go
new file mode 100644
index 0000000..0c09a71
--- /dev/null
+++ b/src/xmlx_test.go
@@ -0,0 +1,57 @@
+package xmlx
+
+import "os"
+import "testing"
+
+// loadTestDoc loads test.xml into a fresh Document. On failure it reports
+// the error through t and returns nil.
+func loadTestDoc(t *testing.T) *Document {
+	doc := New();
+	err := doc.LoadFile("test.xml");
+	if err != nil {
+		t.Errorf("LoadFile(): %s", err);
+		return nil;
+	}
+	return doc;
+}
+
+// TestDoc checks that loading test.xml produces a root with children.
+func TestDoc(t *testing.T) {
+	doc := loadTestDoc(t);
+	if doc == nil {
+		return
+	}
+
+	if len(doc.Root.Children) == 0 {
+		// No arguments here: the message has no formatting verb.
+		t.Errorf("Root node has no children.");
+		return;
+	}
+}
+
+// TestSave checks that a loaded document can be written back to disk.
+func TestSave(t *testing.T) {
+	doc := loadTestDoc(t);
+	if doc == nil {
+		return
+	}
+
+	// Remove the output again so a test run leaves no file behind.
+	defer os.Remove("test1.xml");
+
+	err := doc.SaveFile("test1.xml");
+	if err != nil {
+		t.Errorf("SaveFile(): %s", err);
+		return;
+	}
+}
+
+// TestNodeSearch checks that both search functions find the "item" elements
+// of test.xml.
+func TestNodeSearch(t *testing.T) {
+	doc := loadTestDoc(t);
+	if doc == nil {
+		return
+	}
+
+	node := doc.SelectNode("", "item");
+	if node == nil {
+		t.Errorf("SelectNode(): No node found.");
+		return;
+	}
+
+	nodes := doc.SelectNodes("", "item");
+	if len(nodes) == 0 {
+		t.Errorf("SelectNodes(): no nodes found.");
+		return;
+	}
+}
+