commit 52ad8e29a84d63ab8e21235d72fc73e1993cdabe
Author: ptrcnull
Date:   Thu Jul 7 22:59:11 2022 +0200

    feat: Initial commit

diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..bc8c5fb
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module git.ddd.rip/ptrcnull/html
+
+go 1.18
+
+require golang.org/x/net v0.0.0-20220706163947-c90051bbdb60
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..435c791
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+golang.org/x/net v0.0.0-20220706163947-c90051bbdb60 h1:8NSylCMxLW4JvserAndSgFL7aPli6A68yf0bYFTcWCM=
+golang.org/x/net v0.0.0-20220706163947-c90051bbdb60/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
diff --git a/html.go b/html.go
new file mode 100644
index 0000000..cdd8c56
--- /dev/null
+++ b/html.go
@@ -0,0 +1,208 @@
+// Package utils provides DOM-style lookup and traversal helpers on top of
+// the node tree produced by golang.org/x/net/html.
+package utils
+
+import (
+	"bytes"
+	"io"
+	"strings"
+
+	"golang.org/x/net/html"
+)
+
+// Node wraps an *html.Node. The embedded node's fields (Type, Data, Attr,
+// FirstChild, ...) stay directly accessible.
+type Node struct {
+	*html.Node
+}
+
+// Parse reads an HTML document from r and returns the root of its node tree.
+// An error from html.Parse is returned unchanged, with a nil *Node.
+func Parse(r io.Reader) (*Node, error) {
+	root, err := html.Parse(r)
+	if err != nil {
+		return nil, err
+	}
+	return &Node{root}, nil
+}
+
+// QuerySelector returns the first node in the subtree rooted at n (n
+// included) that matches selector, or nil if nothing matches.
+//
+// Only three selector forms are recognised: "#id", ".class" and a bare tag
+// name. Anything more complex is treated as a literal tag name.
+func (n *Node) QuerySelector(selector string) *Node {
+	switch {
+	case strings.HasPrefix(selector, "#"):
+		return n.GetElementById(selector[1:])
+	case strings.HasPrefix(selector, "."):
+		return n.GetElementByClass(selector[1:])
+	default:
+		return n.GetElementByTagName(selector)
+	}
+}
+
+// GetElementById returns the first node, in depth-first document order,
+// that has an "id" attribute equal to id, or nil if there is none.
+func (n *Node) GetElementById(id string) *Node {
+	return n.FindOne(func(c *Node) bool {
+		return c.HasAttr("id", id)
+	})
+}
+
+// GetElementByClass returns the first node, in depth-first document order,
+// for which HasClass(class) is true, or nil if there is none.
+func (n *Node) GetElementByClass(class string) *Node {
+	return n.FindOne(func(c *Node) bool {
+		return c.HasClass(class)
+	})
+}
+
+// GetElementByTagName returns the first element node, in depth-first
+// document order, whose tag name equals name, or nil if there is none.
+func (n *Node) GetElementByTagName(name string) *Node {
+	return n.FindOne(func(c *Node) bool {
+		return c.Type == html.ElementNode && c.Data == name
+	})
+}
+
+// HasClass reports whether class is one of the whitespace-separated tokens
+// of a "class" attribute on n, so class="a b" matches both "a" and "b".
+//
+// A class attribute whose entire value equals class also matches; this keeps
+// callers that passed the full attribute value working.
+func (n *Node) HasClass(class string) bool {
+	for _, attr := range n.Attr {
+		if attr.Key != "class" {
+			continue
+		}
+		if attr.Val == class {
+			return true
+		}
+		for _, token := range strings.Fields(attr.Val) {
+			if token == class {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// GetAttr returns the values of all attributes on n whose key equals key, in
+// attribute order. The result is nil when no attribute matches.
+func (n *Node) GetAttr(key string) []string {
+	var vals []string
+	for _, attr := range n.Attr {
+		if attr.Key == key {
+			vals = append(vals, attr.Val)
+		}
+	}
+	return vals
+}
+
+// HasAttr reports whether n has an attribute with exactly this key and value.
+func (n *Node) HasAttr(key, value string) bool {
+	for _, attr := range n.Attr {
+		if attr.Key == key && attr.Val == value {
+			return true
+		}
+	}
+	return false
+}
+
+// ForEach calls cb once per direct child of n, in sibling order. Each child
+// is passed wrapped in a newly allocated *Node.
+func (n *Node) ForEach(cb func(n *Node)) {
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		cb(&Node{c})
+	}
+}
+
+// ChildNodes returns every direct child of n, whatever its node type. The
+// result is nil when n has no children.
+func (n *Node) ChildNodes() []*Node {
+	var kids []*Node
+	n.ForEach(func(c *Node) {
+		kids = append(kids, c)
+	})
+	return kids
+}
+
+// Children returns the direct children of n that are element nodes. The
+// result is nil when there are none.
+func (n *Node) Children() []*Node {
+	var elems []*Node
+	n.ForEach(func(c *Node) {
+		if c.Type == html.ElementNode {
+			elems = append(elems, c)
+		}
+	})
+	return elems
+}
+
+// Traverse calls cb on n and then on every descendant, depth-first in
+// document order (a node is visited before its children).
+func (n *Node) Traverse(cb func(n *Node)) {
+	var visit func(*Node)
+	visit = func(cur *Node) {
+		cb(cur)
+		cur.ForEach(visit)
+	}
+	visit(n)
+}
+
+// FindOne returns the first node, starting with n itself and continuing
+// depth-first in document order, for which cb returns true. It returns nil
+// if cb rejects every node. cb is not called again once a match is found.
+func (n *Node) FindOne(cb func(n *Node) bool) *Node {
+	var find func(*Node) *Node
+	find = func(cur *Node) *Node {
+		if cb(cur) {
+			return cur
+		}
+		// Walk the children directly instead of via ForEach so the search
+		// can stop at the first hit rather than stepping over the remaining
+		// siblings.
+		for c := cur.FirstChild; c != nil; c = c.NextSibling {
+			if hit := find(&Node{c}); hit != nil {
+				return hit
+			}
+		}
+		return nil
+	}
+	return find(n)
+}
+
+// FindMany returns every node, n included, for which cb returns true, in
+// depth-first document order. The result is nil when nothing matches.
+func (n *Node) FindMany(cb func(n *Node) bool) []*Node {
+	var hits []*Node
+	n.Traverse(func(cur *Node) {
+		if cb(cur) {
+			hits = append(hits, cur)
+		}
+	})
+	return hits
+}
+
+// Text returns the concatenated Data of every text node in the subtree
+// rooted at n, in document order.
+func (n *Node) Text() string {
+	var sb strings.Builder
+	n.Traverse(func(cur *Node) {
+		if cur.Type == html.TextNode {
+			sb.WriteString(cur.Data)
+		}
+	})
+	return sb.String()
+}
+
+// Render serialises the subtree rooted at n with html.Render and returns the
+// markup. On failure it returns the empty string and the error.
+func (n *Node) Render() (string, error) {
+	var buf bytes.Buffer
+	if err := html.Render(&buf, n.Node); err != nil {
+		return "", err
+	}
+	return buf.String(), nil
+}