Simple HTML parsing utilities for Go
Go to file
ptrcnull 3a413c5335
docs: Add LICENSE and README.md
2023-03-27 00:20:40 +02:00
LICENSE docs: Add LICENSE and README.md 2023-03-27 00:20:40 +02:00
README.md docs: Add LICENSE and README.md 2023-03-27 00:20:40 +02:00
go.mod feat: Initial commit 2022-07-07 22:59:11 +02:00
go.sum feat: Initial commit 2022-07-07 22:59:11 +02:00
html.go feat: Add Node#TrimmedText 2022-07-08 00:32:27 +02:00

README.md

ptrcnull/html

A library wrapping golang.org/x/net/html with a bunch of convenience functions.

example usage:

package main

import (
  "log"
  "os"
  "strings"

  "git.ddd.rip/ptrcnull/html"
)

// main demonstrates typical use of the html package: it parses a document,
// locates the first <table>, optionally dumps it when DEBUG=1, and then logs
// the trimmed text of the cell labelled "Value" in every row whose text
// contains the phrase being searched for.
//
// getSomeHtmlPage is a placeholder for however the caller obtains the HTML;
// it is not defined in this example.
func main() {
  htmlText := getSomeHtmlPage()
  document, err := html.ParseDocument(htmlText)
  if err != nil {
    // %w is only understood by fmt.Errorf; the log package needs %v.
    log.Fatalf("parse document: %v", err)
  }

  table := document.QuerySelector("table")
  if table == nil {
    log.Fatalln("table not found")
  }

  if os.Getenv("DEBUG") == "1" {
    log.Println("table:", table.Render())
  }

  for _, row := range table.QuerySelectorAll("tr") {
    if !strings.Contains(row.Text(), "a thing you're looking for") {
      continue
    }

    // The predicate must declare its bool result to be a valid func literal.
    cell := row.FindOne(func(n *html.Node) bool {
      return n.HasAttr("aria-label") && n.GetAttr("aria-label")[0] == "Value"
    })
    // NOTE(review): FindOne presumably returns nil when nothing matches,
    // like QuerySelector above — confirm against the library.
    if cell == nil {
      continue
    }

    log.Println("value:", cell.TrimmedText())
  }
}