# HG changeset patch
# User Henry Precheur
# Date 1471413960 25200
#      Tue Aug 16 23:06:00 2016 -0700
# Node ID a3fd89177bab611a8bd3611189ed649a3d8c0a4f
# Parent  0000000000000000000000000000000000000000
wip

diff --git a/main.go b/main.go
new file mode 100644
--- /dev/null
+++ b/main.go
@@ -0,0 +1,80 @@
+package main
+
+import (
+	"fmt"
+	_ "io"
+	"net/http"
+	"time"
+
+	"github.com/yhat/scrape"
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
+)
+
+// standingsURL is the team standings page that main downloads and scrapes.
+const standingsURL = "http://stats.adultrechockey.ca/c-burnaby/ashlburnaby/en/stats/team.php?type=standings&season_id=204&division_id=531&team_id=5555"
+
+// findOne applies matchers in order, each one searching (via scrape.Find)
+// from the node located by the previous matcher, and returns the node found
+// by the last matcher. With no matchers it returns node unchanged. It panics
+// as soon as a matcher finds nothing.
+func findOne(node *html.Node, matchers []scrape.Matcher) *html.Node {
+	for _, matcher := range matchers {
+		// Use a separate name for the result: declaring "node" with := here
+		// would shadow the parameter, so every search would restart from the
+		// root and the root itself would be returned.
+		found, ok := scrape.Find(node, matcher)
+		if !ok {
+			// matcher is a func value; %p prints its address, which go vet
+			// accepts where it rejects %v.
+			panic(fmt.Sprintf("not found: %p\n", matcher))
+		}
+		node = found
+	}
+
+	return node
+}
+
+// main downloads the standings page, locates the table body under the element
+// with id "statsCal", and prints the "ls-date" cell text of every row after
+// the first. Any failure along the way panics.
+func main() {
+	// http.Get uses a client with no timeout; bound the request so a stalled
+	// server cannot hang the program forever.
+	client := &http.Client{Timeout: 30 * time.Second}
+	resp, err := client.Get(standingsURL)
+	if err != nil {
+		panic(err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		panic(fmt.Sprintf("unexpected status: %s", resp.Status))
+	}
+
+	doc, err := html.Parse(resp.Body)
+	if err != nil {
+		panic(err)
+	}
+
+	// XPath of the target: //*[@id="statsCal"]/center/div/table/tbody
+	node := findOne(doc, []scrape.Matcher{
+		scrape.ById("statsCal"),
+		scrape.ByTag(atom.Tbody),
+	})
+
+	trs := scrape.FindAll(node, scrape.ByTag(atom.Tr))
+	if len(trs) == 0 {
+		// Slicing an empty result with [1:] would panic with an opaque
+		// slice-bounds error.
+		panic("no rows found")
+	}
+	// Skip the first row (presumably the table header; confirm against the page).
+	for _, tr := range trs[1:] {
+		fmt.Printf("%v %s\n", tr, tr.Data)
+		date, ok := scrape.Find(tr, scrape.ByClass("ls-date"))
+		if !ok {
+			panic("ls-date not found")
+		}
+		fmt.Printf("> %#v\n", scrape.Text(date))
+	}
+}