chromedp/sel.go
Kenneth Shaw 5bef7dce13 Changes to ElementVisible/ElementNotVisible
Changed implementation of ElementVisible/ElementNotVisible to use a
javascript evaluation instead of the previous, cumbersome
implementation. As this may not work correctly in all scenarios, the old
versions will remain as ElementVisibleOld and ElementNotVisibleOld until
the new implementations can be vetted for correctness/performance.
2017-02-08 15:40:22 +07:00

509 lines
12 KiB
Go

package chromedp
import (
"context"
"errors"
"fmt"
"strings"
"sync"
"time"
"github.com/knq/chromedp/cdp"
"github.com/knq/chromedp/cdp/css"
"github.com/knq/chromedp/cdp/dom"
)
/*
TODO: selector 'by' type, as below:
classname
linktext
name
partiallinktext
tagname
*/
// Error types.
var (
// ErrNoResults is the no results error.
ErrNoResults = errors.New("no results")
// ErrNotVisible is the not visible error, returned by the visibility checks
// when an element was expected to be visible but is not.
ErrNotVisible = errors.New("not visible")
// ErrVisible is the visible error, returned by the visibility checks when an
// element was expected to be not visible but is.
ErrVisible = errors.New("visible")
// ErrDisabled is the disabled error, returned by ElementEnabled when the
// element has a 'disabled' attribute.
ErrDisabled = errors.New("disabled")
// ErrNotSelected is the not selected error, returned by ElementSelected when
// the element has no 'selected' attribute.
ErrNotSelected = errors.New("not selected")
)
// Selector holds information pertaining to an element query select action.
//
// A Selector is created by Query and is itself the Action that Query returns
// (see Do).
type Selector struct {
// sel is the raw selector value: a string for the search/query based
// selectors, or a []cdp.NodeID for ByNodeID.
sel interface{}
// exp is the minimum number of node ids that by must return before the
// wait func is run.
exp int
// by locates the ids of the matching nodes, starting from the passed root
// node.
by func(context.Context, cdp.FrameHandler, *cdp.Node) ([]cdp.NodeID, error)
// wait turns the located node ids into nodes, returning an error while the
// wait condition is not yet satisfied.
wait func(context.Context, cdp.FrameHandler, *cdp.Node, ...cdp.NodeID) ([]*cdp.Node, error)
// after, when non-nil, is executed with the nodes once wait has succeeded.
after func(context.Context, cdp.FrameHandler, ...*cdp.Node) error
}
// Query is an action that queries the document for nodes matching sel,
// configured by the supplied query options.
//
// Unless an option says otherwise, nodes are located with BySearch, the wait
// condition is ElementReady, and at least one matching node is expected.
func Query(sel interface{}, opts ...QueryOption) Action {
	s := new(Selector)
	s.sel, s.exp = sel, 1

	// apply options
	for i := range opts {
		opts[i](s)
	}

	// fall back to the defaults for anything the options left unset
	if s.by == nil {
		BySearch(s)
	}
	if s.wait == nil {
		ElementReady(s)
	}

	return s
}
// Do satisfies the Action interface.
//
// It starts the selector and returns its result, or the context's error when
// the context is done first. The context is additionally capped by a
// hard-coded 100 second timeout.
func (s *Selector) Do(ctxt context.Context, h cdp.FrameHandler) error {
	// TODO: fix this
	ctxt, cancel := context.WithTimeout(ctxt, 100*time.Second)
	defer cancel()

	select {
	case err := <-s.run(ctxt, h):
		// a closed channel yields nil, which signals success
		return err
	case <-ctxt.Done():
		return ctxt.Err()
	}
}
// run runs the selector action in a new goroutine, starting over if the
// original returned nodes are invalidated prior to finishing the selector's
// by, wait, and after funcs.
//
// The returned channel receives at most one error and is then closed. A close
// without a value means the selector completed successfully.
func (s *Selector) run(ctxt context.Context, h cdp.FrameHandler) chan error {
	// Buffered with room for the single value the goroutine may send, so the
	// send never blocks -- even when the receiver has already given up because
	// its context was done. An unbuffered channel would leak the goroutine in
	// that case.
	ch := make(chan error, 1)

	go func() {
		defer close(ch)

		for {
			root, err := h.GetRoot(ctxt)
			if err != nil {
				// Retrying immediately would spin the CPU, and would never
				// end once the context is done. Back off for one check
				// interval and stop when the context is done.
				select {
				case <-ctxt.Done():
					ch <- ctxt.Err()
					return
				case <-time.After(DefaultCheckDuration):
				}
				continue
			}

			select {
			default:
				// only reached when the root is still valid and the context
				// is still live
				ids, err := s.by(ctxt, h, root)
				if err == nil && len(ids) >= s.exp {
					nodes, err := s.wait(ctxt, h, root, ids...)
					if err == nil {
						if s.after == nil {
							return
						}
						if err = s.after(ctxt, h, nodes...); err != nil {
							ch <- err
						}
						return
					}
				}

				// not enough nodes, or the wait condition is not yet met:
				// pause before trying again
				time.Sleep(DefaultCheckDuration)

			case <-root.Invalidated:
				// the root node was replaced, start over with a fresh one
				continue

			case <-ctxt.Done():
				ch <- ctxt.Err()
				return
			}
		}
	}()

	return ch
}
// selAsString forces sel into a string.
//
// A string sel is returned untouched; any other value is rendered with the
// %s formatting verb.
func (s *Selector) selAsString() string {
	switch v := s.sel.(type) {
	case string:
		return v
	default:
		return fmt.Sprintf("%s", v)
	}
}
// selAsInt forces sel into an int.
//
// It panics when sel does not hold an int.
func (s *Selector) selAsInt() int {
	if n, ok := s.sel.(int); ok {
		return n
	}
	panic("selector must be int")
}
// QueryAfter is an action that matches sel using the supplied query options
// and, once the query's wait condition has been met, executes f with the
// resulting nodes.
//
// f is installed as the last option, so it replaces any After option present
// in opts.
func QueryAfter(sel interface{}, f func(context.Context, cdp.FrameHandler, ...*cdp.Node) error, opts ...QueryOption) Action {
	opts = append(opts, After(f))
	return Query(sel, opts...)
}
// QueryOption is an element query selector option.
//
// Options are applied by Query, in order, to the Selector being built.
type QueryOption func(*Selector)
// ByFunc is a query option that sets f as the func used to select elements.
//
// f receives the root node and returns the ids of the matching nodes.
func ByFunc(f func(context.Context, cdp.FrameHandler, *cdp.Node) ([]cdp.NodeID, error)) QueryOption {
	var opt QueryOption = func(s *Selector) {
		s.by = f
	}
	return opt
}
// ByQuery is a query option to select a single element using
// DOM.querySelector.
//
// When the query yields the empty node id, an empty (non-nil) result is
// returned rather than an error.
func ByQuery(s *Selector) {
	find := func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) ([]cdp.NodeID, error) {
		id, err := dom.QuerySelector(n.NodeID, s.selAsString()).Do(ctxt, h)
		switch {
		case err != nil:
			return nil, err
		case id == emptyNodeID:
			return []cdp.NodeID{}, nil
		}
		return []cdp.NodeID{id}, nil
	}

	ByFunc(find)(s)
}
// ByQueryAll is a query option to select elements using
// DOM.querySelectorAll, evaluated against the root node.
func ByQueryAll(s *Selector) {
	findAll := func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) ([]cdp.NodeID, error) {
		return dom.QuerySelectorAll(n.NodeID, s.selAsString()).Do(ctxt, h)
	}

	ByFunc(findAll)(s)
}
// ByID is a query option to select a single element by its CSS #id.
//
// The selector is rewritten to carry exactly one leading '#' (one is added
// when missing) and is then handed to ByQuery.
func ByID(s *Selector) {
	id := strings.TrimPrefix(s.selAsString(), "#")
	s.sel = "#" + id

	ByQuery(s)
}
// BySearch is a query option to select elements via DOM.performSearch (works
// with both CSS and XPath queries).
//
// All results reported by the search are retrieved; a search with no results
// yields an empty (non-nil) result rather than an error.
func BySearch(s *Selector) {
	search := func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) ([]cdp.NodeID, error) {
		searchID, total, err := dom.PerformSearch(s.selAsString()).Do(ctxt, h)
		if err != nil {
			return nil, err
		}

		// nothing matched
		if total < 1 {
			return []cdp.NodeID{}, nil
		}

		ids, err := dom.GetSearchResults(searchID, 0, total).Do(ctxt, h)
		if err != nil {
			return nil, err
		}

		return ids, nil
	}

	ByFunc(search)(s)
}
// ByNodeID is a query option to select elements by their NodeIDs.
//
// The selector must be a []cdp.NodeID, otherwise ByNodeID panics. When run,
// the child nodes of every id are requested (piercing), and the ids
// themselves are returned as the result.
func ByNodeID(s *Selector) {
	ids, ok := s.sel.([]cdp.NodeID)
	if !ok {
		panic("ByNodeID can only work on []cdp.NodeID")
	}

	request := func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) ([]cdp.NodeID, error) {
		for i := range ids {
			if err := dom.RequestChildNodes(ids[i]).WithPierce(true).Do(ctxt, h); err != nil {
				return nil, err
			}
		}
		return ids, nil
	}

	ByFunc(request)(s)
}
// waitReady returns a wait func that waits for the specified nodes to be
// ready, and then, when check is non-nil, runs check against every node.
//
// Nodes are waited on (and checked) concurrently. The first non-nil error, in
// node order, is returned in place of the nodes.
func (s *Selector) waitReady(check func(context.Context, cdp.FrameHandler, *cdp.Node) error) func(context.Context, cdp.FrameHandler, *cdp.Node, ...cdp.NodeID) ([]*cdp.Node, error) {
	return func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node, ids ...cdp.NodeID) ([]*cdp.Node, error) {
		frame, err := h.WaitFrame(ctxt, emptyFrameID)
		if err != nil {
			return nil, err
		}

		var wg sync.WaitGroup

		// wait for every node, each in its own goroutine writing to its own
		// slot
		nodes := make([]*cdp.Node, len(ids))
		waitErrs := make([]error, len(ids))
		wg.Add(len(ids))
		for i := range ids {
			go func(i int) {
				defer wg.Done()
				nodes[i], waitErrs[i] = h.WaitNode(ctxt, frame, ids[i])
			}(i)
		}
		wg.Wait()

		for _, e := range waitErrs {
			if e != nil {
				return nil, e
			}
		}

		// no extra condition to verify
		if check == nil {
			return nodes, nil
		}

		// run the check against every node, again concurrently
		checkErrs := make([]error, len(nodes))
		wg.Add(len(nodes))
		for i := range nodes {
			go func(i int) {
				defer wg.Done()
				checkErrs[i] = check(ctxt, h, nodes[i])
			}(i)
		}
		wg.Wait()

		for _, e := range checkErrs {
			if e != nil {
				return nil, e
			}
		}

		return nodes, nil
	}
}
// WaitFunc is a query option that sets wait as the selector's custom wait
// func.
//
// wait receives the root node and the located node ids, and returns the
// nodes once the wait condition holds.
func WaitFunc(wait func(context.Context, cdp.FrameHandler, *cdp.Node, ...cdp.NodeID) ([]*cdp.Node, error)) QueryOption {
	var opt QueryOption = func(s *Selector) {
		s.wait = wait
	}
	return opt
}
// ElementReady is a query option to wait until the element is ready, without
// any additional check on the node.
func ElementReady(s *Selector) {
	ready := s.waitReady(nil)
	WaitFunc(ready)(s)
}
// ElementVisible is a query option to wait until the element is visible.
//
// Visibility is determined by evaluating the visibleJS snippet against the
// node's full XPath; ErrNotVisible is reported while the snippet evaluates to
// false.
func ElementVisible(s *Selector) {
	visible := func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) error {
		var ok bool

		script := fmt.Sprintf(visibleJS, n.FullXPath())
		if err := EvaluateAsDevTools(script, &ok).Do(ctxt, h); err != nil {
			return err
		}

		if ok {
			return nil
		}
		return ErrNotVisible
	}

	WaitFunc(s.waitReady(visible))(s)
}
// ElementNotVisible is a query option to wait until the element is not
// visible.
//
// Visibility is determined by evaluating the visibleJS snippet against the
// node's full XPath; ErrVisible is reported while the snippet evaluates to
// true.
func ElementNotVisible(s *Selector) {
	notVisible := func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) error {
		var shown bool

		script := fmt.Sprintf(visibleJS, n.FullXPath())
		if err := EvaluateAsDevTools(script, &shown).Do(ctxt, h); err != nil {
			return err
		}

		if !shown {
			return nil
		}
		return ErrVisible
	}

	WaitFunc(s.waitReady(notVisible))(s)
}
// ElementVisibleOld is a query option to wait until the element is visible.
//
// The node must have a box model, and neither the node nor any of its
// ancestors that have a parent may report a computed style of display "none",
// a visibility other than "visible", or a style named "hidden".
//
// This is the old, complicated, implementation (deprecated).
func ElementVisibleOld(s *Selector) {
	visible := func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) error {
		// check node has box model
		if _, err := dom.GetBoxModel(n.NodeID).Do(ctxt, h); err != nil {
			return err
		}

		// walk up from the node, checking each node that has a parent
		for cur := n; cur.Parent != nil; cur = cur.Parent {
			props, err := css.GetComputedStyleForNode(cur.NodeID).Do(ctxt, h)
			if err != nil {
				return err
			}

			// the first style marking the node as hidden settles it
			for _, p := range props {
				switch p.Name {
				case "display":
					if p.Value == "none" {
						return ErrNotVisible
					}
				case "visibility":
					if p.Value != "visible" {
						return ErrNotVisible
					}
				case "hidden":
					return ErrNotVisible
				}
			}
		}

		return nil
	}

	WaitFunc(s.waitReady(visible))(s)
}
// ElementNotVisibleOld is a query option to wait until the element is not
// visible.
//
// The element counts as not visible when its box model cannot be retrieved,
// or when the node or any of its ancestors that have a parent reports a
// computed style of display "none", a visibility other than "visible", or a
// style named "hidden". Otherwise ErrVisible is reported.
//
// This is the old, complicated, implementation (deprecated).
func ElementNotVisibleOld(s *Selector) {
	notVisible := func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) error {
		// a node without a box model is treated as not visible
		if _, err := dom.GetBoxModel(n.NodeID).Do(ctxt, h); err != nil {
			return nil
		}

		// walk up from the node, checking each node that has a parent
		for cur := n; cur.Parent != nil; cur = cur.Parent {
			props, err := css.GetComputedStyleForNode(cur.NodeID).Do(ctxt, h)
			if err != nil {
				return err
			}

			// the first style marking the node as hidden settles it
			for _, p := range props {
				switch p.Name {
				case "display":
					if p.Value == "none" {
						return nil
					}
				case "visibility":
					if p.Value != "visible" {
						return nil
					}
				case "hidden":
					return nil
				}
			}
		}

		return ErrVisible
	}

	WaitFunc(s.waitReady(notVisible))(s)
}
// ElementEnabled is a query option to wait until the element is enabled,
// that is, until it no longer has a 'disabled' attribute.
func ElementEnabled(s *Selector) {
	enabled := func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) error {
		n.RLock()
		defer n.RUnlock()

		// only every other entry, starting with the first, is compared
		// (presumably the list alternates attribute name, value -- confirm
		// against cdp.Node)
		for i, attr := range n.Attributes {
			if i%2 == 0 && attr == "disabled" {
				return ErrDisabled
			}
		}

		return nil
	}

	WaitFunc(s.waitReady(enabled))(s)
}
// ElementSelected is a query option to wait until the element is selected,
// that is, until it has a 'selected' attribute.
func ElementSelected(s *Selector) {
	selected := func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) error {
		n.RLock()
		defer n.RUnlock()

		// only every other entry, starting with the first, is compared
		// (presumably the list alternates attribute name, value -- confirm
		// against cdp.Node)
		for i, attr := range n.Attributes {
			if i%2 == 0 && attr == "selected" {
				return nil
			}
		}

		return ErrNotSelected
	}

	WaitFunc(s.waitReady(selected))(s)
}
// AtLeast is a query option to wait until at least n elements are returned
// from the query selector.
func AtLeast(n int) QueryOption {
	var opt QueryOption = func(s *Selector) {
		s.exp = n
	}
	return opt
}
// After is a query option that sets f as the func to execute, with the
// resulting nodes, after the wait has succeeded.
func After(f func(context.Context, cdp.FrameHandler, ...*cdp.Node) error) QueryOption {
	var opt QueryOption = func(s *Selector) {
		s.after = f
	}
	return opt
}
// WaitReady waits until the element is ready (ie, loaded by chromedp).
//
// It is a plain Query: no wait option is added on top of opts.
func WaitReady(sel interface{}, opts ...QueryOption) Action {
	action := Query(sel, opts...)
	return action
}
// WaitVisible waits until the selected element is visible.
//
// ElementVisible is applied after opts, so it replaces any wait func that
// opts may have set.
func WaitVisible(sel interface{}, opts ...QueryOption) Action {
	opts = append(opts, ElementVisible)
	return Query(sel, opts...)
}
// WaitNotVisible waits until the selected element is not visible.
//
// ElementNotVisible is applied after opts, so it replaces any wait func that
// opts may have set.
func WaitNotVisible(sel interface{}, opts ...QueryOption) Action {
	opts = append(opts, ElementNotVisible)
	return Query(sel, opts...)
}
// WaitEnabled waits until the selected element is enabled (does not have
// attribute 'disabled').
//
// ElementEnabled is applied after opts, so it replaces any wait func that
// opts may have set.
func WaitEnabled(sel interface{}, opts ...QueryOption) Action {
	opts = append(opts, ElementEnabled)
	return Query(sel, opts...)
}
// WaitSelected waits until the element is selected (has attribute
// 'selected').
//
// ElementSelected is applied after opts, so it replaces any wait func that
// opts may have set.
func WaitSelected(sel interface{}, opts ...QueryOption) Action {
	opts = append(opts, ElementSelected)
	return Query(sel, opts...)
}
const (
// visibleJS is a javascript snippet that returns true or false depending
// on if the specified node's offsetParent is not null.
//
// It is a format string: the single %s is replaced with the node's full
// XPath, which is looked up with $x (presumably the DevTools console XPath
// helper, as the snippet is run through EvaluateAsDevTools -- confirm).
//
// NOTE(review): the first result is dereferenced without checking that the
// lookup returned anything, so the snippet presumably throws rather than
// returning false when the XPath no longer matches a node.
visibleJS = `(function(a) {
return a[0].offsetParent !== null
})($x('%s'))`
)