package chromedp import ( "context" "errors" "fmt" "strings" "sync" "time" "github.com/knq/chromedp/cdp" "github.com/knq/chromedp/cdp/css" "github.com/knq/chromedp/cdp/dom" ) /* TODO: selector 'by' type, as below: classname linktext name partiallinktext tagname */ // Error types. var ( ErrNoResults = errors.New("no results") ErrNotVisible = errors.New("not visible") ErrVisible = errors.New("visible") ErrDisabled = errors.New("disabled") ErrNotSelected = errors.New("not selected") ) // Selector holds information pertaining to an element query select action. type Selector struct { sel interface{} exp int by func(context.Context, cdp.FrameHandler, *cdp.Node) ([]cdp.NodeID, error) wait func(context.Context, cdp.FrameHandler, *cdp.Node, ...cdp.NodeID) ([]*cdp.Node, error) after func(context.Context, cdp.FrameHandler, ...*cdp.Node) error } // Query is an action to query for document nodes match the specified sel and // the supplied query options. func Query(sel interface{}, opts ...QueryOption) Action { s := &Selector{ sel: sel, exp: 1, } // apply options for _, o := range opts { o(s) } if s.by == nil { BySearch(s) } if s.wait == nil { ElementReady(s) } return s } // Do satisfies the Action interface. func (s *Selector) Do(ctxt context.Context, h cdp.FrameHandler) error { // TODO: fix this ctxt, cancel := context.WithTimeout(ctxt, 100*time.Second) defer cancel() var err error select { case err = <-s.run(ctxt, h): case <-ctxt.Done(): err = ctxt.Err() } return err } // run runs the selector action, starting over if the original returned nodes // are invalidated prior to finishing the selector's by, wait, check, and after // funcs. func (s *Selector) run(ctxt context.Context, h cdp.FrameHandler) chan error { ch := make(chan error) go func() { defer close(ch) for { root, err := h.GetRoot(ctxt) if err != nil { continue } select { default: ids, err := s.by(ctxt, h, root) if err == nil && len(ids) >= s.exp { nodes, err := s.wait(ctxt, h, root, ids...) if err == nil { if s.after == nil { return } err = s.after(ctxt, h, nodes...) if err != nil { ch <- err } return } } time.Sleep(DefaultCheckDuration) case <-root.Invalidated: continue case <-ctxt.Done(): ch <- ctxt.Err() return } } }() return ch } // selAsString forces sel into a string. func (s *Selector) selAsString() string { if sel, ok := s.sel.(string); ok { return sel } return fmt.Sprintf("%s", s.sel) } // selAsInt forces sel into a int. func (s *Selector) selAsInt() int { sel, ok := s.sel.(int) if !ok { panic("selector must be int") } return sel } // QueryAfter is an action that will match the specified sel using the supplied // query options, and after the visibility conditions of the query have been // met, will execute f. func QueryAfter(sel interface{}, f func(context.Context, cdp.FrameHandler, ...*cdp.Node) error, opts ...QueryOption) Action { return Query(sel, append(opts, After(f))...) } // QueryOption is a element query selector option. type QueryOption func(*Selector) // ByFunc is a query option to set the func used to select elements. func ByFunc(f func(context.Context, cdp.FrameHandler, *cdp.Node) ([]cdp.NodeID, error)) QueryOption { return func(s *Selector) { s.by = f } } // ByQuery is a query option to select a single element using // DOM.querySelector. func ByQuery(s *Selector) { ByFunc(func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) ([]cdp.NodeID, error) { nodeID, err := dom.QuerySelector(n.NodeID, s.selAsString()).Do(ctxt, h) if err != nil { return nil, err } if nodeID == emptyNodeID { return []cdp.NodeID{}, nil } return []cdp.NodeID{nodeID}, nil })(s) } // ByQueryAll is a query option to select elements by DOM.querySelectorAll. func ByQueryAll(s *Selector) { ByFunc(func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) ([]cdp.NodeID, error) { return dom.QuerySelectorAll(n.NodeID, s.selAsString()).Do(ctxt, h) })(s) } // ByID is a query option to select a single element by their CSS #id. func ByID(s *Selector) { s.sel = "#" + strings.TrimPrefix(s.selAsString(), "#") ByQuery(s) } // BySearch is a query option via DOM.performSearch (works with both CSS and // XPath queries). func BySearch(s *Selector) { ByFunc(func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) ([]cdp.NodeID, error) { id, count, err := dom.PerformSearch(s.selAsString()).Do(ctxt, h) if err != nil { return nil, err } if count < 1 { return []cdp.NodeID{}, nil } nodes, err := dom.GetSearchResults(id, 0, count).Do(ctxt, h) if err != nil { return nil, err } return nodes, nil })(s) } // ByNodeID is a query option to select elements by their NodeIDs. func ByNodeID(s *Selector) { ids, ok := s.sel.([]cdp.NodeID) if !ok { panic("ByNodeID can only work on []cdp.NodeID") } ByFunc(func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) ([]cdp.NodeID, error) { var err error for _, id := range ids { err = dom.RequestChildNodes(id).WithPierce(true).Do(ctxt, h) if err != nil { return nil, err } } return ids, nil })(s) } // waitReady waits for the specified nodes to be ready. func (s *Selector) waitReady(check func(context.Context, cdp.FrameHandler, *cdp.Node) error) func(context.Context, cdp.FrameHandler, *cdp.Node, ...cdp.NodeID) ([]*cdp.Node, error) { return func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node, ids ...cdp.NodeID) ([]*cdp.Node, error) { f, err := h.WaitFrame(ctxt, emptyFrameID) if err != nil { return nil, err } wg := new(sync.WaitGroup) nodes := make([]*cdp.Node, len(ids)) errs := make([]error, len(ids)) for i, id := range ids { wg.Add(1) go func(i int, id cdp.NodeID) { defer wg.Done() nodes[i], errs[i] = h.WaitNode(ctxt, f, id) }(i, id) } wg.Wait() for _, err := range errs { if err != nil { return nil, err } } if check != nil { errs := make([]error, len(nodes)) for i, n := range nodes { wg.Add(1) go func(i int, n *cdp.Node) { defer wg.Done() errs[i] = check(ctxt, h, n) }(i, n) } wg.Wait() for _, err := range errs { if err != nil { return nil, err } } } return nodes, nil } } // WaitFunc is a query option to set a custom wait func. func WaitFunc(wait func(context.Context, cdp.FrameHandler, *cdp.Node, ...cdp.NodeID) ([]*cdp.Node, error)) QueryOption { return func(s *Selector) { s.wait = wait } } // ElementReady is a query option to wait until the element is ready. func ElementReady(s *Selector) { WaitFunc(s.waitReady(nil))(s) } // ElementVisible is a query option to wait until the element is visible. func ElementVisible(s *Selector) { WaitFunc(s.waitReady(func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) error { var err error // check node has box model _, err = dom.GetBoxModel(n.NodeID).Do(ctxt, h) if err != nil { return err } // check if any of the parents are not visible ... var hidden bool for ; n.Parent != nil; n = n.Parent { // get style style, err := css.GetComputedStyleForNode(n.NodeID).Do(ctxt, h) if err != nil { return err } // check if hidden for _, c := range style { switch c.Name { case "display": //log.Printf("%d >>>> %s=%s", n.NodeID, c.Name, c.Value) hidden = c.Value == "none" case "visibility": //log.Printf("%d >>>> %s=%s", n.NodeID, c.Name, c.Value) hidden = c.Value != "visible" case "hidden": //log.Printf("%d >>>> %s=%s", n.NodeID, c.Name, c.Value) hidden = true } if hidden { return ErrNotVisible } } } return nil }))(s) } // ElementNotVisible is a query option to wait until the element is visible. func ElementNotVisible(s *Selector) { WaitFunc(s.waitReady(func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) error { var err error // check node has box model _, err = dom.GetBoxModel(n.NodeID).Do(ctxt, h) if err != nil { return nil } // check if any of the parents are not visible ... var hidden bool for ; n.Parent != nil; n = n.Parent { // get style style, err := css.GetComputedStyleForNode(n.NodeID).Do(ctxt, h) if err != nil { return err } // check if hidden for _, c := range style { switch c.Name { case "display": //log.Printf("%d >>>> %s=%s", n.NodeID, c.Name, c.Value) hidden = c.Value == "none" case "visibility": //log.Printf("%d >>>> %s=%s", n.NodeID, c.Name, c.Value) hidden = c.Value != "visible" case "hidden": //log.Printf("%d >>>> %s=%s", n.NodeID, c.Name, c.Value) hidden = true } if hidden { return nil } } } return ErrVisible }))(s) } // ElementEnabled is a query option to wait until the element is enabled. func ElementEnabled(s *Selector) { WaitFunc(s.waitReady(func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) error { n.RLock() defer n.RUnlock() for i := 0; i < len(n.Attributes); i += 2 { if n.Attributes[i] == "disabled" { return ErrDisabled } } return nil }))(s) } // ElementSelected is a query option to wait until the element is selected. func ElementSelected(s *Selector) { WaitFunc(s.waitReady(func(ctxt context.Context, h cdp.FrameHandler, n *cdp.Node) error { n.RLock() defer n.RUnlock() for i := 0; i < len(n.Attributes); i += 2 { if n.Attributes[i] == "selected" { return nil } } return ErrNotSelected }))(s) } // AtLeast is a query option to wait until at least n elements are returned // from the query selector. func AtLeast(n int) QueryOption { return func(s *Selector) { s.exp = n } } // After is a query option to set a func that will be executed after the wait // has succeeded. func After(f func(context.Context, cdp.FrameHandler, ...*cdp.Node) error) QueryOption { return func(s *Selector) { s.after = f } } // WaitReady waits until the element is ready (ie, loaded by chromedp). func WaitReady(sel interface{}, opts ...QueryOption) Action { return Query(sel, opts...) } // WaitVisible waits until the selected element is visible. func WaitVisible(sel interface{}, opts ...QueryOption) Action { return Query(sel, append(opts, ElementVisible)...) } // WaitNotVisible waits until the selected element is not visible. func WaitNotVisible(sel interface{}, opts ...QueryOption) Action { return Query(sel, append(opts, ElementNotVisible)...) } // WaitEnabled waits until the selected element is enabled (does not have // attribute 'disabled'). func WaitEnabled(sel interface{}, opts ...QueryOption) Action { return Query(sel, append(opts, ElementEnabled)...) } // WaitSelected waits until the element is selected (has attribute 'selected'). func WaitSelected(sel interface{}, opts ...QueryOption) Action { return Query(sel, append(opts, ElementSelected)...) }