335d22d376
There's no need to put the error variables in a larger scope, or to define them earlier than necessary. If anything, doing so makes the code harder to follow, for example when figuring out where nil errors are returned.
663 lines
13 KiB
Go
663 lines
13 KiB
Go
package chromedp
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"reflect"
|
|
"runtime"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/mailru/easyjson"
|
|
|
|
"github.com/knq/chromedp/cdp"
|
|
"github.com/knq/chromedp/cdp/cdputil"
|
|
"github.com/knq/chromedp/cdp/css"
|
|
"github.com/knq/chromedp/cdp/dom"
|
|
"github.com/knq/chromedp/cdp/inspector"
|
|
logdom "github.com/knq/chromedp/cdp/log"
|
|
"github.com/knq/chromedp/cdp/page"
|
|
rundom "github.com/knq/chromedp/cdp/runtime"
|
|
"github.com/knq/chromedp/client"
|
|
)
|
|
|
|
// TargetHandler manages a Chrome Debugging Protocol target.
|
|
type TargetHandler struct {
|
|
conn client.Transport
|
|
|
|
// frames is the set of encountered frames.
|
|
frames map[cdp.FrameID]*cdp.Frame
|
|
|
|
// cur is the current top level frame.
|
|
cur *cdp.Frame
|
|
|
|
// qcmd is the outgoing message queue.
|
|
qcmd chan *cdp.Message
|
|
|
|
// qres is the incoming command result queue.
|
|
qres chan *cdp.Message
|
|
|
|
// qevents is the incoming event queue.
|
|
qevents chan *cdp.Message
|
|
|
|
// detached is closed when the detached event is received.
|
|
detached chan *inspector.EventDetached
|
|
|
|
pageWaitGroup, domWaitGroup *sync.WaitGroup
|
|
|
|
// last is the last sent message identifier.
|
|
last int64
|
|
lastm sync.Mutex
|
|
|
|
// res is the id->result channel map.
|
|
res map[int64]chan *cdp.Message
|
|
resrw sync.RWMutex
|
|
|
|
// logging funcs
|
|
logf, debugf, errorf LogFunc
|
|
|
|
sync.RWMutex
|
|
}
|
|
|
|
// NewTargetHandler creates a new handler for the specified client target.
|
|
func NewTargetHandler(t client.Target, logf, debugf, errorf LogFunc) (*TargetHandler, error) {
|
|
conn, err := client.Dial(t)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &TargetHandler{
|
|
conn: conn,
|
|
logf: logf,
|
|
debugf: debugf,
|
|
errorf: errorf,
|
|
}, nil
|
|
}
|
|
|
|
// Run starts the processing of commands and events of the client target
|
|
// provided to NewTargetHandler.
|
|
//
|
|
// Callers can stop Run by closing the passed context.
|
|
func (h *TargetHandler) Run(ctxt context.Context) error {
|
|
// reset
|
|
h.Lock()
|
|
h.frames = make(map[cdp.FrameID]*cdp.Frame)
|
|
h.qcmd = make(chan *cdp.Message)
|
|
h.qres = make(chan *cdp.Message)
|
|
h.qevents = make(chan *cdp.Message)
|
|
h.res = make(map[int64]chan *cdp.Message)
|
|
h.detached = make(chan *inspector.EventDetached)
|
|
h.pageWaitGroup = new(sync.WaitGroup)
|
|
h.domWaitGroup = new(sync.WaitGroup)
|
|
h.Unlock()
|
|
|
|
// run
|
|
go h.run(ctxt)
|
|
|
|
// enable domains
|
|
for _, a := range []Action{
|
|
logdom.Enable(),
|
|
rundom.Enable(),
|
|
//network.Enable(),
|
|
inspector.Enable(),
|
|
page.Enable(),
|
|
dom.Enable(),
|
|
css.Enable(),
|
|
} {
|
|
if err := a.Do(ctxt, h); err != nil {
|
|
return fmt.Errorf("unable to execute %s: %v", reflect.TypeOf(a), err)
|
|
}
|
|
}
|
|
|
|
h.Lock()
|
|
|
|
// get page resources
|
|
tree, err := page.GetResourceTree().Do(ctxt, h)
|
|
if err != nil {
|
|
return fmt.Errorf("unable to get resource tree: %v", err)
|
|
}
|
|
|
|
h.frames[tree.Frame.ID] = tree.Frame
|
|
h.cur = tree.Frame
|
|
|
|
for _, c := range tree.ChildFrames {
|
|
h.frames[c.Frame.ID] = c.Frame
|
|
}
|
|
|
|
h.Unlock()
|
|
|
|
h.documentUpdated(ctxt)
|
|
|
|
return nil
|
|
}
|
|
|
|
// run handles the actual message processing to / from the web socket connection.
|
|
func (h *TargetHandler) run(ctxt context.Context) {
|
|
defer h.conn.Close()
|
|
|
|
// add cancel to context
|
|
ctxt, cancel := context.WithCancel(ctxt)
|
|
defer cancel()
|
|
|
|
go func() {
|
|
defer cancel()
|
|
|
|
for {
|
|
select {
|
|
default:
|
|
msg, err := h.read()
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
switch {
|
|
case msg.Method != "":
|
|
h.qevents <- msg
|
|
|
|
case msg.ID != 0:
|
|
h.qres <- msg
|
|
|
|
default:
|
|
h.errorf("ignoring malformed incoming message (missing id or method): %#v", msg)
|
|
}
|
|
|
|
case <-h.detached:
|
|
// FIXME: should log when detached, and reason
|
|
return
|
|
|
|
case <-ctxt.Done():
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
|
|
// process queues
|
|
for {
|
|
select {
|
|
case ev := <-h.qevents:
|
|
err := h.processEvent(ctxt, ev)
|
|
if err != nil {
|
|
h.errorf("could not process event %s: %v", ev.Method, err)
|
|
}
|
|
|
|
case res := <-h.qres:
|
|
err := h.processResult(res)
|
|
if err != nil {
|
|
h.errorf("could not process result for message %d: %v", res.ID, err)
|
|
}
|
|
|
|
case cmd := <-h.qcmd:
|
|
err := h.processCommand(cmd)
|
|
if err != nil {
|
|
h.errorf("could not process command message %d: %v", cmd.ID, err)
|
|
}
|
|
|
|
case <-ctxt.Done():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// read reads a message from the client connection.
|
|
func (h *TargetHandler) read() (*cdp.Message, error) {
|
|
// read
|
|
buf, err := h.conn.Read()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
h.debugf("-> %s", string(buf))
|
|
|
|
// unmarshal
|
|
msg := new(cdp.Message)
|
|
err = easyjson.Unmarshal(buf, msg)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return msg, nil
|
|
}
|
|
|
|
// processEvent processes an incoming event.
|
|
func (h *TargetHandler) processEvent(ctxt context.Context, msg *cdp.Message) error {
|
|
if msg == nil {
|
|
return cdp.ErrChannelClosed
|
|
}
|
|
|
|
// unmarshal
|
|
ev, err := cdputil.UnmarshalMessage(msg)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
switch e := ev.(type) {
|
|
case *inspector.EventDetached:
|
|
h.Lock()
|
|
defer h.Unlock()
|
|
h.detached <- e
|
|
return nil
|
|
|
|
case *dom.EventDocumentUpdated:
|
|
h.domWaitGroup.Wait()
|
|
go h.documentUpdated(ctxt)
|
|
return nil
|
|
}
|
|
|
|
d := msg.Method.Domain()
|
|
if d != "Page" && d != "DOM" {
|
|
return nil
|
|
}
|
|
|
|
switch d {
|
|
case "Page":
|
|
h.pageWaitGroup.Add(1)
|
|
go h.pageEvent(ctxt, ev)
|
|
|
|
case "DOM":
|
|
h.domWaitGroup.Add(1)
|
|
go h.domEvent(ctxt, ev)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// documentUpdated handles the document updated event, retrieving the document
|
|
// root for the root frame.
|
|
func (h *TargetHandler) documentUpdated(ctxt context.Context) {
|
|
f, err := h.WaitFrame(ctxt, cdp.EmptyFrameID)
|
|
if err != nil {
|
|
h.errorf("could not get current frame: %v", err)
|
|
return
|
|
}
|
|
|
|
f.Lock()
|
|
defer f.Unlock()
|
|
|
|
// invalidate nodes
|
|
if f.Root != nil {
|
|
close(f.Root.Invalidated)
|
|
}
|
|
|
|
f.Nodes = make(map[cdp.NodeID]*cdp.Node)
|
|
f.Root, err = dom.GetDocument().WithPierce(true).Do(ctxt, h)
|
|
if err != nil {
|
|
h.errorf("could not retrieve document root for %s: %v", f.ID, err)
|
|
return
|
|
}
|
|
f.Root.Invalidated = make(chan struct{})
|
|
walk(f.Nodes, f.Root)
|
|
}
|
|
|
|
// processResult processes an incoming command result.
|
|
func (h *TargetHandler) processResult(msg *cdp.Message) error {
|
|
h.resrw.RLock()
|
|
defer h.resrw.RUnlock()
|
|
|
|
ch, ok := h.res[msg.ID]
|
|
if !ok {
|
|
return fmt.Errorf("id %d not present in res map", msg.ID)
|
|
}
|
|
defer close(ch)
|
|
|
|
ch <- msg
|
|
|
|
return nil
|
|
}
|
|
|
|
// processCommand writes a command to the client connection.
|
|
func (h *TargetHandler) processCommand(cmd *cdp.Message) error {
|
|
// marshal
|
|
buf, err := easyjson.Marshal(cmd)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
h.debugf("<- %s", string(buf))
|
|
|
|
return h.conn.Write(buf)
|
|
}
|
|
|
|
// emptyObj is an empty JSON object message.
|
|
var emptyObj = easyjson.RawMessage([]byte(`{}`))
|
|
|
|
// Execute executes commandType against the endpoint passed to Run, using the
|
|
// provided context and params, decoding the result of the command to res.
|
|
func (h *TargetHandler) Execute(ctxt context.Context, commandType cdp.MethodType, params easyjson.Marshaler, res easyjson.Unmarshaler) error {
|
|
var paramsBuf easyjson.RawMessage
|
|
if params == nil {
|
|
paramsBuf = emptyObj
|
|
} else {
|
|
var err error
|
|
paramsBuf, err = easyjson.Marshal(params)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
id := h.next()
|
|
|
|
// save channel
|
|
ch := make(chan *cdp.Message, 1)
|
|
h.resrw.Lock()
|
|
h.res[id] = ch
|
|
h.resrw.Unlock()
|
|
|
|
// queue message
|
|
h.qcmd <- &cdp.Message{
|
|
ID: id,
|
|
Method: commandType,
|
|
Params: paramsBuf,
|
|
}
|
|
|
|
errch := make(chan error, 1)
|
|
go func() {
|
|
defer close(errch)
|
|
|
|
select {
|
|
case msg := <-ch:
|
|
switch {
|
|
case msg == nil:
|
|
errch <- cdp.ErrChannelClosed
|
|
|
|
case msg.Error != nil:
|
|
errch <- msg.Error
|
|
|
|
case res != nil:
|
|
errch <- easyjson.Unmarshal(msg.Result, res)
|
|
}
|
|
|
|
case <-ctxt.Done():
|
|
errch <- ctxt.Err()
|
|
}
|
|
|
|
h.resrw.Lock()
|
|
defer h.resrw.Unlock()
|
|
|
|
delete(h.res, id)
|
|
}()
|
|
|
|
return <-errch
|
|
}
|
|
|
|
// next returns the next message id.
|
|
func (h *TargetHandler) next() int64 {
|
|
h.lastm.Lock()
|
|
defer h.lastm.Unlock()
|
|
h.last++
|
|
return h.last
|
|
}
|
|
|
|
// GetRoot returns the current top level frame's root document node.
|
|
func (h *TargetHandler) GetRoot(ctxt context.Context) (*cdp.Node, error) {
|
|
var root *cdp.Node
|
|
|
|
for {
|
|
var cur *cdp.Frame
|
|
select {
|
|
default:
|
|
h.RLock()
|
|
cur = h.cur
|
|
if cur != nil {
|
|
cur.RLock()
|
|
root = cur.Root
|
|
cur.RUnlock()
|
|
}
|
|
h.RUnlock()
|
|
|
|
if cur != nil && root != nil {
|
|
return root, nil
|
|
}
|
|
|
|
time.Sleep(DefaultCheckDuration)
|
|
|
|
case <-ctxt.Done():
|
|
return nil, ctxt.Err()
|
|
}
|
|
}
|
|
}
|
|
|
|
// SetActive sets the currently active frame after a successful navigation.
|
|
func (h *TargetHandler) SetActive(ctxt context.Context, id cdp.FrameID) error {
|
|
var err error
|
|
|
|
// get frame
|
|
f, err := h.WaitFrame(ctxt, id)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
h.Lock()
|
|
defer h.Unlock()
|
|
|
|
h.cur = f
|
|
|
|
return nil
|
|
}
|
|
|
|
// WaitFrame waits for a frame to be loaded using the provided context.
|
|
func (h *TargetHandler) WaitFrame(ctxt context.Context, id cdp.FrameID) (*cdp.Frame, error) {
|
|
// TODO: fix this
|
|
timeout := time.After(10 * time.Second)
|
|
|
|
for {
|
|
select {
|
|
default:
|
|
var f *cdp.Frame
|
|
var ok bool
|
|
|
|
h.RLock()
|
|
if id == cdp.EmptyFrameID {
|
|
f, ok = h.cur, h.cur != nil
|
|
} else {
|
|
f, ok = h.frames[id]
|
|
}
|
|
h.RUnlock()
|
|
|
|
if ok {
|
|
return f, nil
|
|
}
|
|
|
|
time.Sleep(DefaultCheckDuration)
|
|
|
|
case <-ctxt.Done():
|
|
return nil, ctxt.Err()
|
|
|
|
case <-timeout:
|
|
return nil, fmt.Errorf("timeout waiting for frame `%s`", id)
|
|
}
|
|
}
|
|
}
|
|
|
|
// WaitNode waits for a node to be loaded using the provided context.
|
|
func (h *TargetHandler) WaitNode(ctxt context.Context, f *cdp.Frame, id cdp.NodeID) (*cdp.Node, error) {
|
|
// TODO: fix this
|
|
timeout := time.After(10 * time.Second)
|
|
|
|
for {
|
|
select {
|
|
default:
|
|
var n *cdp.Node
|
|
var ok bool
|
|
|
|
f.RLock()
|
|
n, ok = f.Nodes[id]
|
|
f.RUnlock()
|
|
|
|
if n != nil && ok {
|
|
return n, nil
|
|
}
|
|
|
|
time.Sleep(DefaultCheckDuration)
|
|
|
|
case <-ctxt.Done():
|
|
return nil, ctxt.Err()
|
|
|
|
case <-timeout:
|
|
return nil, fmt.Errorf("timeout waiting for node `%d`", id)
|
|
}
|
|
}
|
|
}
|
|
|
|
// pageEvent handles incoming page events.
|
|
func (h *TargetHandler) pageEvent(ctxt context.Context, ev interface{}) {
|
|
defer h.pageWaitGroup.Done()
|
|
|
|
var id cdp.FrameID
|
|
var op frameOp
|
|
|
|
switch e := ev.(type) {
|
|
case *page.EventFrameNavigated:
|
|
h.Lock()
|
|
h.frames[e.Frame.ID] = e.Frame
|
|
if h.cur != nil && h.cur.ID == e.Frame.ID {
|
|
h.cur = e.Frame
|
|
}
|
|
h.Unlock()
|
|
return
|
|
|
|
case *page.EventFrameAttached:
|
|
id, op = e.FrameID, frameAttached(e.ParentFrameID)
|
|
|
|
case *page.EventFrameDetached:
|
|
id, op = e.FrameID, frameDetached
|
|
|
|
case *page.EventFrameStartedLoading:
|
|
id, op = e.FrameID, frameStartedLoading
|
|
|
|
case *page.EventFrameStoppedLoading:
|
|
id, op = e.FrameID, frameStoppedLoading
|
|
|
|
case *page.EventFrameScheduledNavigation:
|
|
id, op = e.FrameID, frameScheduledNavigation
|
|
|
|
case *page.EventFrameClearedScheduledNavigation:
|
|
id, op = e.FrameID, frameClearedScheduledNavigation
|
|
|
|
case *page.EventDomContentEventFired:
|
|
return
|
|
case *page.EventLoadEventFired:
|
|
return
|
|
case *page.EventFrameResized:
|
|
return
|
|
|
|
default:
|
|
h.errorf("unhandled page event %s", reflect.TypeOf(ev))
|
|
return
|
|
}
|
|
|
|
f, err := h.WaitFrame(ctxt, id)
|
|
if err != nil {
|
|
h.errorf("could not get frame %s: %v", id, err)
|
|
return
|
|
}
|
|
|
|
h.Lock()
|
|
defer h.Unlock()
|
|
|
|
f.Lock()
|
|
defer f.Unlock()
|
|
|
|
op(f)
|
|
}
|
|
|
|
// domEvent handles incoming DOM events.
|
|
func (h *TargetHandler) domEvent(ctxt context.Context, ev interface{}) {
|
|
defer h.domWaitGroup.Done()
|
|
|
|
// wait current frame
|
|
f, err := h.WaitFrame(ctxt, cdp.EmptyFrameID)
|
|
if err != nil {
|
|
h.errorf("could not process DOM event %s: %v", reflect.TypeOf(ev), err)
|
|
return
|
|
}
|
|
|
|
var id cdp.NodeID
|
|
var op nodeOp
|
|
|
|
switch e := ev.(type) {
|
|
case *dom.EventSetChildNodes:
|
|
id, op = e.ParentID, setChildNodes(f.Nodes, e.Nodes)
|
|
|
|
case *dom.EventAttributeModified:
|
|
id, op = e.NodeID, attributeModified(e.Name, e.Value)
|
|
|
|
case *dom.EventAttributeRemoved:
|
|
id, op = e.NodeID, attributeRemoved(e.Name)
|
|
|
|
case *dom.EventInlineStyleInvalidated:
|
|
if len(e.NodeIds) == 0 {
|
|
return
|
|
}
|
|
|
|
id, op = e.NodeIds[0], inlineStyleInvalidated(e.NodeIds[1:])
|
|
|
|
case *dom.EventCharacterDataModified:
|
|
id, op = e.NodeID, characterDataModified(e.CharacterData)
|
|
|
|
case *dom.EventChildNodeCountUpdated:
|
|
id, op = e.NodeID, childNodeCountUpdated(e.ChildNodeCount)
|
|
|
|
case *dom.EventChildNodeInserted:
|
|
if e.PreviousNodeID != cdp.EmptyNodeID {
|
|
_, err = h.WaitNode(ctxt, f, e.PreviousNodeID)
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
id, op = e.ParentNodeID, childNodeInserted(f.Nodes, e.PreviousNodeID, e.Node)
|
|
|
|
case *dom.EventChildNodeRemoved:
|
|
id, op = e.ParentNodeID, childNodeRemoved(f.Nodes, e.NodeID)
|
|
|
|
case *dom.EventShadowRootPushed:
|
|
id, op = e.HostID, shadowRootPushed(f.Nodes, e.Root)
|
|
|
|
case *dom.EventShadowRootPopped:
|
|
id, op = e.HostID, shadowRootPopped(f.Nodes, e.RootID)
|
|
|
|
case *dom.EventPseudoElementAdded:
|
|
id, op = e.ParentID, pseudoElementAdded(f.Nodes, e.PseudoElement)
|
|
|
|
case *dom.EventPseudoElementRemoved:
|
|
id, op = e.ParentID, pseudoElementRemoved(f.Nodes, e.PseudoElementID)
|
|
|
|
case *dom.EventDistributedNodesUpdated:
|
|
id, op = e.InsertionPointID, distributedNodesUpdated(e.DistributedNodes)
|
|
|
|
default:
|
|
h.errorf("unhandled node event %s", reflect.TypeOf(ev))
|
|
return
|
|
}
|
|
|
|
// retrieve node
|
|
n, err := h.WaitNode(ctxt, f, id)
|
|
if err != nil {
|
|
s := strings.TrimSuffix(runtime.FuncForPC(reflect.ValueOf(op).Pointer()).Name(), ".func1")
|
|
i := strings.LastIndex(s, ".")
|
|
if i != -1 {
|
|
s = s[i+1:]
|
|
}
|
|
h.errorf("could not perform (%s) operation on node %d (wait node): %v", s, id, err)
|
|
return
|
|
}
|
|
|
|
h.Lock()
|
|
defer h.Unlock()
|
|
|
|
f.Lock()
|
|
defer f.Unlock()
|
|
|
|
op(n)
|
|
}
|
|
|
|
// Listen creates a listener for the specified event types.
|
|
func (h *TargetHandler) Listen(eventTypes ...cdp.MethodType) <-chan interface{} {
|
|
return nil
|
|
}
|
|
|
|
// Release releases a channel returned from Listen.
|
|
func (h *TargetHandler) Release(ch <-chan interface{}) {
|
|
|
|
}
|