393 lines
8.4 KiB
Go
393 lines
8.4 KiB
Go
// Package chromedp is a high-level Chrome Debugging Protocol domain manager
// that simplifies driving web browsers (Chrome, Safari, Edge, Android Web
// Views, and others) for scraping, unit testing, or profiling web pages.
// chromedp requires no third-party dependencies (e.g., Selenium), implementing
// the async Chrome Debugging Protocol natively.
|
|
package chromedp
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/knq/chromedp/cdp"
|
|
"github.com/knq/chromedp/client"
|
|
"github.com/knq/chromedp/runner"
|
|
)
|
|
|
|
// Timing defaults used while polling for targets to become available.
const (
	// DefaultNewTargetTimeout is the upper bound on how long to wait for a
	// new target to be started.
	DefaultNewTargetTimeout = time.Second * 3

	// DefaultCheckDuration is the pause between two consecutive checks while
	// waiting.
	DefaultCheckDuration = time.Millisecond * 50
)
|
|
|
|
// CDP contains information for managing a Chrome process runner, low level
|
|
// client and associated target page handlers.
|
|
type CDP struct {
|
|
// r is the chrome runner.
|
|
r *runner.Runner
|
|
|
|
// opts are command line options to pass to a created runner.
|
|
opts []runner.CommandLineOption
|
|
|
|
// watch is the channel for new client targets.
|
|
watch <-chan client.Target
|
|
|
|
// cur is the current active target's handler.
|
|
cur cdp.FrameHandler
|
|
|
|
// handlers is the active handlers.
|
|
handlers []cdp.FrameHandler
|
|
|
|
// handlerMap is the map of target IDs to its active handler.
|
|
handlerMap map[string]int
|
|
|
|
sync.RWMutex
|
|
}
|
|
|
|
// New creates a new Chrome Debugging Protocol client.
|
|
func New(ctxt context.Context, opts ...Option) (*CDP, error) {
|
|
var err error
|
|
|
|
c := &CDP{
|
|
handlers: make([]cdp.FrameHandler, 0),
|
|
handlerMap: make(map[string]int),
|
|
}
|
|
|
|
// apply options
|
|
for _, o := range opts {
|
|
err = o(c)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// setup context
|
|
if ctxt == nil {
|
|
var cancel func()
|
|
ctxt, cancel = context.WithCancel(context.Background())
|
|
defer cancel()
|
|
}
|
|
|
|
// check for supplied runner, if none then create one
|
|
if c.r == nil && c.watch == nil {
|
|
c.r, err = runner.Run(ctxt, c.opts...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// watch handlers
|
|
if c.watch == nil {
|
|
c.watch = c.r.WatchPageTargets(ctxt)
|
|
}
|
|
|
|
go func() {
|
|
for t := range c.watch {
|
|
go c.AddTarget(ctxt, t)
|
|
}
|
|
}()
|
|
|
|
// TODO: fix this
|
|
timeout := time.After(DefaultNewTargetTimeout)
|
|
|
|
loop:
|
|
// wait until at least one target active
|
|
for {
|
|
select {
|
|
default:
|
|
c.RLock()
|
|
exists := c.cur != nil
|
|
c.RUnlock()
|
|
if exists {
|
|
return c, nil
|
|
}
|
|
|
|
time.Sleep(DefaultCheckDuration)
|
|
|
|
case <-ctxt.Done():
|
|
return nil, cdp.ErrContextDone
|
|
|
|
case <-timeout:
|
|
break loop
|
|
}
|
|
}
|
|
|
|
return nil, errors.New("timeout waiting for initial target")
|
|
}
|
|
|
|
// AddTarget adds a target using the supplied context.
|
|
func (c *CDP) AddTarget(ctxt context.Context, t client.Target) {
|
|
c.Lock()
|
|
defer c.Unlock()
|
|
|
|
// create target manager
|
|
h, err := NewTargetHandler(t)
|
|
if err != nil {
|
|
log.Printf("error: could not create handler for %s, got: %v", t, err)
|
|
return
|
|
}
|
|
|
|
// run
|
|
err = h.Run(ctxt)
|
|
if err != nil {
|
|
log.Printf("error: could not start handler for %s, got: %v", t, err)
|
|
return
|
|
}
|
|
|
|
// add to active handlers
|
|
c.handlers = append(c.handlers, h)
|
|
c.handlerMap[t.GetID()] = len(c.handlers) - 1
|
|
if c.cur == nil {
|
|
c.cur = h
|
|
}
|
|
}
|
|
|
|
// Wait waits for the Chrome runner to terminate.
|
|
func (c *CDP) Wait() error {
|
|
c.RLock()
|
|
r := c.r
|
|
c.RUnlock()
|
|
|
|
if r != nil {
|
|
return r.Wait()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Shutdown closes all Chrome page handlers.
|
|
func (c *CDP) Shutdown(ctxt context.Context, opts ...client.Option) error {
|
|
c.RLock()
|
|
defer c.RUnlock()
|
|
|
|
return c.r.Shutdown(ctxt, opts...)
|
|
}
|
|
|
|
// ListTargets returns the target IDs of the managed targets.
|
|
func (c *CDP) ListTargets() []string {
|
|
c.RLock()
|
|
defer c.RUnlock()
|
|
|
|
targets := make([]string, len(c.handlers))
|
|
i := 0
|
|
for k := range c.handlerMap {
|
|
targets[i] = k
|
|
i++
|
|
}
|
|
|
|
return targets
|
|
}
|
|
|
|
// GetHandlerByIndex retrieves the domains manager for the specified index.
|
|
func (c *CDP) GetHandlerByIndex(i int) cdp.FrameHandler {
|
|
c.RLock()
|
|
defer c.RUnlock()
|
|
|
|
if i < 0 || i >= len(c.handlers) {
|
|
return nil
|
|
}
|
|
|
|
return c.handlers[i]
|
|
}
|
|
|
|
// GetHandlerByID retrieves the domains manager for the specified target ID.
|
|
func (c *CDP) GetHandlerByID(id string) cdp.FrameHandler {
|
|
c.RLock()
|
|
defer c.RUnlock()
|
|
|
|
if i, ok := c.handlerMap[id]; ok {
|
|
return c.handlers[i]
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// SetHandler sets the active target to the target with the specified index.
|
|
func (c *CDP) SetHandler(i int) error {
|
|
c.Lock()
|
|
defer c.Unlock()
|
|
|
|
if i < 0 || i >= len(c.handlers) {
|
|
return fmt.Errorf("no handler associated with target index %d", i)
|
|
}
|
|
|
|
c.cur = c.handlers[i]
|
|
|
|
return nil
|
|
}
|
|
|
|
// SetHandlerByID sets the active target to the target with the specified id.
|
|
func (c *CDP) SetHandlerByID(id string) error {
|
|
c.Lock()
|
|
defer c.Unlock()
|
|
|
|
if i, ok := c.handlerMap[id]; ok {
|
|
c.cur = c.handlers[i]
|
|
}
|
|
|
|
return fmt.Errorf("no handler associated with target id %s", id)
|
|
}
|
|
|
|
// newTarget creates a new target using supplied context and options, returning
|
|
// the id of the created target only after the target has been started for
|
|
// monitoring.
|
|
func (c *CDP) newTarget(ctxt context.Context, opts ...client.Option) (string, error) {
|
|
c.RLock()
|
|
cl := c.r.Client(opts...)
|
|
c.RUnlock()
|
|
|
|
// new page target
|
|
t, err := cl.NewPageTarget(ctxt)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
timeout := time.After(DefaultNewTargetTimeout)
|
|
|
|
loop:
|
|
for {
|
|
select {
|
|
default:
|
|
var ok bool
|
|
id := t.GetID()
|
|
c.RLock()
|
|
_, ok = c.handlerMap[id]
|
|
c.RUnlock()
|
|
if ok {
|
|
return id, nil
|
|
}
|
|
|
|
time.Sleep(DefaultCheckDuration)
|
|
|
|
case <-ctxt.Done():
|
|
return "", cdp.ErrContextDone
|
|
|
|
case <-timeout:
|
|
break loop
|
|
}
|
|
}
|
|
|
|
return "", errors.New("timeout waiting for new target to be available")
|
|
}
|
|
|
|
// SetTarget is an action that sets the active Chrome handler to the specified
|
|
// index i.
|
|
func (c *CDP) SetTarget(i int) Action {
|
|
return ActionFunc(func(context.Context, cdp.FrameHandler) error {
|
|
return c.SetHandler(i)
|
|
})
|
|
}
|
|
|
|
// SetTargetByID is an action that sets the active Chrome handler to the handler
|
|
// associated with the specified id.
|
|
func (c *CDP) SetTargetByID(id string) Action {
|
|
return ActionFunc(func(context.Context, cdp.FrameHandler) error {
|
|
return c.SetHandlerByID(id)
|
|
})
|
|
}
|
|
|
|
// NewTarget is an action that creates a new Chrome target, and sets it as the
|
|
// active target.
|
|
func (c *CDP) NewTarget(id *string, opts ...client.Option) Action {
|
|
return ActionFunc(func(ctxt context.Context, h cdp.FrameHandler) error {
|
|
n, err := c.newTarget(ctxt, opts...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if id != nil {
|
|
*id = n
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// NewTargetWithURL creates a new Chrome target, sets it as the active target,
|
|
// and then navigates to the specified url.
|
|
func (c *CDP) NewTargetWithURL(urlstr string, id *string, opts ...client.Option) Action {
|
|
return ActionFunc(func(ctxt context.Context, h cdp.FrameHandler) error {
|
|
n, err := c.newTarget(ctxt, opts...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
l := c.GetHandlerByID(n)
|
|
if l == nil {
|
|
return errors.New("could not retrieve newly created target")
|
|
}
|
|
|
|
/*err = Navigate(l, urlstr).Do(ctxt)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if id != nil {
|
|
*id = n
|
|
}*/
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// CloseByIndex closes the Chrome target with specified index i.
|
|
func (c *CDP) CloseByIndex(i int) Action {
|
|
return ActionFunc(func(ctxt context.Context, h cdp.FrameHandler) error {
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// CloseByID closes the Chrome target with the specified id.
|
|
func (c *CDP) CloseByID(id string) Action {
|
|
return ActionFunc(func(ctxt context.Context, h cdp.FrameHandler) error {
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// Run executes the action against the current target using the supplied
|
|
// context.
|
|
func (c *CDP) Run(ctxt context.Context, a Action) error {
|
|
c.RLock()
|
|
cur := c.cur
|
|
c.RUnlock()
|
|
|
|
return a.Do(ctxt, cur)
|
|
}
|
|
|
|
// Option is a Chrome Debugging Protocol option.
|
|
type Option func(*CDP) error
|
|
|
|
// WithRunner is a option to specify the underlying Chrome runner to monitor
|
|
// for page handlers.
|
|
func WithRunner(r *runner.Runner) Option {
|
|
return func(c *CDP) error {
|
|
c.r = r
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithTargets is an option to specify the incoming targets to monitor for page
|
|
// handlers.
|
|
func WithTargets(watch <-chan client.Target) Option {
|
|
return func(c *CDP) error {
|
|
c.watch = watch
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithRunnerOptions is a option to specify the options to pass to a newly
|
|
// created Chrome process runner.
|
|
func WithRunnerOptions(opts ...runner.CommandLineOption) Option {
|
|
return func(c *CDP) error {
|
|
c.opts = opts
|
|
return nil
|
|
}
|
|
}
|