chromedp/chromedp.go

// Package chromedp is a high level Chrome DevTools Protocol client that
// simplifies driving browsers for scraping, unit testing, or profiling web
// pages using the CDP.
//
// chromedp requires no third-party dependencies, implementing the async Chrome
// DevTools Protocol entirely in Go.
package chromedp

import (
	"context"
	"errors"
	"fmt"
	"log"
	"sync"
	"time"

	"github.com/chromedp/cdproto/cdp"

	"github.com/chromedp/chromedp/client"
	"github.com/chromedp/chromedp/runner"
)

const (
	// DefaultNewTargetTimeout is the default time to wait for a new target to
	// be started.
	DefaultNewTargetTimeout = 3 * time.Second

	// DefaultCheckDuration is the default time to sleep between a check.
	DefaultCheckDuration = 50 * time.Millisecond

	// DefaultPoolStartPort is the default start port number.
	DefaultPoolStartPort = 9000

	// DefaultPoolEndPort is the default end port number.
	DefaultPoolEndPort = 10000
)

// CDP is the high-level Chrome DevTools Protocol browser manager, handling the
// browser process runner, WebSocket clients, associated targets, and network,
// page, and DOM events.
type CDP struct {
	// r is the chrome runner.
	r *runner.Runner

	// opts are command line options to pass to a created runner.
	opts []runner.CommandLineOption

	// watch is the channel for new client targets.
	watch <-chan client.Target

	// cur is the current active target's handler.
	cur cdp.Executor

	// handlers is the active handlers.
	handlers []*TargetHandler

	// handlerMap is the map of target IDs to its active handler.
	handlerMap map[string]int

	// logging funcs
	logf, debugf, errf func(string, ...interface{})

	sync.RWMutex
}

// New creates and starts a new CDP instance.
func New(ctxt context.Context, opts ...Option) (*CDP, error) {
	c := &CDP{
		handlers:   make([]*TargetHandler, 0),
		handlerMap: make(map[string]int),
		logf:       log.Printf,
		debugf:     func(string, ...interface{}) {},
		errf:       func(s string, v ...interface{}) { log.Printf("error: "+s, v...) },
	}

	// apply options
	for _, o := range opts {
		if err := o(c); err != nil {
			return nil, err
		}
	}

	// check for supplied runner, if none then create one
	if c.r == nil && c.watch == nil {
		var err error
		c.r, err = runner.Run(ctxt, c.opts...)
		if err != nil {
			return nil, err
		}
	}

	// watch handlers
	if c.watch == nil {
		c.watch = c.r.Client().WatchPageTargets(ctxt)
	}

	go func() {
		for t := range c.watch {
			if t == nil {
				return
			}
			go c.AddTarget(ctxt, t)
		}
	}()

	// TODO: fix this
	timeout := time.After(defaultNewTargetTimeout)

	// wait until at least one target active
	for {
		select {
		default:
			c.RLock()
			exists := c.cur != nil
			c.RUnlock()
			if exists {
				return c, nil
			}

			// TODO: fix this
			time.Sleep(DefaultCheckDuration)

		case <-ctxt.Done():
			return nil, ctxt.Err()

		case <-timeout:
			return nil, errors.New("timeout waiting for initial target")
		}
	}
}

// AddTarget adds a target using the supplied context.
func (c *CDP) AddTarget(ctxt context.Context, t client.Target) {
	c.Lock()
	defer c.Unlock()

	// create target manager
	h, err := NewTargetHandler(t, c.logf, c.debugf, c.errf)
	if err != nil {
		c.errf("could not create handler for %s: %v", t, err)
		return
	}

	// run
	if err := h.Run(ctxt); err != nil {
		c.errf("could not start handler for %s: %v", t, err)
		return
	}

	// add to active handlers
	c.handlers = append(c.handlers, h)
	c.handlerMap[t.GetID()] = len(c.handlers) - 1
	if c.cur == nil {
		c.cur = h
	}
}

// Wait waits for the Chrome runner to terminate.
func (c *CDP) Wait() error {
	c.RLock()
	r := c.r
	c.RUnlock()

	if r != nil {
		return r.Wait()
	}

	return nil
}

// Shutdown closes all Chrome page handlers.
func (c *CDP) Shutdown(ctxt context.Context, opts ...client.Option) error {
	c.RLock()
	defer c.RUnlock()

	if c.r != nil {
		return c.r.Shutdown(ctxt, opts...)
	}

	return nil
}

// ListTargets returns the target IDs of the managed targets.
func (c *CDP) ListTargets() []string {
	c.RLock()
	defer c.RUnlock()

	i, targets := 0, make([]string, len(c.handlers))
	for k := range c.handlerMap {
		targets[i] = k
		i++
	}

	return targets
}

// GetHandlerByIndex retrieves the domains manager for the specified index.
func (c *CDP) GetHandlerByIndex(i int) cdp.Executor {
	c.RLock()
	defer c.RUnlock()

	if i < 0 || i >= len(c.handlers) {
		return nil
	}

	return c.handlers[i]
}

// GetHandlerByID retrieves the domains manager for the specified target ID.
func (c *CDP) GetHandlerByID(id string) cdp.Executor {
	c.RLock()
	defer c.RUnlock()

	if i, ok := c.handlerMap[id]; ok {
		return c.handlers[i]
	}

	return nil
}

// SetHandler sets the active handler to the target with the specified index.
func (c *CDP) SetHandler(i int) error {
	c.Lock()
	defer c.Unlock()

	if i < 0 || i >= len(c.handlers) {
		return fmt.Errorf("no handler associated with target index %d", i)
	}

	c.cur = c.handlers[i]

	return nil
}

// SetHandlerByID sets the active target to the target with the specified id.
func (c *CDP) SetHandlerByID(id string) error {
	c.Lock()
	defer c.Unlock()

	if i, ok := c.handlerMap[id]; ok {
		c.cur = c.handlers[i]
		return nil
	}

	return fmt.Errorf("no handler associated with target id %s", id)
}

// newTarget creates a new target using supplied context and options, returning
// the id of the created target only after the target has been started for
// monitoring.
func (c *CDP) newTarget(ctxt context.Context, opts ...client.Option) (string, error) {
	c.RLock()
	cl := c.r.Client(opts...)
	c.RUnlock()

	// new page target
	t, err := cl.NewPageTarget(ctxt)
	if err != nil {
		return "", err
	}

	timeout := time.After(DefaultNewTargetTimeout)

	for {
		select {
		default:
			var ok bool
			id := t.GetID()
			c.RLock()
			_, ok = c.handlerMap[id]
			c.RUnlock()
			if ok {
				return id, nil
			}

			time.Sleep(DefaultCheckDuration)

		case <-ctxt.Done():
			return "", ctxt.Err()

		case <-timeout:
			return "", errors.New("timeout waiting for new target to be available")
		}
	}
}

// SetTarget is an action that sets the active Chrome handler to the specified
// index i.
func (c *CDP) SetTarget(i int) Action {
	return ActionFunc(func(context.Context, cdp.Executor) error {
		return c.SetHandler(i)
	})
}

// SetTargetByID is an action that sets the active Chrome handler to the handler
// associated with the specified id.
func (c *CDP) SetTargetByID(id string) Action {
	return ActionFunc(func(context.Context, cdp.Executor) error {
		return c.SetHandlerByID(id)
	})
}

// NewTarget is an action that creates a new Chrome target, and sets it as the
// active target.
func (c *CDP) NewTarget(id *string, opts ...client.Option) Action {
	return ActionFunc(func(ctxt context.Context, h cdp.Executor) error {
		n, err := c.newTarget(ctxt, opts...)
		if err != nil {
			return err
		}

		if id != nil {
			*id = n
		}

		return nil
	})
}

// CloseByIndex closes the Chrome target with specified index i.
func (c *CDP) CloseByIndex(i int) Action {
	return ActionFunc(func(ctxt context.Context, h cdp.Executor) error {
		return nil
	})
}

// CloseByID closes the Chrome target with the specified id.
func (c *CDP) CloseByID(id string) Action {
	return ActionFunc(func(ctxt context.Context, h cdp.Executor) error {
		return nil
	})
}

// Run executes the action against the current target using the supplied
// context.
func (c *CDP) Run(ctxt context.Context, a Action) error {
	c.RLock()
	cur := c.cur
	c.RUnlock()

	return a.Do(ctxt, cur)
}

// Option is a Chrome DevTools Protocol option.
type Option func(*CDP) error

// WithRunner is a CDP option to specify the underlying Chrome runner to
// monitor for page handlers.
func WithRunner(r *runner.Runner) Option {
	return func(c *CDP) error {
		c.r = r
		return nil
	}
}

// WithTargets is a CDP option to specify the incoming targets to monitor for
// page handlers.
func WithTargets(watch <-chan client.Target) Option {
	return func(c *CDP) error {
		c.watch = watch
		return nil
	}
}

// WithClient is a CDP option to use the incoming targets from a client.
func WithClient(ctxt context.Context, cl *client.Client) Option {
	return func(c *CDP) error {
		return WithTargets(cl.WatchPageTargets(ctxt))(c)
	}
}

// WithURL is a CDP option to use a client with the specified URL.
func WithURL(ctxt context.Context, urlstr string) Option {
	return func(c *CDP) error {
		return WithClient(ctxt, client.New(client.URL(urlstr)))(c)
	}
}

// WithRunnerOptions is a CDP option to specify the options to pass to a newly
// created Chrome process runner.
func WithRunnerOptions(opts ...runner.CommandLineOption) Option {
	return func(c *CDP) error {
		c.opts = opts
		return nil
	}
}

// WithLogf is a CDP option to specify a func to receive general logging.
func WithLogf(f func(string, ...interface{})) Option {
	return func(c *CDP) error {
		c.logf = f
		return nil
	}
}

// WithDebugf is a CDP option to specify a func to receive debug logging (ie,
// protocol information).
func WithDebugf(f func(string, ...interface{})) Option {
	return func(c *CDP) error {
		c.debugf = f
		return nil
	}
}

// WithErrorf is a CDP option to specify a func to receive error logging.
func WithErrorf(f func(string, ...interface{})) Option {
	return func(c *CDP) error {
		c.errf = f
		return nil
	}
}

// WithLog is a CDP option that sets the logging, debugging, and error funcs to
// f.
func WithLog(f func(string, ...interface{})) Option {
	return func(c *CDP) error {
		c.logf, c.debugf, c.errf = f, f, f
		return nil
	}
}

// WithConsolef is a CDP option to specify a func to receive chrome log events.
//
// Note: NOT YET IMPLEMENTED.
func WithConsolef(f func(string, ...interface{})) Option {
	return func(c *CDP) error {
		return nil
	}
}

var (
	// defaultNewTargetTimeout is the default target timeout -- used by
	// testing.
	defaultNewTargetTimeout = DefaultNewTargetTimeout
)