chromedp/chromedp.go
2017-01-26 14:28:34 +07:00

393 lines
8.4 KiB
Go

// Package chromedp is a high level Chrome Debugging Protocol domain manager
// that simplifies driving web browsers (Chrome, Safari, Edge, Android Web
// Views, and others) for scraping, unit testing, or profiling web pages.
// chromedp requires no third-party dependencies (e.g., Selenium), implementing
// the async Chrome Debugging Protocol natively.
package chromedp
import (
"context"
"errors"
"fmt"
"log"
"sync"
"time"
"github.com/knq/chromedp/cdp"
"github.com/knq/chromedp/client"
"github.com/knq/chromedp/runner"
)
// Timing defaults used while waiting for targets to become available.
const (
	// DefaultCheckDuration is the default pause between two consecutive
	// availability checks.
	DefaultCheckDuration = 50 * time.Millisecond

	// DefaultNewTargetTimeout is the default upper bound on how long to wait
	// for a new target to be started.
	DefaultNewTargetTimeout = 3 * time.Second
)
// CDP contains information for managing a Chrome process runner, low level
// client and associated target page handlers.
//
// A CDP is created with New. The embedded RWMutex is taken by the methods of
// this type whenever they read or modify cur, handlers or handlerMap.
type CDP struct {
// r is the chrome runner. It is either supplied through WithRunner or
// started by New; it stays nil when only WithTargets is used.
r *runner.Runner
// opts are command line options to pass to a created runner.
opts []runner.CommandLineOption
// watch is the channel for new client targets; every target received on
// it is handed to AddTarget.
watch <-chan client.Target
// cur is the current active target's handler (nil until the first target
// has been added).
cur cdp.FrameHandler
// handlers is the active handlers, in the order they were added.
handlers []cdp.FrameHandler
// handlerMap is the map of target IDs to the index of its active handler
// within handlers.
handlerMap map[string]int
sync.RWMutex
}
// New creates a new Chrome Debugging Protocol client.
//
// The supplied options are applied in order; the first option returning an
// error aborts construction. When neither a runner (WithRunner) nor a target
// channel (WithTargets) has been supplied, a new Chrome runner is started
// with the configured runner options. Targets arriving on the watch channel
// are registered through AddTarget in their own goroutines.
//
// New blocks until at least one target handler is active, ctxt is done
// (cdp.ErrContextDone is returned), or DefaultNewTargetTimeout elapses.
//
// A nil ctxt is treated as context.Background().
func New(ctxt context.Context, opts ...Option) (*CDP, error) {
	c := &CDP{
		handlers:   make([]cdp.FrameHandler, 0),
		handlerMap: make(map[string]int),
	}

	// apply options
	for _, o := range opts {
		if err := o(c); err != nil {
			return nil, err
		}
	}

	// setup context
	//
	// The context is handed to the runner, the target watcher and every
	// target handler, all of which outlive this call. It must therefore not
	// be cancelled when New returns, so no cancel func is deferred here.
	if ctxt == nil {
		ctxt = context.Background()
	}

	// check for supplied runner, if none then create one
	if c.r == nil && c.watch == nil {
		r, err := runner.Run(ctxt, c.opts...)
		if err != nil {
			return nil, err
		}
		c.r = r
	}

	// watch handlers
	if c.watch == nil {
		c.watch = c.r.WatchPageTargets(ctxt)
	}

	go func() {
		for t := range c.watch {
			go c.AddTarget(ctxt, t)
		}
	}()

	// wait until at least one target is active, re-checking every
	// DefaultCheckDuration while staying responsive to cancellation
	timeout := time.NewTimer(DefaultNewTargetTimeout)
	defer timeout.Stop()
	tick := time.NewTicker(DefaultCheckDuration)
	defer tick.Stop()

	for {
		c.RLock()
		exists := c.cur != nil
		c.RUnlock()
		if exists {
			return c, nil
		}

		select {
		case <-ctxt.Done():
			return nil, cdp.ErrContextDone
		case <-timeout.C:
			return nil, errors.New("timeout waiting for initial target")
		case <-tick.C:
		}
	}
}
// AddTarget creates a handler for target t, starts it with ctxt and registers
// it with c. Failures to create or start the handler are logged and the
// target is skipped. The first successfully added handler becomes the
// current handler.
//
// The write lock is held for the whole call, including handler start-up.
func (c *CDP) AddTarget(ctxt context.Context, t client.Target) {
	c.Lock()
	defer c.Unlock()

	// build the handler managing this target
	handler, err := NewTargetHandler(t)
	if err != nil {
		log.Printf("error: could not create handler for %s, got: %v", t, err)
		return
	}

	// start it
	if err := handler.Run(ctxt); err != nil {
		log.Printf("error: could not start handler for %s, got: %v", t, err)
		return
	}

	// register: the handler's slot in c.handlers is recorded under the
	// target's ID
	pos := len(c.handlers)
	c.handlers = append(c.handlers, handler)
	c.handlerMap[t.GetID()] = pos

	if c.cur == nil {
		c.cur = handler
	}
}
// Wait blocks until the Chrome runner terminates and returns the runner's
// result. When no runner is attached, Wait returns nil immediately.
func (c *CDP) Wait() error {
	// snapshot the runner so the lock is not held while waiting
	c.RLock()
	run := c.r
	c.RUnlock()

	if run == nil {
		return nil
	}
	return run.Wait()
}
// Shutdown asks the Chrome runner to shut down, forwarding ctxt and opts to
// the runner's Shutdown method and returning its result.
//
// When no runner is attached (for example when the CDP was built only with
// WithTargets), there is nothing to shut down and nil is returned, mirroring
// the behavior of Wait.
func (c *CDP) Shutdown(ctxt context.Context, opts ...client.Option) error {
	c.RLock()
	defer c.RUnlock()

	if c.r == nil {
		return nil
	}
	return c.r.Shutdown(ctxt, opts...)
}
// ListTargets returns the target IDs of the managed targets.
//
// The result holds exactly one entry per ID in the handler map, in no
// particular order (map iteration order), and is never nil.
func (c *CDP) ListTargets() []string {
	c.RLock()
	defer c.RUnlock()

	// size by the map being iterated, not by the handlers slice: the two can
	// differ in length, which previously produced empty trailing entries or
	// an out-of-range write
	targets := make([]string, 0, len(c.handlerMap))
	for id := range c.handlerMap {
		targets = append(targets, id)
	}
	return targets
}
// GetHandlerByIndex returns the handler stored at position i of the active
// handlers, or nil when i is out of range.
func (c *CDP) GetHandlerByIndex(i int) cdp.FrameHandler {
	c.RLock()
	defer c.RUnlock()

	if i >= 0 && i < len(c.handlers) {
		return c.handlers[i]
	}
	return nil
}
// GetHandlerByID returns the handler registered for the target with the
// given id, or nil when no such target is known.
func (c *CDP) GetHandlerByID(id string) cdp.FrameHandler {
	c.RLock()
	defer c.RUnlock()

	idx, found := c.handlerMap[id]
	if !found {
		return nil
	}
	return c.handlers[idx]
}
// SetHandler makes the handler at index i the active target. An error is
// returned, and the active target left untouched, when i is out of range.
func (c *CDP) SetHandler(i int) error {
	c.Lock()
	defer c.Unlock()

	if i >= 0 && i < len(c.handlers) {
		c.cur = c.handlers[i]
		return nil
	}
	return fmt.Errorf("no handler associated with target index %d", i)
}
// SetHandlerByID sets the active target to the target with the specified id.
//
// It returns nil on success. When no handler is registered for id, the active
// target is left unchanged and an error is returned.
func (c *CDP) SetHandlerByID(id string) error {
	c.Lock()
	defer c.Unlock()

	i, ok := c.handlerMap[id]
	if !ok {
		return fmt.Errorf("no handler associated with target id %s", id)
	}

	c.cur = c.handlers[i]
	return nil
}
// newTarget creates a new target using supplied context and options, returning
// the id of the created target only after the target has been started for
// monitoring (that is, once its id shows up in the handler map).
//
// It returns an error when no runner is attached, when the page target cannot
// be created, when ctxt is done (cdp.ErrContextDone), or when the target has
// not been registered within DefaultNewTargetTimeout.
func (c *CDP) newTarget(ctxt context.Context, opts ...client.Option) (string, error) {
	c.RLock()
	if c.r == nil {
		// a CDP built only from a target channel has no runner to ask for a
		// client
		c.RUnlock()
		return "", errors.New("no runner available to create new target")
	}
	cl := c.r.Client(opts...)
	c.RUnlock()

	// new page target
	t, err := cl.NewPageTarget(ctxt)
	if err != nil {
		return "", err
	}

	// poll the handler map every DefaultCheckDuration while staying
	// responsive to cancellation and the overall timeout
	timeout := time.NewTimer(DefaultNewTargetTimeout)
	defer timeout.Stop()
	tick := time.NewTicker(DefaultCheckDuration)
	defer tick.Stop()

	for {
		id := t.GetID()
		c.RLock()
		_, ok := c.handlerMap[id]
		c.RUnlock()
		if ok {
			return id, nil
		}

		select {
		case <-ctxt.Done():
			return "", cdp.ErrContextDone
		case <-timeout.C:
			return "", errors.New("timeout waiting for new target to be available")
		case <-tick.C:
		}
	}
}
// SetTarget returns an action that, when run, makes the handler at index i
// the active Chrome handler. The action ignores its own context and handler
// arguments and reports whatever SetHandler returns.
func (c *CDP) SetTarget(i int) Action {
	selectByIndex := func(context.Context, cdp.FrameHandler) error {
		return c.SetHandler(i)
	}
	return ActionFunc(selectByIndex)
}
// SetTargetByID returns an action that, when run, makes the handler
// associated with the specified id the active Chrome handler. The action
// ignores its own context and handler arguments and reports whatever
// SetHandlerByID returns.
func (c *CDP) SetTargetByID(id string) Action {
	selectByID := func(context.Context, cdp.FrameHandler) error {
		return c.SetHandlerByID(id)
	}
	return ActionFunc(selectByID)
}
// NewTarget returns an action that creates a new Chrome target and waits for
// it to be registered. When id is non-nil, the new target's id is stored in
// *id on success.
//
// The action itself does not switch the active target; use SetTargetByID
// with the reported id to do so.
func (c *CDP) NewTarget(id *string, opts ...client.Option) Action {
	return ActionFunc(func(ctxt context.Context, h cdp.FrameHandler) error {
		targetID, err := c.newTarget(ctxt, opts...)
		if err != nil {
			return err
		}

		// report the id only to callers that asked for it
		if id == nil {
			return nil
		}
		*id = targetID
		return nil
	})
}
// NewTargetWithURL returns an action that creates a new Chrome target and
// verifies that its handler has been registered. When id is non-nil, the new
// target's id is stored in *id on success.
//
// NOTE: navigation to urlstr is currently disabled (see the TODO below), so
// urlstr is not used yet; the action also does not switch the active target.
func (c *CDP) NewTargetWithURL(urlstr string, id *string, opts ...client.Option) Action {
	return ActionFunc(func(ctxt context.Context, h cdp.FrameHandler) error {
		n, err := c.newTarget(ctxt, opts...)
		if err != nil {
			return err
		}

		l := c.GetHandlerByID(n)
		if l == nil {
			return errors.New("could not retrieve newly created target")
		}

		// TODO: re-enable navigation once it is available for this handler
		// type; the disabled call was:
		//
		//	err = Navigate(l, urlstr).Do(ctxt)
		//	if err != nil {
		//		return err
		//	}

		// report the id to the caller; this does not depend on navigation
		// and was previously disabled together with it
		if id != nil {
			*id = n
		}

		return nil
	})
}
// CloseByIndex is intended to close the Chrome target with specified index i.
//
// NOTE: this is currently a placeholder; the returned action performs no
// work and always returns nil.
func (c *CDP) CloseByIndex(i int) Action {
	noop := func(context.Context, cdp.FrameHandler) error {
		return nil
	}
	return ActionFunc(noop)
}
// CloseByID is intended to close the Chrome target with the specified id.
//
// NOTE: this is currently a placeholder; the returned action performs no
// work and always returns nil.
func (c *CDP) CloseByID(id string) Action {
	noop := func(context.Context, cdp.FrameHandler) error {
		return nil
	}
	return ActionFunc(noop)
}
// Run executes action a against the currently active target's handler using
// the supplied context, and returns the action's result.
func (c *CDP) Run(ctxt context.Context, a Action) error {
	// snapshot the active handler so the lock is not held while the action
	// runs
	c.RLock()
	active := c.cur
	c.RUnlock()

	return a.Do(ctxt, active)
}
// Option is a Chrome Debugging Protocol option. Options are passed to New,
// which applies them in order to the CDP being built and aborts on the first
// option that returns a non-nil error.
type Option func(*CDP) error
// WithRunner is an option that sets the underlying Chrome runner to monitor
// for page handlers, instead of having New start one.
func WithRunner(r *runner.Runner) Option {
	useRunner := func(c *CDP) error {
		c.r = r
		return nil
	}
	return useRunner
}
// WithTargets is an option that sets the channel of incoming targets to
// monitor for page handlers.
func WithTargets(watch <-chan client.Target) Option {
	useTargets := func(c *CDP) error {
		c.watch = watch
		return nil
	}
	return useTargets
}
// WithRunnerOptions is an option that sets the command line options passed to
// a newly created Chrome process runner. Any previously configured runner
// options are replaced.
func WithRunnerOptions(opts ...runner.CommandLineOption) Option {
	useOptions := func(c *CDP) error {
		c.opts = opts
		return nil
	}
	return useOptions
}