chromedp/chromedp.go
Daniel Martí 46982a1cac rework CancelError into Cancel
That way, cancelling a context while checking the error is much simpler.
The context data already holds onto the cancel func, so this requires no
internal changes.

This renders the Browser.Shutdown API obsolete, even though it doesn't
do exactly the same. If we want Cancel to do a proper shutdown action
before cancelling a browser context and killing the process, we could do
that change within the cancel logic.
2019-04-09 13:06:34 +02:00

277 lines
7.8 KiB
Go

// Package chromedp is a high level Chrome DevTools Protocol client that
// simplifies driving browsers for scraping, unit testing, or profiling web
// pages using the CDP.
//
// chromedp requires no third-party dependencies, implementing the async Chrome
// DevTools Protocol entirely in Go.
package chromedp
import (
"context"
"fmt"
"sync"
"time"
"github.com/chromedp/cdproto/css"
"github.com/chromedp/cdproto/dom"
"github.com/chromedp/cdproto/inspector"
"github.com/chromedp/cdproto/log"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/cdproto/runtime"
"github.com/chromedp/cdproto/target"
)
// Context is attached to any context.Context which is valid for use with Run.
type Context struct {
// Allocator is used to create new browsers. It is inherited from the
// parent context when using NewContext.
Allocator Allocator
// Browser is the browser being used in the context. It is inherited
// from the parent context when using NewContext.
Browser *Browser
// Target is the target to run actions (commands) against. It is not
// inherited from the parent context, and typically each context will
// have its own unique Target pointing to a separate browser tab (page).
Target *Target
// browserOpts holds the browser options passed to NewContext via
// WithBrowserOption, so that they can later be used when allocating a
// browser in Run.
browserOpts []BrowserOption
// cancel simply cancels the context that was used to start Browser.
// This is useful to stop all activity and avoid deadlocks if we detect
// that the browser was closed or happened to crash. Note that this
// cancel function doesn't do any waiting.
cancel func()
// first records whether this context was the one that allocated
// Browser. This is important, because its cancellation will stop the
// entire browser handler, meaning that no further actions can be
// executed.
first bool
// wg allows waiting for a target to be closed on cancellation.
wg sync.WaitGroup
// cancelErr is the first error encountered when cancelling this
// context, for example if a browser's temporary user data directory
// couldn't be deleted.
cancelErr error
}
// NewContext creates a browser context using the parent context.
func NewContext(parent context.Context, opts ...ContextOption) (context.Context, context.CancelFunc) {
ctx, cancel := context.WithCancel(parent)
c := &Context{cancel: cancel, first: true}
if pc := FromContext(parent); pc != nil {
c.Allocator = pc.Allocator
c.Browser = pc.Browser
// don't inherit SessionID, so that NewContext can be used to
// create a new tab on the same browser.
c.first = c.Browser == nil
}
for _, o := range opts {
o(c)
}
if c.Allocator == nil {
c.Allocator = setupExecAllocator(
NoFirstRun,
NoDefaultBrowserCheck,
Headless,
)
}
ctx = context.WithValue(ctx, contextKey{}, c)
go func() {
<-ctx.Done()
if c.first {
// This is the original browser tab, so the entire
// browser will already be cleaned up elsewhere.
c.wg.Done()
return
}
// Not the original browser tab; simply detach and close it.
// We need a new context, as ctx is cancelled; use a 1s timeout.
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
if id := c.Target.SessionID; id != "" {
action := target.DetachFromTarget().WithSessionID(id)
if err := action.Do(ctx, c.Browser); c.cancelErr == nil {
c.cancelErr = err
}
}
if id := c.Target.TargetID; id != "" {
action := target.CloseTarget(id)
if ok, err := action.Do(ctx, c.Browser); c.cancelErr == nil {
if !ok && err == nil {
err = fmt.Errorf("could not close target %q", id)
}
c.cancelErr = err
}
}
c.wg.Done()
}()
cancelWait := func() {
cancel()
c.wg.Wait()
}
return ctx, cancelWait
}
type contextKey struct{}
// FromContext extracts the Context data stored inside a context.Context.
func FromContext(ctx context.Context) *Context {
c, _ := ctx.Value(contextKey{}).(*Context)
return c
}
// Cancel cancels a chromedp context, waits for its resources to be cleaned up,
// and returns any error encountered during that process.
//
// Usually a "defer cancel()" will be enough for most use cases. This API is
// useful if you want to catch underlying cancel errors, such as when a
// temporary directory cannot be deleted.
func Cancel(ctx context.Context) error {
c := FromContext(ctx)
if c == nil {
return ErrInvalidContext
}
c.cancel()
c.wg.Wait()
return c.cancelErr
}
// Run runs an action against the provided context. The provided context must
// contain a valid Allocator; typically, that will be created via NewContext, or
// via one of the allocator constructors like NewExecAllocator.
func Run(ctx context.Context, actions ...Action) error {
c := FromContext(ctx)
if c == nil || c.Allocator == nil {
return ErrInvalidContext
}
if c.Browser == nil {
browser, err := c.Allocator.Allocate(ctx, c.browserOpts...)
if err != nil {
return err
}
c.Browser = browser
}
if c.Target == nil {
if err := c.newSession(ctx); err != nil {
return err
}
}
return Tasks(actions).Do(ctx, c.Target)
}
func (c *Context) newSession(ctx context.Context) error {
var targetID target.ID
if c.first {
// If we just allocated this browser, and it has a single page
// that's blank and not attached, use it.
infos, err := target.GetTargets().Do(ctx, c.Browser)
if err != nil {
return err
}
pages := 0
for _, info := range infos {
if info.Type == "page" && info.URL == "about:blank" && !info.Attached {
targetID = info.TargetID
pages++
}
}
if pages > 1 {
// Multiple blank pages; just in case, don't use any.
targetID = ""
}
}
if targetID == "" {
var err error
targetID, err = target.CreateTarget("about:blank").Do(ctx, c.Browser)
if err != nil {
return err
}
}
sessionID, err := target.AttachToTarget(targetID).Do(ctx, c.Browser)
if err != nil {
return err
}
c.wg.Add(1)
c.Target = c.Browser.newExecutorForTarget(ctx, targetID, sessionID)
// enable domains
for _, enable := range []Action{
log.Enable(),
runtime.Enable(),
// network.Enable(),
inspector.Enable(),
page.Enable(),
dom.Enable(),
css.Enable(),
} {
if err := enable.Do(ctx, c.Target); err != nil {
return fmt.Errorf("unable to execute %T: %v", enable, err)
}
}
return nil
}
// ContextOption is a context option.
type ContextOption func(*Context)
// WithLogf is a shortcut for WithBrowserOption(WithBrowserLogf(f)).
func WithLogf(f func(string, ...interface{})) ContextOption {
return WithBrowserOption(WithBrowserLogf(f))
}
// WithErrorf is a shortcut for WithBrowserOption(WithBrowserErrorf(f)).
func WithErrorf(f func(string, ...interface{})) ContextOption {
return WithBrowserOption(WithBrowserErrorf(f))
}
// WithDebugf is a shortcut for WithBrowserOption(WithBrowserDebugf(f)).
func WithDebugf(f func(string, ...interface{})) ContextOption {
return WithBrowserOption(WithBrowserDebugf(f))
}
// WithBrowserOption allows passing a number of browser options to the allocator
// when allocating a new browser. As such, this context option can only be used
// when NewContext is allocating a new browser.
func WithBrowserOption(opts ...BrowserOption) ContextOption {
return func(c *Context) {
if !c.first {
panic("WithBrowserOption can only be used when allocating a new browser")
}
c.browserOpts = append(c.browserOpts, opts...)
}
}
// Targets lists all the targets in the browser attached to the given context.
func Targets(ctx context.Context) ([]*target.Info, error) {
// Don't rely on Run, as that needs to be able to call Targets, and we
// don't want cyclic func calls.
c := FromContext(ctx)
if c == nil || c.Allocator == nil {
return nil, ErrInvalidContext
}
if c.Browser == nil {
browser, err := c.Allocator.Allocate(ctx, c.browserOpts...)
if err != nil {
return nil, err
}
c.Browser = browser
}
return target.GetTargets().Do(ctx, c.Browser)
}