chromedp/chromedp.go

277 lines
7.8 KiB
Go
Raw Normal View History

// Package chromedp is a high level Chrome DevTools Protocol client that
// simplifies driving browsers for scraping, unit testing, or profiling web
// pages using the CDP.
//
// chromedp requires no third-party dependencies, implementing the async Chrome
// DevTools Protocol entirely in Go.
package chromedp
import (
"context"
"fmt"
"sync"
"time"
"github.com/chromedp/cdproto/css"
"github.com/chromedp/cdproto/dom"
"github.com/chromedp/cdproto/inspector"
"github.com/chromedp/cdproto/log"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/cdproto/runtime"
"github.com/chromedp/cdproto/target"
)
// Context is attached to any context.Context which is valid for use with Run.
type Context struct {
// Allocator is used to create new browsers. It is inherited from the
// parent context when using NewContext.
Allocator Allocator
// Browser is the browser being used in the context. It is inherited
// from the parent context when using NewContext.
Browser *Browser
// Target is the target to run actions (commands) against. It is not
// inherited from the parent context, and typically each context will
// have its own unique Target pointing to a separate browser tab (page).
Target *Target
// browserOpts holds the browser options passed to NewContext via
// WithBrowserOption, so that they can later be used when allocating a
// browser in Run.
browserOpts []BrowserOption
// cancel simply cancels the context that was used to start Browser.
// This is useful to stop all activity and avoid deadlocks if we detect
// that the browser was closed or happened to crash. Note that this
// cancel function doesn't do any waiting.
cancel func()
// first records whether this context was the one that allocated
// Browser. This is important, because its cancellation will stop the
// entire browser handler, meaning that no further actions can be
// executed.
first bool
// wg allows waiting for a target to be closed on cancellation.
wg sync.WaitGroup
// cancelErr is the first error encountered when cancelling this
// context, for example if a browser's temporary user data directory
// couldn't be deleted.
cancelErr error
}
// NewContext creates a browser context using the parent context.
func NewContext(parent context.Context, opts ...ContextOption) (context.Context, context.CancelFunc) {
ctx, cancel := context.WithCancel(parent)
c := &Context{cancel: cancel, first: true}
if pc := FromContext(parent); pc != nil {
c.Allocator = pc.Allocator
c.Browser = pc.Browser
// don't inherit SessionID, so that NewContext can be used to
// create a new tab on the same browser.
c.first = c.Browser == nil
}
for _, o := range opts {
o(c)
}
if c.Allocator == nil {
c.Allocator = setupExecAllocator(
NoFirstRun,
NoDefaultBrowserCheck,
Headless,
)
}
ctx = context.WithValue(ctx, contextKey{}, c)
go func() {
<-ctx.Done()
if c.first {
// This is the original browser tab, so the entire
// browser will already be cleaned up elsewhere.
c.wg.Done()
return
}
// Not the original browser tab; simply detach and close it.
// We need a new context, as ctx is cancelled; use a 1s timeout.
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
if id := c.Target.SessionID; id != "" {
action := target.DetachFromTarget().WithSessionID(id)
if err := action.Do(ctx, c.Browser); c.cancelErr == nil {
c.cancelErr = err
}
}
if id := c.Target.TargetID; id != "" {
action := target.CloseTarget(id)
if ok, err := action.Do(ctx, c.Browser); c.cancelErr == nil {
if !ok && err == nil {
err = fmt.Errorf("could not close target %q", id)
}
c.cancelErr = err
}
}
c.wg.Done()
}()
cancelWait := func() {
cancel()
c.wg.Wait()
}
return ctx, cancelWait
}
type contextKey struct{}
// FromContext extracts the Context data stored inside a context.Context.
func FromContext(ctx context.Context) *Context {
c, _ := ctx.Value(contextKey{}).(*Context)
return c
}
// Cancel cancels a chromedp context, waits for its resources to be cleaned up,
// and returns any error encountered during that process.
//
// Usually a "defer cancel()" will be enough for most use cases. This API is
// useful if you want to catch underlying cancel errors, such as when a
// temporary directory cannot be deleted.
func Cancel(ctx context.Context) error {
c := FromContext(ctx)
if c == nil {
return ErrInvalidContext
}
c.cancel()
c.wg.Wait()
return c.cancelErr
}
// Run runs an action against the provided context. The provided context must
// contain a valid Allocator; typically, that will be created via NewContext, or
// via one of the allocator constructors like NewExecAllocator.
func Run(ctx context.Context, actions ...Action) error {
c := FromContext(ctx)
if c == nil || c.Allocator == nil {
return ErrInvalidContext
}
if c.Browser == nil {
browser, err := c.Allocator.Allocate(ctx, c.browserOpts...)
if err != nil {
return err
}
c.Browser = browser
}
if c.Target == nil {
if err := c.newSession(ctx); err != nil {
return err
}
}
return Tasks(actions).Do(ctx, c.Target)
}
func (c *Context) newSession(ctx context.Context) error {
var targetID target.ID
if c.first {
// If we just allocated this browser, and it has a single page
// that's blank and not attached, use it.
infos, err := target.GetTargets().Do(ctx, c.Browser)
if err != nil {
return err
}
pages := 0
for _, info := range infos {
if info.Type == "page" && info.URL == "about:blank" && !info.Attached {
targetID = info.TargetID
pages++
}
}
if pages > 1 {
// Multiple blank pages; just in case, don't use any.
targetID = ""
}
}
if targetID == "" {
var err error
targetID, err = target.CreateTarget("about:blank").Do(ctx, c.Browser)
if err != nil {
return err
}
}
sessionID, err := target.AttachToTarget(targetID).Do(ctx, c.Browser)
if err != nil {
return err
}
c.wg.Add(1)
c.Target = c.Browser.newExecutorForTarget(ctx, targetID, sessionID)
// enable domains
for _, enable := range []Action{
log.Enable(),
runtime.Enable(),
// network.Enable(),
inspector.Enable(),
page.Enable(),
dom.Enable(),
css.Enable(),
} {
if err := enable.Do(ctx, c.Target); err != nil {
return fmt.Errorf("unable to execute %T: %v", enable, err)
}
}
return nil
}
// ContextOption is a context option.
type ContextOption func(*Context)
// WithLogf is a shortcut for WithBrowserOption(WithBrowserLogf(f)).
func WithLogf(f func(string, ...interface{})) ContextOption {
return WithBrowserOption(WithBrowserLogf(f))
}
// WithErrorf is a shortcut for WithBrowserOption(WithBrowserErrorf(f)).
func WithErrorf(f func(string, ...interface{})) ContextOption {
return WithBrowserOption(WithBrowserErrorf(f))
}
// WithDebugf is a shortcut for WithBrowserOption(WithBrowserDebugf(f)).
func WithDebugf(f func(string, ...interface{})) ContextOption {
return WithBrowserOption(WithBrowserDebugf(f))
}
// WithBrowserOption allows passing a number of browser options to the allocator
// when allocating a new browser. As such, this context option can only be used
// when NewContext is allocating a new browser.
func WithBrowserOption(opts ...BrowserOption) ContextOption {
return func(c *Context) {
if !c.first {
panic("WithBrowserOption can only be used when allocating a new browser")
}
c.browserOpts = append(c.browserOpts, opts...)
}
}
// Targets lists all the targets in the browser attached to the given context.
func Targets(ctx context.Context) ([]*target.Info, error) {
// Don't rely on Run, as that needs to be able to call Targets, and we
// don't want cyclic func calls.
c := FromContext(ctx)
if c == nil || c.Allocator == nil {
return nil, ErrInvalidContext
}
if c.Browser == nil {
browser, err := c.Allocator.Allocate(ctx, c.browserOpts...)
if err != nil {
return nil, err
}
c.Browser = browser
}
return target.GetTargets().Do(ctx, c.Browser)
}