b481eeac51
The API isn't very shiny, but it works. It doesn't matter that much, as most users won't care about these errors. Fixes #295.
265 lines
7.3 KiB
Go
265 lines
7.3 KiB
Go
// Package chromedp is a high-level Chrome DevTools Protocol client that
// simplifies driving browsers for scraping, unit testing, or profiling web
// pages using the CDP.
//
// chromedp requires no third-party dependencies, implementing the async Chrome
// DevTools Protocol entirely in Go.
|
|
package chromedp
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/chromedp/cdproto/css"
|
|
"github.com/chromedp/cdproto/dom"
|
|
"github.com/chromedp/cdproto/inspector"
|
|
"github.com/chromedp/cdproto/log"
|
|
"github.com/chromedp/cdproto/page"
|
|
"github.com/chromedp/cdproto/runtime"
|
|
"github.com/chromedp/cdproto/target"
|
|
)
|
|
|
|
// Context is attached to any context.Context which is valid for use with Run.
|
|
type Context struct {
|
|
// Allocator is used to create new browsers. It is inherited from the
|
|
// parent context when using NewContext.
|
|
Allocator Allocator
|
|
|
|
// Browser is the browser being used in the context. It is inherited
|
|
// from the parent context when using NewContext.
|
|
Browser *Browser
|
|
|
|
// Target is the target to run actions (commands) against. It is not
|
|
// inherited from the parent context, and typically each context will
|
|
// have its own unique Target pointing to a separate browser tab (page).
|
|
Target *Target
|
|
|
|
// browserOpts holds the browser options passed to NewContext via
|
|
// WithBrowserOption, so that they can later be used when allocating a
|
|
// browser in Run.
|
|
browserOpts []BrowserOption
|
|
|
|
// cancel simply cancels the context that was used to start Browser.
|
|
// This is useful to stop all activity and avoid deadlocks if we detect
|
|
// that the browser was closed or happened to crash. Note that this
|
|
// cancel function doesn't do any waiting.
|
|
cancel func()
|
|
|
|
// first records whether this context was the one that allocated
|
|
// Browser. This is important, because its cancellation will stop the
|
|
// entire browser handler, meaning that no further actions can be
|
|
// executed.
|
|
first bool
|
|
|
|
// wg allows waiting for a target to be closed on cancellation.
|
|
wg sync.WaitGroup
|
|
|
|
// cancelErr is the first error encountered when cancelling this
|
|
// context, for example if a browser's temporary user data directory
|
|
// couldn't be deleted.
|
|
cancelErr error
|
|
}
|
|
|
|
// NewContext creates a browser context using the parent context.
|
|
func NewContext(parent context.Context, opts ...ContextOption) (context.Context, context.CancelFunc) {
|
|
ctx, cancel := context.WithCancel(parent)
|
|
|
|
c := &Context{cancel: cancel, first: true}
|
|
if pc := FromContext(parent); pc != nil {
|
|
c.Allocator = pc.Allocator
|
|
c.Browser = pc.Browser
|
|
// don't inherit SessionID, so that NewContext can be used to
|
|
// create a new tab on the same browser.
|
|
|
|
c.first = c.Browser == nil
|
|
}
|
|
|
|
for _, o := range opts {
|
|
o(c)
|
|
}
|
|
if c.Allocator == nil {
|
|
c.Allocator = setupExecAllocator(
|
|
NoFirstRun,
|
|
NoDefaultBrowserCheck,
|
|
Headless,
|
|
)
|
|
}
|
|
|
|
ctx = context.WithValue(ctx, contextKey{}, c)
|
|
go func() {
|
|
<-ctx.Done()
|
|
if c.first {
|
|
// This is the original browser tab, so the entire
|
|
// browser will already be cleaned up elsewhere.
|
|
c.wg.Done()
|
|
return
|
|
}
|
|
|
|
// Not the original browser tab; simply detach and close it.
|
|
// We need a new context, as ctx is cancelled; use a 1s timeout.
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
|
defer cancel()
|
|
if id := c.Target.SessionID; id != "" {
|
|
action := target.DetachFromTarget().WithSessionID(id)
|
|
if err := action.Do(ctx, c.Browser); c.cancelErr == nil {
|
|
c.cancelErr = err
|
|
}
|
|
}
|
|
if id := c.Target.TargetID; id != "" {
|
|
action := target.CloseTarget(id)
|
|
if ok, err := action.Do(ctx, c.Browser); c.cancelErr == nil {
|
|
if !ok && err == nil {
|
|
err = fmt.Errorf("could not close target %q", id)
|
|
}
|
|
c.cancelErr = err
|
|
}
|
|
}
|
|
c.wg.Done()
|
|
}()
|
|
cancelWait := func() {
|
|
cancel()
|
|
c.wg.Wait()
|
|
}
|
|
return ctx, cancelWait
|
|
}
|
|
|
|
// contextKey is the unexported key type under which NewContext stores the
// *Context value inside a context.Context, and with which FromContext looks it
// up again.
type contextKey struct{}
|
|
|
|
// FromContext extracts the Context data stored inside a context.Context.
|
|
func FromContext(ctx context.Context) *Context {
|
|
c, _ := ctx.Value(contextKey{}).(*Context)
|
|
return c
|
|
}
|
|
|
|
func CancelError(ctx context.Context) error {
|
|
c := FromContext(ctx)
|
|
if c == nil {
|
|
return ErrInvalidContext
|
|
}
|
|
return c.cancelErr
|
|
}
|
|
|
|
// Run runs an action against the provided context. The provided context must
|
|
// contain a valid Allocator; typically, that will be created via NewContext, or
|
|
// via one of the allocator constructors like NewExecAllocator.
|
|
func Run(ctx context.Context, actions ...Action) error {
|
|
c := FromContext(ctx)
|
|
if c == nil || c.Allocator == nil {
|
|
return ErrInvalidContext
|
|
}
|
|
if c.Browser == nil {
|
|
browser, err := c.Allocator.Allocate(ctx, c.browserOpts...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
c.Browser = browser
|
|
}
|
|
if c.Target == nil {
|
|
if err := c.newSession(ctx); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return Tasks(actions).Do(ctx, c.Target)
|
|
}
|
|
|
|
func (c *Context) newSession(ctx context.Context) error {
|
|
var targetID target.ID
|
|
if c.first {
|
|
// If we just allocated this browser, and it has a single page
|
|
// that's blank and not attached, use it.
|
|
infos, err := target.GetTargets().Do(ctx, c.Browser)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
pages := 0
|
|
for _, info := range infos {
|
|
if info.Type == "page" && info.URL == "about:blank" && !info.Attached {
|
|
targetID = info.TargetID
|
|
pages++
|
|
}
|
|
}
|
|
if pages > 1 {
|
|
// Multiple blank pages; just in case, don't use any.
|
|
targetID = ""
|
|
}
|
|
}
|
|
|
|
if targetID == "" {
|
|
var err error
|
|
targetID, err = target.CreateTarget("about:blank").Do(ctx, c.Browser)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
sessionID, err := target.AttachToTarget(targetID).Do(ctx, c.Browser)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
c.wg.Add(1)
|
|
|
|
c.Target = c.Browser.newExecutorForTarget(ctx, targetID, sessionID)
|
|
|
|
// enable domains
|
|
for _, enable := range []Action{
|
|
log.Enable(),
|
|
runtime.Enable(),
|
|
//network.Enable(),
|
|
inspector.Enable(),
|
|
page.Enable(),
|
|
dom.Enable(),
|
|
css.Enable(),
|
|
} {
|
|
if err := enable.Do(ctx, c.Target); err != nil {
|
|
return fmt.Errorf("unable to execute %T: %v", enable, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ContextOption is a context option.
|
|
type ContextOption func(*Context)
|
|
|
|
// WithLogf is a shortcut for WithBrowserOption(WithBrowserLogf(f)).
|
|
func WithLogf(f func(string, ...interface{})) ContextOption {
|
|
return WithBrowserOption(WithBrowserLogf(f))
|
|
}
|
|
|
|
// WithErrorf is a shortcut for WithBrowserOption(WithBrowserErrorf(f)).
|
|
func WithErrorf(f func(string, ...interface{})) ContextOption {
|
|
return WithBrowserOption(WithBrowserErrorf(f))
|
|
}
|
|
|
|
// WithBrowserOption allows passing a number of browser options to the allocator
|
|
// when allocating a new browser. As such, this context option can only be used
|
|
// when NewContext is allocating a new browser.
|
|
func WithBrowserOption(opts ...BrowserOption) ContextOption {
|
|
return func(c *Context) {
|
|
if !c.first {
|
|
panic("WithBrowserOption can only be used when allocating a new browser")
|
|
}
|
|
c.browserOpts = append(c.browserOpts, opts...)
|
|
}
|
|
}
|
|
|
|
// Targets lists all the targets in the browser attached to the given context.
|
|
func Targets(ctx context.Context) ([]*target.Info, error) {
|
|
// Don't rely on Run, as that needs to be able to call Targets, and we
|
|
// don't want cyclic func calls.
|
|
|
|
c := FromContext(ctx)
|
|
if c == nil || c.Allocator == nil {
|
|
return nil, ErrInvalidContext
|
|
}
|
|
if c.Browser == nil {
|
|
browser, err := c.Allocator.Allocate(ctx, c.browserOpts...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
c.Browser = browser
|
|
}
|
|
return target.GetTargets().Do(ctx, c.Browser)
|
|
}
|