chromedp/browser.go
Daniel Martí 7c1a9fbf3e get rid of all exceptions
We hadn't noticed a few uncaught exceptions being received from the
browser, because the events were ignored. Start printing them via the
error logger.

The ones we were getting were caused by testAllocate running Navigate
actions when the path argument was empty. Navigating to "testdata/"
causes JS exceptions, as it's not a valid page.

Instead, leave the new target pointing at a blank document.
2019-04-01 12:18:16 +01:00

305 lines
6.6 KiB
Go

// Package chromedp is a high level Chrome DevTools Protocol client that
// simplifies driving browsers for scraping, unit testing, or profiling web
// pages using the CDP.
//
// chromedp requires no third-party dependencies, implementing the async Chrome
// DevTools Protocol entirely in Go.
package chromedp
import (
"context"
"encoding/json"
"log"
"sync/atomic"
"github.com/chromedp/cdproto"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/runtime"
"github.com/chromedp/cdproto/target"
"github.com/mailru/easyjson"
)
// Browser is the high-level Chrome DevTools Protocol browser manager, handling
// the browser process runner, WebSocket clients, associated targets, and
// network, page, and DOM events.
type Browser struct {
UserDataDir string
pages map[target.SessionID]*Target
conn Transport
// next is the next message id.
next int64
cmdQueue chan cmdJob
// qres is the incoming command result queue.
qres chan *cdproto.Message
// logging funcs
logf func(string, ...interface{})
errf func(string, ...interface{})
}
type cmdJob struct {
msg *cdproto.Message
resp chan *cdproto.Message
}
// NewBrowser creates a new browser.
func NewBrowser(ctx context.Context, urlstr string, opts ...BrowserOption) (*Browser, error) {
conn, err := DialContext(ctx, ForceIP(urlstr))
if err != nil {
return nil, err
}
b := &Browser{
conn: conn,
pages: make(map[target.SessionID]*Target, 1024),
logf: log.Printf,
}
// apply options
for _, o := range opts {
if err := o(b); err != nil {
return nil, err
}
}
// ensure errf is set
if b.errf == nil {
b.errf = func(s string, v ...interface{}) { b.logf("ERROR: "+s, v...) }
}
return b, nil
}
// Shutdown shuts down the browser.
func (b *Browser) Shutdown() error {
if b.conn != nil {
if err := b.send(cdproto.CommandBrowserClose, nil); err != nil {
b.errf("could not close browser: %v", err)
}
return b.conn.Close()
}
return nil
}
// send writes the supplied message and params.
func (b *Browser) send(method cdproto.MethodType, params easyjson.RawMessage) error {
msg := &cdproto.Message{
ID: atomic.AddInt64(&b.next, 1),
Method: method,
Params: params,
}
return b.conn.Write(msg)
}
func (b *Browser) executorForTarget(ctx context.Context, sessionID target.SessionID) *Target {
if sessionID == "" {
panic("empty session ID")
}
if t, ok := b.pages[sessionID]; ok {
return t
}
t := &Target{
browser: b,
sessionID: sessionID,
eventQueue: make(chan *cdproto.Message, 1024),
waitQueue: make(chan func(cur *cdp.Frame) bool, 1024),
frames: make(map[cdp.FrameID]*cdp.Frame),
logf: b.logf,
errf: b.errf,
}
go t.run(ctx)
b.pages[sessionID] = t
return t
}
func (b *Browser) Execute(ctx context.Context, method string, params json.Marshaler, res json.Unmarshaler) error {
paramsMsg := emptyObj
if params != nil {
var err error
if paramsMsg, err = json.Marshal(params); err != nil {
return err
}
}
id := atomic.AddInt64(&b.next, 1)
ch := make(chan *cdproto.Message, 1)
b.cmdQueue <- cmdJob{
msg: &cdproto.Message{
ID: id,
Method: cdproto.MethodType(method),
Params: paramsMsg,
},
resp: ch,
}
select {
case msg := <-ch:
switch {
case msg == nil:
return ErrChannelClosed
case msg.Error != nil:
return msg.Error
case res != nil:
return json.Unmarshal(msg.Result, res)
}
case <-ctx.Done():
return ctx.Err()
}
return nil
}
func (b *Browser) Start(ctx context.Context) {
b.cmdQueue = make(chan cmdJob)
b.qres = make(chan *cdproto.Message)
go b.run(ctx)
}
func (b *Browser) run(ctx context.Context) {
defer b.conn.Close()
// add cancel to context
ctx, cancel := context.WithCancel(ctx)
defer cancel()
go func() {
defer cancel()
for {
select {
case <-ctx.Done():
return
default:
// continue below
}
msg, err := b.conn.Read()
if err != nil {
return
}
var sessionID target.SessionID
if msg.Method == cdproto.EventTargetReceivedMessageFromTarget {
recv := new(target.EventReceivedMessageFromTarget)
if err := json.Unmarshal(msg.Params, recv); err != nil {
b.errf("%s", err)
continue
}
sessionID = recv.SessionID
msg = new(cdproto.Message)
if err := json.Unmarshal([]byte(recv.Message), msg); err != nil {
b.errf("%s", err)
continue
}
}
switch {
case msg.Method != "":
if sessionID == "" {
// TODO: are we interested in
// these events?
continue
}
if msg.Method == cdproto.EventRuntimeExceptionThrown {
ev := new(runtime.EventExceptionThrown)
if err := json.Unmarshal(msg.Params, ev); err != nil {
b.errf("%s", err)
continue
}
b.errf("%+v\n", ev.ExceptionDetails)
}
page, ok := b.pages[sessionID]
if !ok {
b.errf("unknown session ID %q", sessionID)
continue
}
select {
case page.eventQueue <- msg:
default:
panic("eventQueue is full")
}
case msg.ID != 0:
b.qres <- msg
default:
b.errf("ignoring malformed incoming message (missing id or method): %#v", msg)
}
}
}()
respByID := make(map[int64]chan *cdproto.Message)
// process queues
for {
select {
case res := <-b.qres:
resp, ok := respByID[res.ID]
if !ok {
b.errf("id %d not present in response map", res.ID)
continue
}
if resp != nil {
// resp could be nil, if we're not interested in
// this response; for CommandSendMessageToTarget.
resp <- res
close(resp)
}
delete(respByID, res.ID)
case q := <-b.cmdQueue:
if _, ok := respByID[q.msg.ID]; ok {
b.errf("id %d already present in response map", q.msg.ID)
continue
}
respByID[q.msg.ID] = q.resp
if q.msg.Method == "" {
// Only register the chananel in respByID;
// useful for CommandSendMessageToTarget.
continue
}
if err := b.conn.Write(q.msg); err != nil {
b.errf("%s", err)
continue
}
case <-ctx.Done():
return
}
}
}
// BrowserOption is a browser option.
type BrowserOption func(*Browser) error
// WithLogf is a browser option to specify a func to receive general logging.
func WithLogf(f func(string, ...interface{})) BrowserOption {
return func(b *Browser) error {
b.logf = f
return nil
}
}
// WithErrorf is a browser option to specify a func to receive error logging.
func WithErrorf(f func(string, ...interface{})) BrowserOption {
return func(b *Browser) error {
b.errf = f
return nil
}
}
// WithConsolef is a browser option to specify a func to receive chrome log events.
//
// Note: NOT YET IMPLEMENTED.
func WithConsolef(f func(string, ...interface{})) BrowserOption {
return func(b *Browser) error {
return nil
}
}