chromedp/browser.go
Daniel Martí 1decbccd74 store a Target pointer directly in Context
That way, we avoid the racy map access via Browser.executorForTarget. If
a context is attached to a target, the Target field must be non-nil.

The Browser.pages map is still racy, since multiple tabs can be created
concurrently; we'll fix this other data race in another commit.
2019-04-01 14:31:11 +01:00

305 lines
6.6 KiB
Go

// Package chromedp is a high level Chrome DevTools Protocol client that
// simplifies driving browsers for scraping, unit testing, or profiling web
// pages using the CDP.
//
// chromedp requires no third-party dependencies, implementing the async Chrome
// DevTools Protocol entirely in Go.
package chromedp
import (
"context"
"encoding/json"
"fmt"
"log"
"sync/atomic"
"github.com/chromedp/cdproto"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/runtime"
"github.com/chromedp/cdproto/target"
"github.com/mailru/easyjson"
)
// Browser is the high-level Chrome DevTools Protocol browser manager, handling
// the browser process runner, WebSocket clients, associated targets, and
// network, page, and DOM events.
type Browser struct {
userDataDir string
pages map[target.SessionID]*Target
conn Transport
// next is the next message id.
next int64
cmdQueue chan cmdJob
// qres is the incoming command result queue.
qres chan *cdproto.Message
// logging funcs
logf func(string, ...interface{})
errf func(string, ...interface{})
}
type cmdJob struct {
msg *cdproto.Message
resp chan *cdproto.Message
}
// NewBrowser creates a new browser.
func NewBrowser(ctx context.Context, urlstr string, opts ...BrowserOption) (*Browser, error) {
conn, err := DialContext(ctx, ForceIP(urlstr))
if err != nil {
return nil, err
}
b := &Browser{
conn: conn,
pages: make(map[target.SessionID]*Target, 1024),
cmdQueue: make(chan cmdJob),
qres: make(chan *cdproto.Message),
logf: log.Printf,
}
// apply options
for _, o := range opts {
if err := o(b); err != nil {
return nil, err
}
}
// ensure errf is set
if b.errf == nil {
b.errf = func(s string, v ...interface{}) { b.logf("ERROR: "+s, v...) }
}
go b.run(ctx)
return b, nil
}
// Shutdown shuts down the browser.
func (b *Browser) Shutdown() error {
if b.conn != nil {
if err := b.send(cdproto.CommandBrowserClose, nil); err != nil {
b.errf("could not close browser: %v", err)
}
return b.conn.Close()
}
return nil
}
// send writes the supplied message and params.
func (b *Browser) send(method cdproto.MethodType, params easyjson.RawMessage) error {
msg := &cdproto.Message{
ID: atomic.AddInt64(&b.next, 1),
Method: method,
Params: params,
}
return b.conn.Write(msg)
}
func (b *Browser) newExecutorForTarget(ctx context.Context, sessionID target.SessionID) *Target {
if sessionID == "" {
panic("empty session ID")
}
if _, ok := b.pages[sessionID]; ok {
panic(fmt.Sprintf("executor for %q already exists", sessionID))
}
t := &Target{
browser: b,
SessionID: sessionID,
eventQueue: make(chan *cdproto.Message, 1024),
waitQueue: make(chan func(cur *cdp.Frame) bool, 1024),
frames: make(map[cdp.FrameID]*cdp.Frame),
logf: b.logf,
errf: b.errf,
}
go t.run(ctx)
b.pages[sessionID] = t
return t
}
func (b *Browser) Execute(ctx context.Context, method string, params json.Marshaler, res json.Unmarshaler) error {
paramsMsg := emptyObj
if params != nil {
var err error
if paramsMsg, err = json.Marshal(params); err != nil {
return err
}
}
id := atomic.AddInt64(&b.next, 1)
ch := make(chan *cdproto.Message, 1)
b.cmdQueue <- cmdJob{
msg: &cdproto.Message{
ID: id,
Method: cdproto.MethodType(method),
Params: paramsMsg,
},
resp: ch,
}
select {
case msg := <-ch:
switch {
case msg == nil:
return ErrChannelClosed
case msg.Error != nil:
return msg.Error
case res != nil:
return json.Unmarshal(msg.Result, res)
}
case <-ctx.Done():
return ctx.Err()
}
return nil
}
func (b *Browser) run(ctx context.Context) {
defer b.conn.Close()
// add cancel to context
ctx, cancel := context.WithCancel(ctx)
defer cancel()
go func() {
defer cancel()
for {
select {
case <-ctx.Done():
return
default:
// continue below
}
msg, err := b.conn.Read()
if err != nil {
return
}
var sessionID target.SessionID
if msg.Method == cdproto.EventTargetReceivedMessageFromTarget {
recv := new(target.EventReceivedMessageFromTarget)
if err := json.Unmarshal(msg.Params, recv); err != nil {
b.errf("%s", err)
continue
}
sessionID = recv.SessionID
msg = new(cdproto.Message)
if err := json.Unmarshal([]byte(recv.Message), msg); err != nil {
b.errf("%s", err)
continue
}
}
switch {
case msg.Method != "":
if sessionID == "" {
// TODO: are we interested in
// these events?
continue
}
if msg.Method == cdproto.EventRuntimeExceptionThrown {
ev := new(runtime.EventExceptionThrown)
if err := json.Unmarshal(msg.Params, ev); err != nil {
b.errf("%s", err)
continue
}
b.errf("%+v\n", ev.ExceptionDetails)
}
page, ok := b.pages[sessionID]
if !ok {
b.errf("unknown session ID %q", sessionID)
continue
}
select {
case page.eventQueue <- msg:
default:
panic("eventQueue is full")
}
case msg.ID != 0:
b.qres <- msg
default:
b.errf("ignoring malformed incoming message (missing id or method): %#v", msg)
}
}
}()
respByID := make(map[int64]chan *cdproto.Message)
// process queues
for {
select {
case res := <-b.qres:
resp, ok := respByID[res.ID]
if !ok {
b.errf("id %d not present in response map", res.ID)
continue
}
if resp != nil {
// resp could be nil, if we're not interested in
// this response; for CommandSendMessageToTarget.
resp <- res
close(resp)
}
delete(respByID, res.ID)
case q := <-b.cmdQueue:
if _, ok := respByID[q.msg.ID]; ok {
b.errf("id %d already present in response map", q.msg.ID)
continue
}
respByID[q.msg.ID] = q.resp
if q.msg.Method == "" {
// Only register the chananel in respByID;
// useful for CommandSendMessageToTarget.
continue
}
if err := b.conn.Write(q.msg); err != nil {
b.errf("%s", err)
continue
}
case <-ctx.Done():
return
}
}
}
// BrowserOption is a browser option.
type BrowserOption func(*Browser) error
// WithLogf is a browser option to specify a func to receive general logging.
func WithLogf(f func(string, ...interface{})) BrowserOption {
return func(b *Browser) error {
b.logf = f
return nil
}
}
// WithErrorf is a browser option to specify a func to receive error logging.
func WithErrorf(f func(string, ...interface{})) BrowserOption {
return func(b *Browser) error {
b.errf = f
return nil
}
}
// WithConsolef is a browser option to specify a func to receive chrome log events.
//
// Note: NOT YET IMPLEMENTED.
func WithConsolef(f func(string, ...interface{})) BrowserOption {
return func(b *Browser) error {
return nil
}
}