2019-03-05 13:14:50 +00:00
|
|
|
// Package chromedp is a high level Chrome DevTools Protocol client that
|
|
|
|
// simplifies driving browsers for scraping, unit testing, or profiling web
|
|
|
|
// pages using the CDP.
|
|
|
|
//
|
|
|
|
// chromedp requires no third-party dependencies, implementing the async Chrome
|
|
|
|
// DevTools Protocol entirely in Go.
|
|
|
|
package chromedp
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2019-03-15 17:17:57 +00:00
|
|
|
"encoding/json"
|
2019-03-05 13:14:50 +00:00
|
|
|
"log"
|
2019-04-07 17:25:03 +00:00
|
|
|
"os"
|
2019-03-05 13:14:50 +00:00
|
|
|
"sync/atomic"
|
|
|
|
|
2019-04-03 00:05:46 +00:00
|
|
|
"github.com/mailru/easyjson"
|
|
|
|
|
2019-03-05 13:14:50 +00:00
|
|
|
"github.com/chromedp/cdproto"
|
2019-03-15 17:17:57 +00:00
|
|
|
"github.com/chromedp/cdproto/cdp"
|
2019-03-20 16:56:03 +00:00
|
|
|
"github.com/chromedp/cdproto/runtime"
|
2019-03-15 17:17:57 +00:00
|
|
|
"github.com/chromedp/cdproto/target"
|
2019-03-05 13:14:50 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Browser is the high-level Chrome DevTools Protocol browser manager, handling
|
|
|
|
// the browser process runner, WebSocket clients, associated targets, and
|
|
|
|
// network, page, and DOM events.
|
|
|
|
type Browser struct {
|
|
|
|
conn Transport
|
|
|
|
|
|
|
|
// next is the next message id.
|
|
|
|
next int64
|
|
|
|
|
2019-04-01 18:31:05 +00:00
|
|
|
// tabQueue is the queue used to create new target handlers, once a new
|
|
|
|
// tab is created and attached to. The newly created Target is sent back
|
|
|
|
// via tabResult.
|
2019-04-07 11:36:48 +00:00
|
|
|
tabQueue chan newTab
|
2019-04-01 18:31:05 +00:00
|
|
|
tabResult chan *Target
|
2019-03-15 17:17:57 +00:00
|
|
|
|
2019-04-01 18:31:05 +00:00
|
|
|
// cmdQueue is the outgoing command queue.
|
|
|
|
cmdQueue chan cmdJob
|
2019-03-15 17:17:57 +00:00
|
|
|
|
2019-03-05 13:14:50 +00:00
|
|
|
// logging funcs
|
|
|
|
logf func(string, ...interface{})
|
|
|
|
errf func(string, ...interface{})
|
2019-04-07 17:25:03 +00:00
|
|
|
|
|
|
|
// The optional fields below are helpful for some tests.
|
|
|
|
|
|
|
|
// process can be initialized by the allocators which start a process
|
|
|
|
// when allocating a browser.
|
|
|
|
process *os.Process
|
|
|
|
|
|
|
|
// userDataDir can be initialized by the allocators which set up user
|
|
|
|
// data dirs directly.
|
|
|
|
userDataDir string
|
2019-03-05 13:14:50 +00:00
|
|
|
}
|
|
|
|
|
2019-04-07 11:36:48 +00:00
|
|
|
type newTab struct {
|
|
|
|
targetID target.ID
|
|
|
|
sessionID target.SessionID
|
|
|
|
}
|
|
|
|
|
2019-03-15 17:17:57 +00:00
|
|
|
type cmdJob struct {
|
|
|
|
msg *cdproto.Message
|
|
|
|
resp chan *cdproto.Message
|
|
|
|
}
|
|
|
|
|
2019-03-05 13:14:50 +00:00
|
|
|
// NewBrowser creates a new browser.
|
2019-03-15 17:17:57 +00:00
|
|
|
func NewBrowser(ctx context.Context, urlstr string, opts ...BrowserOption) (*Browser, error) {
|
|
|
|
conn, err := DialContext(ctx, ForceIP(urlstr))
|
2019-03-05 13:14:50 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
b := &Browser{
|
2019-03-21 15:44:28 +00:00
|
|
|
conn: conn,
|
|
|
|
|
2019-04-07 11:36:48 +00:00
|
|
|
tabQueue: make(chan newTab, 1),
|
2019-04-01 18:31:05 +00:00
|
|
|
tabResult: make(chan *Target, 1),
|
2019-03-21 15:44:28 +00:00
|
|
|
|
|
|
|
cmdQueue: make(chan cmdJob),
|
|
|
|
|
|
|
|
logf: log.Printf,
|
2019-03-05 13:14:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// apply options
|
|
|
|
for _, o := range opts {
|
|
|
|
if err := o(b); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ensure errf is set
|
|
|
|
if b.errf == nil {
|
|
|
|
b.errf = func(s string, v ...interface{}) { b.logf("ERROR: "+s, v...) }
|
|
|
|
}
|
|
|
|
|
2019-03-21 15:44:28 +00:00
|
|
|
go b.run(ctx)
|
2019-03-05 13:14:50 +00:00
|
|
|
return b, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shutdown shuts down the browser.
|
|
|
|
func (b *Browser) Shutdown() error {
|
|
|
|
if b.conn != nil {
|
|
|
|
if err := b.send(cdproto.CommandBrowserClose, nil); err != nil {
|
|
|
|
b.errf("could not close browser: %v", err)
|
|
|
|
}
|
|
|
|
return b.conn.Close()
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// send writes the supplied message and params.
|
|
|
|
func (b *Browser) send(method cdproto.MethodType, params easyjson.RawMessage) error {
|
|
|
|
msg := &cdproto.Message{
|
|
|
|
ID: atomic.AddInt64(&b.next, 1),
|
2019-03-15 17:17:57 +00:00
|
|
|
Method: method,
|
2019-03-05 13:14:50 +00:00
|
|
|
Params: params,
|
|
|
|
}
|
2019-03-15 17:17:57 +00:00
|
|
|
return b.conn.Write(msg)
|
|
|
|
}
|
|
|
|
|
2019-04-07 11:36:48 +00:00
|
|
|
func (b *Browser) newExecutorForTarget(ctx context.Context, targetID target.ID, sessionID target.SessionID) *Target {
|
|
|
|
if targetID == "" {
|
|
|
|
panic("empty target ID")
|
|
|
|
}
|
2019-03-15 17:17:57 +00:00
|
|
|
if sessionID == "" {
|
|
|
|
panic("empty session ID")
|
2019-03-05 13:14:50 +00:00
|
|
|
}
|
2019-04-07 11:36:48 +00:00
|
|
|
b.tabQueue <- newTab{targetID, sessionID}
|
2019-04-01 18:31:05 +00:00
|
|
|
return <-b.tabResult
|
2019-03-05 13:14:50 +00:00
|
|
|
}
|
|
|
|
|
2019-03-15 17:17:57 +00:00
|
|
|
func (b *Browser) Execute(ctx context.Context, method string, params json.Marshaler, res json.Unmarshaler) error {
|
|
|
|
paramsMsg := emptyObj
|
|
|
|
if params != nil {
|
|
|
|
var err error
|
|
|
|
if paramsMsg, err = json.Marshal(params); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
id := atomic.AddInt64(&b.next, 1)
|
|
|
|
ch := make(chan *cdproto.Message, 1)
|
|
|
|
b.cmdQueue <- cmdJob{
|
|
|
|
msg: &cdproto.Message{
|
|
|
|
ID: id,
|
|
|
|
Method: cdproto.MethodType(method),
|
|
|
|
Params: paramsMsg,
|
|
|
|
},
|
|
|
|
resp: ch,
|
|
|
|
}
|
|
|
|
select {
|
|
|
|
case msg := <-ch:
|
|
|
|
switch {
|
|
|
|
case msg == nil:
|
|
|
|
return ErrChannelClosed
|
|
|
|
case msg.Error != nil:
|
|
|
|
return msg.Error
|
|
|
|
case res != nil:
|
|
|
|
return json.Unmarshal(msg.Result, res)
|
|
|
|
}
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
2019-03-05 13:14:50 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-04-01 18:31:05 +00:00
|
|
|
type tabEvent struct {
|
|
|
|
sessionID target.SessionID
|
|
|
|
msg *cdproto.Message
|
|
|
|
}
|
|
|
|
|
2019-03-15 17:17:57 +00:00
|
|
|
func (b *Browser) run(ctx context.Context) {
|
|
|
|
defer b.conn.Close()
|
|
|
|
|
2019-04-07 17:25:03 +00:00
|
|
|
cancel := FromContext(ctx).cancel
|
2019-03-15 17:17:57 +00:00
|
|
|
|
2019-04-01 18:31:05 +00:00
|
|
|
// tabEventQueue is the queue of incoming target events, to be routed by
|
|
|
|
// their session ID.
|
|
|
|
tabEventQueue := make(chan tabEvent, 1)
|
|
|
|
|
|
|
|
// resQueue is the incoming command result queue.
|
|
|
|
resQueue := make(chan *cdproto.Message, 1)
|
|
|
|
|
|
|
|
// This goroutine continuously reads events from the websocket
|
|
|
|
// connection. The separate goroutine is needed since a websocket read
|
|
|
|
// is blocking, so it cannot be used in a select statement.
|
2019-03-15 17:17:57 +00:00
|
|
|
go func() {
|
|
|
|
for {
|
|
|
|
msg, err := b.conn.Read()
|
|
|
|
if err != nil {
|
2019-04-07 17:25:03 +00:00
|
|
|
// If the websocket failed, most likely Chrome
|
|
|
|
// was closed or crashed. Cancel the entire
|
|
|
|
// Browser context to stop all activity.
|
|
|
|
cancel()
|
2019-03-15 17:17:57 +00:00
|
|
|
return
|
|
|
|
}
|
2019-04-01 18:31:05 +00:00
|
|
|
if msg.Method == cdproto.EventRuntimeExceptionThrown {
|
|
|
|
ev := new(runtime.EventExceptionThrown)
|
|
|
|
if err := json.Unmarshal(msg.Params, ev); err != nil {
|
|
|
|
b.errf("%s", err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
b.errf("%+v\n", ev.ExceptionDetails)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2019-03-15 17:17:57 +00:00
|
|
|
var sessionID target.SessionID
|
|
|
|
if msg.Method == cdproto.EventTargetReceivedMessageFromTarget {
|
2019-04-01 18:31:05 +00:00
|
|
|
event := new(target.EventReceivedMessageFromTarget)
|
|
|
|
if err := json.Unmarshal(msg.Params, event); err != nil {
|
2019-03-15 17:17:57 +00:00
|
|
|
b.errf("%s", err)
|
|
|
|
continue
|
|
|
|
}
|
2019-04-01 18:31:05 +00:00
|
|
|
sessionID = event.SessionID
|
2019-03-15 17:17:57 +00:00
|
|
|
msg = new(cdproto.Message)
|
2019-04-01 18:31:05 +00:00
|
|
|
if err := json.Unmarshal([]byte(event.Message), msg); err != nil {
|
2019-03-15 17:17:57 +00:00
|
|
|
b.errf("%s", err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
switch {
|
|
|
|
case msg.Method != "":
|
|
|
|
if sessionID == "" {
|
2019-04-01 18:31:05 +00:00
|
|
|
// TODO: are we interested in browser events?
|
2019-03-15 17:17:57 +00:00
|
|
|
continue
|
|
|
|
}
|
2019-04-01 18:31:05 +00:00
|
|
|
tabEventQueue <- tabEvent{
|
|
|
|
sessionID: sessionID,
|
|
|
|
msg: msg,
|
|
|
|
}
|
|
|
|
case msg.ID != 0:
|
|
|
|
// We can't process the response here, as it's
|
|
|
|
// another goroutine that maintans respByID.
|
|
|
|
resQueue <- msg
|
|
|
|
default:
|
|
|
|
b.errf("ignoring malformed incoming message (missing id or method): %#v", msg)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
// This goroutine handles tabs, as well as routing events to each tab
|
|
|
|
// via the pages map.
|
|
|
|
go func() {
|
|
|
|
// This map is only safe for use within this goroutine, so don't
|
|
|
|
// declare it as a Browser field.
|
|
|
|
pages := make(map[target.SessionID]*Target, 1024)
|
|
|
|
for {
|
|
|
|
select {
|
2019-04-07 11:36:48 +00:00
|
|
|
case tab := <-b.tabQueue:
|
|
|
|
if _, ok := pages[tab.sessionID]; ok {
|
|
|
|
b.errf("executor for %q already exists", tab.sessionID)
|
2019-03-20 16:56:03 +00:00
|
|
|
}
|
2019-04-01 18:31:05 +00:00
|
|
|
t := &Target{
|
|
|
|
browser: b,
|
2019-04-07 11:36:48 +00:00
|
|
|
TargetID: tab.targetID,
|
|
|
|
SessionID: tab.sessionID,
|
2019-03-15 17:17:57 +00:00
|
|
|
|
2019-04-01 18:31:05 +00:00
|
|
|
eventQueue: make(chan *cdproto.Message, 1024),
|
|
|
|
waitQueue: make(chan func(cur *cdp.Frame) bool, 1024),
|
|
|
|
frames: make(map[cdp.FrameID]*cdp.Frame),
|
|
|
|
|
|
|
|
logf: b.logf,
|
|
|
|
errf: b.errf,
|
|
|
|
}
|
|
|
|
go t.run(ctx)
|
2019-04-07 11:36:48 +00:00
|
|
|
pages[tab.sessionID] = t
|
2019-04-01 18:31:05 +00:00
|
|
|
b.tabResult <- t
|
|
|
|
case event := <-tabEventQueue:
|
|
|
|
page, ok := pages[event.sessionID]
|
2019-03-15 17:17:57 +00:00
|
|
|
if !ok {
|
2019-04-01 18:31:05 +00:00
|
|
|
b.errf("unknown session ID %q", event.sessionID)
|
2019-03-15 17:17:57 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
select {
|
2019-04-01 18:31:05 +00:00
|
|
|
case page.eventQueue <- event.msg:
|
2019-03-15 17:17:57 +00:00
|
|
|
default:
|
|
|
|
panic("eventQueue is full")
|
|
|
|
}
|
|
|
|
|
2019-04-01 18:31:05 +00:00
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
2019-03-15 17:17:57 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
respByID := make(map[int64]chan *cdproto.Message)
|
|
|
|
|
2019-04-01 18:31:05 +00:00
|
|
|
// This goroutine handles sending commands to the browser, and sending
|
|
|
|
// responses back for each of these commands via respByID.
|
2019-03-15 17:17:57 +00:00
|
|
|
for {
|
|
|
|
select {
|
2019-04-01 18:31:05 +00:00
|
|
|
case res := <-resQueue:
|
2019-03-15 17:17:57 +00:00
|
|
|
resp, ok := respByID[res.ID]
|
|
|
|
if !ok {
|
|
|
|
b.errf("id %d not present in response map", res.ID)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if resp != nil {
|
|
|
|
// resp could be nil, if we're not interested in
|
|
|
|
// this response; for CommandSendMessageToTarget.
|
|
|
|
resp <- res
|
|
|
|
close(resp)
|
|
|
|
}
|
|
|
|
delete(respByID, res.ID)
|
|
|
|
|
|
|
|
case q := <-b.cmdQueue:
|
|
|
|
if _, ok := respByID[q.msg.ID]; ok {
|
|
|
|
b.errf("id %d already present in response map", q.msg.ID)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
respByID[q.msg.ID] = q.resp
|
|
|
|
|
|
|
|
if q.msg.Method == "" {
|
|
|
|
// Only register the chananel in respByID;
|
|
|
|
// useful for CommandSendMessageToTarget.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if err := b.conn.Write(q.msg); err != nil {
|
|
|
|
b.errf("%s", err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
2019-03-05 13:14:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// BrowserOption is a browser option.
|
|
|
|
type BrowserOption func(*Browser) error
|
|
|
|
|
|
|
|
// WithLogf is a browser option to specify a func to receive general logging.
|
|
|
|
func WithLogf(f func(string, ...interface{})) BrowserOption {
|
|
|
|
return func(b *Browser) error {
|
|
|
|
b.logf = f
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// WithErrorf is a browser option to specify a func to receive error logging.
|
|
|
|
func WithErrorf(f func(string, ...interface{})) BrowserOption {
|
|
|
|
return func(b *Browser) error {
|
|
|
|
b.errf = f
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// WithConsolef is a browser option to specify a func to receive chrome log events.
|
|
|
|
//
|
|
|
|
// Note: NOT YET IMPLEMENTED.
|
|
|
|
func WithConsolef(f func(string, ...interface{})) BrowserOption {
|
|
|
|
return func(b *Browser) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|