chromedp/allocate.go
Daniel Martí 81a48280ef route all communication via the browser
Use a single websocket connection per browser, removing the need for an
extra websocket connection per target.

This is thanks to the Target.sendMessageToTarget command to send
messages to each target, and the Target.receivedMessageFromTarget event
to receive messages back.

The browser handles activity via a single worker goroutine, and the same
technique is used for each target. This means that commands and events
are dealt with in order, and we can do away with some complexity like
mutexes and extra go statements.
2019-04-01 12:18:16 +01:00

257 lines
6.2 KiB
Go

package chromedp
import (
"bufio"
"context"
"fmt"
"io/ioutil"
"os"
"os/exec"
"strings"
"sync"
)
type Allocator interface {
// Allocate creates a new browser from the pool. It can be cancelled via
// the provided context, at which point all the resources used by the
// browser (such as temporary directories) will be cleaned up.
Allocate(context.Context) (*Browser, error)
// Wait can be called after cancelling a pool's context, to block until
// all the pool's resources have been cleaned up.
Wait()
}
func NewAllocator(parent context.Context, opts ...AllocatorOption) (context.Context, context.CancelFunc) {
ctx, cancel := context.WithCancel(parent)
c := &Context{}
for _, o := range opts {
o(&c.Allocator)
}
ctx = context.WithValue(ctx, contextKey{}, c)
return ctx, cancel
}
type AllocatorOption func(*Allocator)
func WithExecAllocator(opts ...ExecAllocatorOption) func(*Allocator) {
return func(p *Allocator) {
ep := &ExecAllocator{
initFlags: make(map[string]interface{}),
}
for _, o := range opts {
o(ep)
}
if ep.execPath == "" {
ep.execPath = findExecPath()
}
*p = ep
}
}
type ExecAllocatorOption func(*ExecAllocator)
type ExecAllocator struct {
execPath string
initFlags map[string]interface{}
wg sync.WaitGroup
}
func (p *ExecAllocator) Allocate(ctx context.Context) (*Browser, error) {
removeDir := false
var cmd *exec.Cmd
// TODO: figure out a nicer way to do this
flags := make(map[string]interface{})
for name, value := range p.initFlags {
flags[name] = value
}
dataDir, ok := flags["user-data-dir"].(string)
if !ok {
tempDir, err := ioutil.TempDir("", "chromedp-runner")
if err != nil {
return nil, err
}
flags["user-data-dir"] = tempDir
dataDir = tempDir
removeDir = true
}
p.wg.Add(1)
go func() {
<-ctx.Done()
// First wait for the process to be finished.
if cmd != nil {
cmd.Wait()
}
// Then delete the temporary user data directory, if needed.
if removeDir {
os.RemoveAll(dataDir)
}
p.wg.Done()
}()
flags["remote-debugging-port"] = "0"
args := []string{}
for name, value := range flags {
switch value := value.(type) {
case string:
args = append(args, fmt.Sprintf("--%s=%s", name, value))
case bool:
if value {
args = append(args, fmt.Sprintf("--%s", name))
}
default:
return nil, fmt.Errorf("invalid exec pool flag")
}
}
cmd = exec.CommandContext(ctx, p.execPath, args...)
stderr, err := cmd.StderrPipe()
if err != nil {
return nil, err
}
if err := cmd.Start(); err != nil {
return nil, err
}
// Pick up the browser's websocket URL from stderr.
wsURL := ""
scanner := bufio.NewScanner(stderr)
prefix := "DevTools listening on"
for scanner.Scan() {
line := scanner.Text()
if s := strings.TrimPrefix(line, prefix); s != line {
wsURL = strings.TrimSpace(s)
break
}
}
if err := scanner.Err(); err != nil {
return nil, err
}
stderr.Close()
browser, err := NewBrowser(ctx, wsURL)
if err != nil {
return nil, err
}
browser.UserDataDir = dataDir
browser.Start(ctx)
return browser, nil
}
func (p *ExecAllocator) Wait() {
p.wg.Wait()
}
func ExecPath(path string) ExecAllocatorOption {
return func(p *ExecAllocator) {
if fullPath, _ := exec.LookPath(path); fullPath != "" {
// Convert to an absolute path if possible, to avoid
// repeated LookPath calls in each Allocate.
path = fullPath
}
p.execPath = path
}
}
// findExecPath tries to find the Chrome browser somewhere in the current
// system. It performs a rather agressive search, which is the same in all
// systems. That may make it a bit slow, but it will only be run when creating a
// new ExecAllocator.
func findExecPath() string {
for _, path := range [...]string{
// Unix-like
"headless_shell",
"chromium",
"chromium-browser",
"google-chrome",
"google-chrome-stable",
"google-chrome-beta",
"google-chrome-unstable",
"/usr/bin/google-chrome",
// Windows
"chrome",
"chrome.exe", // in case PATHEXT is misconfigured
`C:\Program Files (x86)\Google\Chrome\Application\chrome.exe`,
// Mac
`/Applications/Google Chrome.app/Contents/MacOS/Google Chrome`,
} {
found, err := exec.LookPath(path)
if err == nil {
return found
}
}
// Fall back to something simple and sensible, to give a useful error
// message.
return "google-chrome"
}
// Flag is a generic command line option to pass a flag to Chrome. If the value
// is a string, it will be passed as --name=value. If it's a boolean, it will be
// passed as --name if value is true.
func Flag(name string, value interface{}) ExecAllocatorOption {
return func(p *ExecAllocator) {
p.initFlags[name] = value
}
}
// UserDataDir is the command line option to set the user data dir.
//
// Note: set this option to manually set the profile directory used by Chrome.
// When this is not set, then a default path will be created in the /tmp
// directory.
func UserDataDir(dir string) ExecAllocatorOption {
return Flag("user-data-dir", dir)
}
// ProxyServer is the command line option to set the outbound proxy server.
func ProxyServer(proxy string) ExecAllocatorOption {
return Flag("proxy-server", proxy)
}
// WindowSize is the command line option to set the initial window size.
func WindowSize(width, height int) ExecAllocatorOption {
return Flag("window-size", fmt.Sprintf("%d,%d", width, height))
}
// UserAgent is the command line option to set the default User-Agent
// header.
func UserAgent(userAgent string) ExecAllocatorOption {
return Flag("user-agent", userAgent)
}
// NoSandbox is the Chrome comamnd line option to disable the sandbox.
func NoSandbox(p *ExecAllocator) {
Flag("no-sandbox", true)(p)
}
// NoFirstRun is the Chrome comamnd line option to disable the first run
// dialog.
func NoFirstRun(p *ExecAllocator) {
Flag("no-first-run", true)(p)
}
// NoDefaultBrowserCheck is the Chrome comamnd line option to disable the
// default browser check.
func NoDefaultBrowserCheck(p *ExecAllocator) {
Flag("no-default-browser-check", true)(p)
}
// Headless is the command line option to run in headless mode.
func Headless(p *ExecAllocator) {
Flag("headless", true)(p)
}
// DisableGPU is the command line option to disable the GPU process.
func DisableGPU(p *ExecAllocator) {
Flag("disable-gpu", true)(p)
}