433 lines
18 KiB
Markdown
433 lines
18 KiB
Markdown
## API Reference
|
|
Treat the declarations below as the supported `agent.browsers.*` API surface.
|
|
|
|
```ts
|
|
// Installed by setupBrowserRuntime({ globals: globalThis }).
|
|
const browser = await agent.browsers.get("iab");
|
|
interface Agent {
|
|
browsers: Browsers; // API for finding and selecting browsers.
|
|
documentation: Documentation; // API for reading packaged browser-use documentation by name.
|
|
}
|
|
|
|
interface Browsers {
|
|
get(id: string): Promise<Browser>; // Get a browser by id or client type.
|
|
list(): Promise<Array<BrowserInfo>>; // List available browsers.
|
|
}
|
|
|
|
interface Browser {
|
|
browserId: string; // Browser id selected by `agent.browsers.get()`.
|
|
capabilities: BrowserCapabilityCollection; // Browser-scoped optional capabilities advertised by the connected backend; discover IDs with `await browser.capabilities.list()`, then call `await (await browser.capabilities.get(id)).documentation()` for method details.
|
|
tabs: Tabs; // API for interacting with browser tabs.
|
|
user: BrowserUser; // Readonly context about tabs in the user's browser windows.
|
|
documentation(): Promise<string>; // Read browser guidance and the core API reference.
|
|
nameSession(name: string): Promise<void>; // Name the current browser automation session.
|
|
}
|
|
|
|
interface BrowserUser {
|
|
|
|
openTabs(): Promise<Array<BrowserUserTabInfo>>; // List open top-level tabs across the user's browser windows ordered by `lastOpened` descending.
|
|
}
|
|
|
|
// Tab management API exposed as `browser.tabs`.
interface Tabs {
  get(id: string): Promise<Tab>; // Get a tab by id.
  list(): Promise<Array<TabInfo>>; // List open tabs in the browser.
  // The name is quoted on purpose: an unquoted `new(): ...` member is parsed by
  // TypeScript as a construct signature (`new tabs()`), not as a method. Quoting
  // declares an ordinary method that is called as `tabs.new()`.
  "new"(): Promise<Tab>; // Create and return a new tab in the browser.
  selected(): Promise<undefined | Tab>; // Return the currently selected tab, if any.
}
|
|
|
|
interface Tab {
|
|
capabilities: TabCapabilityCollection; // Tab-scoped optional capabilities advertised by the connected backend; discover IDs with `await tab.capabilities.list()`, then call `await (await tab.capabilities.get(id)).documentation()` for method details.
|
|
clipboard: TabClipboardAPI; // API for interacting with clipboard content in this tab.
|
|
|
|
cua: CUAAPI; // API for interacting with the tab via the cua api
|
|
dev: TabDevAPI; // API for developer-oriented tab inspection.
|
|
dom_cua: DomCUAAPI; // API for interacting with the tab via the dom based cua api
|
|
id: string; // A tab's unique identifier
|
|
playwright: PlaywrightAPI; // API for interacting with the tab via the playwright api
|
|
back(): Promise<void>; // Navigate this tab back in history.
|
|
close(): Promise<void>; // Close this tab.
|
|
forward(): Promise<void>; // Navigate this tab forward in history.
|
|
goto(url: string): Promise<void>; // Open a URL in this tab.
|
|
reload(): Promise<void>; // Reload this tab.
|
|
screenshot(options: ScreenshotOptions): Promise<Uint8Array>; // Capture a screenshot of this tab.
|
|
title(): Promise<undefined | string>; // Get the current title for this tab.
|
|
url(): Promise<undefined | string>; // Get the current URL for this tab.
|
|
}
|
|
|
|
// Coordinate-based input API exposed as `tab.cua`.
interface CUAAPI {
  click(options: ClickOptions): Promise<void>; // Click at a coordinate in the current viewport.
  double_click(options: DoubleClickOptions): Promise<void>; // Double click at a coordinate in the current viewport.
  drag(options: DragOptions): Promise<void>; // Drag along the provided path of points.
  keypress(options: KeypressOptions): Promise<void>; // Press control characters at the currently focused element (focus it first via `click`/`double_click`).
  move(options: MoveOptions): Promise<void>; // Move the mouse to a point by the provided x and y coordinates.
  scroll(options: ScrollOptions): Promise<void>; // Scroll by a delta from a specific viewport coordinate.
  type(options: TypeOptions): Promise<void>; // Type text at the current focus.
}
|
|
|
|
// DOM-node-based input API exposed as `tab.dom_cua`; nodes are addressed by the
// ids returned from `get_visible_dom()`.
interface DomCUAAPI {
  click(options: DomClickOptions): Promise<void>; // Click a DOM node by its id from the visible DOM snapshot.
  double_click(options: DomClickOptions): Promise<void>; // Double-click a DOM node by its id.
  get_visible_dom(): Promise<unknown>; // Return a filtered DOM with node ids for interactable elements.
  keypress(options: DomKeypressOptions): Promise<void>; // Press control characters at the currently focused element (focus it first via `click`/`double_click`).
  scroll(options: DomScrollOptions): Promise<void>; // Scroll either the page or a specific node (if node_id provided) by deltas.
  type(options: DomTypeOptions): Promise<void>; // Type text into the currently focused element (focus via `click` first).
}
|
|
|
|
interface PlaywrightAPI {
|
|
domSnapshot(): Promise<string>; // Return a snapshot of the current DOM as a string, including expanded iframe body content when available.
|
|
|
|
evaluate<TResult, TArg>(pageFunction: PlaywrightEvaluateFunction<TArg, TResult>, arg?: TArg, options?: PlaywrightEvaluateOptions): Promise<TResult>; // Evaluate JavaScript in a read-only page scope.
|
|
expectNavigation<T>(action: () => Promise<T>, options: { timeoutMs?: number; url?: string; waitUntil?: LoadState }): Promise<T>; // Expect a navigation triggered by an action.
|
|
frameLocator(frameSelector: string): PlaywrightFrameLocator; // Create a frame-scoped locator builder.
|
|
getByLabel(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by label text within the page.
|
|
getByPlaceholder(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by placeholder text within the page.
|
|
getByRole(role: string, options: { exact?: boolean; name?: TextMatcher }): PlaywrightLocator; // Find elements by ARIA role within the page.
|
|
getByTestId(testId: string): PlaywrightLocator; // Find elements by test id within the page.
|
|
getByText(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by text within the page.
|
|
locator(selector: string): PlaywrightLocator; // Create a locator scoped to this tab.
|
|
waitForEvent(event: "download", options?: WaitForEventOptions): Promise<PlaywrightDownload>; // Wait for the next event on the page.
|
|
|
|
waitForLoadState(options: PageWaitForLoadStateOptions): Promise<void>; // Wait for the page to reach a specific load state.
|
|
waitForTimeout(timeoutMs: number): Promise<void>; // Wait for a fixed duration.
|
|
waitForURL(url: string, options: PageWaitForURLOptions): Promise<void>; // Wait for the page URL to match the provided value.
|
|
}
|
|
|
|
interface PlaywrightFrameLocator {
|
|
frameLocator(frameSelector: string): PlaywrightFrameLocator; // Create a locator scoped to a nested frame.
|
|
getByLabel(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by label within this frame.
|
|
getByPlaceholder(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by placeholder within this frame.
|
|
getByRole(role: string, options: { exact?: boolean; name?: TextMatcher }): PlaywrightLocator; // Find elements by ARIA role within this frame.
|
|
getByTestId(testId: string): PlaywrightLocator; // Find elements by test id within this frame.
|
|
getByText(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by text within this frame.
|
|
locator(selector: string): PlaywrightLocator; // Create a locator scoped to this frame.
|
|
}
|
|
|
|
interface PlaywrightLocator {
|
|
all(): Promise<Array<PlaywrightLocator>>; // Resolve to a list of locators for each matched element.
|
|
allTextContents(options: { timeoutMs?: number }): Promise<Array<string>>; // Return `textContent` for *all* elements matched by this locator.
|
|
and(locator: PlaywrightLocator): PlaywrightLocator; // Return a locator matching elements that satisfy both this locator and `locator`.
|
|
check(options: LocatorCheckOptions): Promise<void>; // Check a checkbox or switch-like control.
|
|
click(options: LocatorClickOptions): Promise<void>; // Click the element matched by this locator.
|
|
count(): Promise<number>; // Number of elements matching this locator.
|
|
dblclick(options: LocatorClickOptions): Promise<void>; // Double-click the element matched by this locator.
|
|
|
|
fill(value: string, options: { timeoutMs?: number }): Promise<void>; // Replace the element's value with the provided text.
|
|
filter(options: LocatorFilterOptions): PlaywrightLocator; // Narrow this locator by additional constraints.
|
|
first(): PlaywrightLocator; // Return a locator pointing at the first matched element.
|
|
getAttribute(name: string, options: { timeoutMs?: number }): Promise<null | string>; // Return an attribute value from the first matched element.
|
|
getByLabel(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by label text, scoped to this locator.
|
|
getByPlaceholder(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by placeholder text, scoped to this locator.
|
|
getByRole(role: string, options: { exact?: boolean; name?: TextMatcher }): PlaywrightLocator; // Find elements by ARIA role, scoped to this locator.
|
|
getByTestId(testId: string): PlaywrightLocator; // Find elements by test id, scoped to this locator.
|
|
getByText(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by text content, scoped to this locator.
|
|
innerText(options: { timeoutMs?: number }): Promise<string>; // Return the rendered (visible) text of the first matched element.
|
|
isEnabled(): Promise<boolean>; // Whether the first matched element is currently enabled.
|
|
isVisible(): Promise<boolean>; // Whether the first matched element is currently visible.
|
|
last(): PlaywrightLocator; // Return a locator pointing at the last matched element.
|
|
locator(selector: string, options: LocatorLocatorOptions): PlaywrightLocator; // Create a descendant locator scoped to this locator.
|
|
nth(index: number): PlaywrightLocator; // Return a locator pointing at the Nth matched element.
|
|
or(locator: PlaywrightLocator): PlaywrightLocator; // Return a locator matching elements that satisfy either this locator or `locator`.
|
|
press(value: string, options: { timeoutMs?: number }): Promise<void>; // Press a keyboard key while this locator is focused.
|
|
selectOption(value: SelectOptionInput | Array<SelectOptionInput>, options: { timeoutMs?: number }): Promise<void>; // Select one or more options on a native `<select>` element.
|
|
setChecked(checked: boolean, options: LocatorCheckOptions): Promise<void>; // Set a checkbox or switch-like control to a checked/unchecked state.
|
|
textContent(options: { timeoutMs?: number }): Promise<null | string>; // Return the raw textContent of the first matched element (or null if missing).
|
|
type(value: string, options: { timeoutMs?: number }): Promise<void>; // Type text into the element without clearing existing content.
|
|
uncheck(options: LocatorCheckOptions): Promise<void>; // Uncheck a checkbox or switch-like control.
|
|
waitFor(options: LocatorWaitForOptions): Promise<void>; // Wait for the element to reach a specific state.
|
|
}
|
|
|
|
interface PlaywrightDownload {
|
|
|
|
}
|
|
|
|
interface TabClipboardAPI {
|
|
read(): Promise<Array<TabClipboardItem>>; // Read clipboard items, including text and binary payloads.
|
|
readText(): Promise<string>; // Read plain text from the browser clipboard.
|
|
write(items: Array<TabClipboardItem>): Promise<void>; // Write clipboard items.
|
|
writeText(text: string): Promise<void>; // Write plain text to the browser clipboard.
|
|
}
|
|
|
|
interface TabDevAPI {
|
|
logs(options: TabDevLogsOptions): Promise<Array<TabDevLogEntry>>; // Read console log messages captured for this tab.
|
|
}
|
|
|
|
interface Documentation {
|
|
get(name: string): Promise<string>; // Read packaged documentation by its extensionless relative path.
|
|
}
|
|
|
|
interface BrowserInfo {
|
|
capabilities: ClientCapabilities;
|
|
id: string;
|
|
metadata?: Record<string, string>;
|
|
name: string;
|
|
type: ClientType;
|
|
}
|
|
|
|
// Browser-scoped capability registry exposed as `browser.capabilities`.
type BrowserCapabilityCollection = {
  // Look up a capability by an id obtained from `list()`. The result is typed
  // `unknown`; the `Browser.capabilities` description calls `documentation()` on
  // it, so narrow or cast the value before calling anything on it.
  get(id: string): Promise<unknown>;
  list(): Promise<Array<{ id: string; description: string }>>; // Enumerate the advertised capabilities with their ids and descriptions.
};
|
|
|
|
interface BrowserUserTabInfo {
|
|
id: string; // Opaque identifier for this browser tab.
|
|
lastOpened?: string; // ISO 8601 timestamp for the last time the tab was opened or focused.
|
|
tabGroup?: string; // User-visible tab group name when the tab belongs to one.
|
|
title?: string; // User-visible tab title.
|
|
url?: string; // Current tab URL.
|
|
}
|
|
|
|
interface TabsContentOptions {
|
|
|
|
timeoutMs?: number; // Maximum time to wait for each page load, in milliseconds.
|
|
urls: Array<string>; // URLs to load in temporary background tabs.
|
|
}
|
|
|
|
interface TabsContentResult {
|
|
|
|
title: null | string; // The resolved page title when available.
|
|
url: string; // The resolved page URL when available, otherwise the requested URL.
|
|
}
|
|
|
|
interface FinalizeTabsOptions {
|
|
keep?: Array<FinalizeTabsKeep>; // Explicit tab dispositions to preserve after cleanup.
|
|
}
|
|
|
|
// Metadata describing an open tab (the element type returned by `Tabs.list()`).
interface TabInfo {
  id: string; // Identifier of the tab.
  title?: string; // Tab title, when present.
  url?: string; // Tab URL, when present.
}
|
|
|
|
// Tab-scoped capability registry exposed as `tab.capabilities`.
type TabCapabilityCollection = {
  // Look up a capability by an id obtained from `list()`. The result is typed
  // `unknown`; the `Tab.capabilities` description calls `documentation()` on it,
  // so narrow or cast the value before calling anything on it.
  get(id: string): Promise<unknown>;
  list(): Promise<Array<{ id: string; description: string }>>; // Enumerate the advertised capabilities with their ids and descriptions.
};
|
|
|
|
type ScreenshotOptions = {
|
|
clip?: ClipRect; // Crop to a specific rectangle instead of the full viewport.
|
|
fullPage?: boolean; // Capture the full page instead of the viewport.
|
|
};
|
|
|
|
// Options for `CUAAPI.click()`, which clicks at a coordinate in the current viewport.
type ClickOptions = {
  button?: number; // Mouse button (1-left, 2-middle/wheel, 3-right, 4-back, 5-forward).
  keypress?: Array<string>; // Modifier keys held during the click.
  x: number; // X coordinate of the click in the current viewport.
  y: number; // Y coordinate of the click in the current viewport.
};
|
|
|
|
// Options for `CUAAPI.double_click()`, which double clicks at a coordinate in the current viewport.
type DoubleClickOptions = {
  keypress?: Array<string>; // Modifier keys held during the double click.
  x: number; // X coordinate of the double click in the current viewport.
  y: number; // Y coordinate of the double click in the current viewport.
};
|
|
|
|
type DragOptions = {
|
|
keys?: Array<string>; // Optional modifier keys held during the drag.
|
|
path: Array<{ x: number; y: number }>; // Drag path as a list of points.
|
|
};
|
|
|
|
type KeypressOptions = {
|
|
keys: Array<string>; // Key combination to press.
|
|
};
|
|
|
|
// Options for `CUAAPI.move()`, which moves the mouse to the point given by `x` and `y`.
type MoveOptions = {
  keys?: Array<string>; // Optional modifier keys held while moving.
  x: number; // X coordinate of the point to move the mouse to.
  y: number; // Y coordinate of the point to move the mouse to.
};
|
|
|
|
// Options for `CUAAPI.scroll()`, which scrolls by a delta from a specific viewport coordinate.
// Note the naming differs from `DomScrollOptions`, where `x`/`y` are the deltas:
// here the deltas are `scrollX`/`scrollY` and `x`/`y` locate the scroll origin.
type ScrollOptions = {
  keypress?: Array<string>; // Modifier keys held during scroll.
  scrollX: number; // Scroll delta along the X axis.
  scrollY: number; // Scroll delta along the Y axis.
  x: number; // X of the viewport coordinate to scroll from.
  y: number; // Y of the viewport coordinate to scroll from.
};
|
|
|
|
// Options for `CUAAPI.type()`.
type TypeOptions = {
  text: string; // Text to type at the current focus.
};
|
|
|
|
type DomClickOptions = {
|
|
node_id: string; // Node id from `get_visible_dom()`.
|
|
};
|
|
|
|
type DomKeypressOptions = {
|
|
keys: Array<string>; // Key combination to press.
|
|
};
|
|
|
|
type DomScrollOptions = {
|
|
node_id?: string; // Optional node id to scroll within.
|
|
x: number; // Horizontal scroll delta.
|
|
y: number; // Vertical scroll delta.
|
|
};
|
|
|
|
type DomTypeOptions = {
|
|
text: string; // Text to type into the currently focused element.
|
|
};
|
|
|
|
type ElementInfoOptions = {
|
|
includeNonInteractable?: boolean; // When true, include non-interactable elements in addition to interactable targets.
|
|
x: number;
|
|
y: number;
|
|
};
|
|
|
|
type ElementInfo = {
|
|
ariaName?: string | null; // Accessible name if available.
|
|
boundingBox?: ElementInfoRect | null; // Element bounds in screenshot coordinates.
|
|
nodeId?: number | null; // Backend node id that can be passed to DOM-inspection APIs when available.
|
|
preview: string; // Compact human-readable node preview.
|
|
role?: string | null; // Computed ARIA role if available.
|
|
selector: ElementInfoSelector; // Suggested selector data for this element.
|
|
tagName: string; // Lowercased HTML tag name.
|
|
testId?: string | null; // Configured test id attribute if present.
|
|
visibleText?: string | null; // Rendered visible text, selected option text, or visible form value when available.
|
|
};
|
|
|
|
type ElementScreenshotOptions = {
|
|
includeNonInteractable?: boolean; // When true, highlight non-interactable elements in addition to interactable targets.
|
|
x: number;
|
|
y: number;
|
|
};
|
|
|
|
// Script accepted by `PlaywrightAPI.evaluate()`: either a string or a function
// that receives `arg` and returns the result, optionally as a promise.
// The function type is parenthesized because TypeScript rejects bare function
// type notation inside a union.
type PlaywrightEvaluateFunction<TArg, TResult> = string | ((arg: TArg) => TResult | Promise<TResult>);
|
|
|
|
type PlaywrightEvaluateOptions = {
|
|
timeoutMs?: number; // Maximum time to spend setting up the read-only DOM scope and running the script.
|
|
};
|
|
|
|
type LoadState = "load" | "domcontentloaded" | "networkidle";
|
|
|
|
type TextMatcher = string | RegExp;
|
|
|
|
type WaitForEventOptions = {
|
|
timeoutMs?: number;
|
|
};
|
|
|
|
type PageWaitForLoadStateOptions = {
|
|
state?: LoadState;
|
|
timeoutMs?: number;
|
|
};
|
|
|
|
type PageWaitForURLOptions = {
|
|
timeoutMs?: number;
|
|
waitUntil?: WaitUntil;
|
|
};
|
|
|
|
type LocatorCheckOptions = {
|
|
force?: boolean;
|
|
timeoutMs?: number;
|
|
};
|
|
|
|
type LocatorClickOptions = {
|
|
button?: MouseButton;
|
|
force?: boolean;
|
|
modifiers?: Array<KeyboardModifier>;
|
|
timeoutMs?: number;
|
|
};
|
|
|
|
type LocatorFilterOptions = {
|
|
has?: PlaywrightLocator;
|
|
hasNot?: PlaywrightLocator;
|
|
hasNotText?: TextMatcher;
|
|
hasText?: TextMatcher;
|
|
visible?: boolean;
|
|
};
|
|
|
|
type LocatorLocatorOptions = {
|
|
has?: PlaywrightLocator;
|
|
hasNot?: PlaywrightLocator;
|
|
hasNotText?: TextMatcher;
|
|
hasText?: TextMatcher;
|
|
};
|
|
|
|
type SelectOptionInput = string | SelectOptionDescriptor;
|
|
|
|
type LocatorWaitForOptions = {
|
|
state: WaitForState;
|
|
timeoutMs?: number;
|
|
};
|
|
|
|
type TabClipboardItem = {
|
|
entries: Array<TabClipboardEntry>;
|
|
presentationStyle?: "unspecified" | "inline" | "attachment";
|
|
};
|
|
|
|
// Options for `TabDevAPI.logs()`.
interface TabDevLogsOptions {
  filter?: string; // Optional substring filter applied to the rendered log message.
  // NOTE(review): "warning" is accepted here, but `TabDevLogEntry.level` has no
  // "warning" value (only "warn") — presumably an alias for "warn"; confirm.
  levels?: Array<"debug" | "info" | "log" | "warn" | "error" | "warning">; // Optional levels to include.
  limit?: number; // Maximum number of logs to return.
}
|
|
|
|
interface TabDevLogEntry {
|
|
level: "debug" | "info" | "log" | "warn" | "error"; // Console log level.
|
|
message: string; // Rendered log message text.
|
|
timestamp: string; // ISO 8601 timestamp for when the runtime captured the log.
|
|
url?: string; // Source URL reported by the browser runtime, when available.
|
|
}
|
|
|
|
interface ClientCapabilities {
|
|
browser?: Array<CapabilityInfo>;
|
|
tab?: Array<CapabilityInfo>;
|
|
}
|
|
|
|
type ClientType = "iab" | "extension" | "cdp";
|
|
|
|
type TabsContentType = "html" | "text" | "domSnapshot";
|
|
|
|
interface FinalizeTabsKeep {
|
|
status: FinalizeTabStatus; // Where the kept tab belongs after cleanup.
|
|
tab: string | Tab | TabInfo; // Tab object to keep open after browser cleanup.
|
|
}
|
|
|
|
type ClipRect = {
|
|
height: number;
|
|
width: number;
|
|
x: number;
|
|
y: number;
|
|
};
|
|
|
|
type ElementInfoRect = {
|
|
height: number;
|
|
width: number;
|
|
x: number;
|
|
y: number;
|
|
};
|
|
|
|
type ElementInfoSelector = {
|
|
candidates: Array<string>; // Ranked selector candidates for the element.
|
|
frameSelectors?: Array<string>; // Frame selectors to enter before using the element selector.
|
|
primary?: string | null; // The preferred selector for the element when available.
|
|
};
|
|
|
|
type WaitUntil = LoadState | "commit";
|
|
|
|
type MouseButton = "left" | "right" | "middle";
|
|
|
|
type KeyboardModifier = "Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift";
|
|
|
|
type SelectOptionDescriptor = {
|
|
index?: number;
|
|
label?: string;
|
|
value?: string;
|
|
};
|
|
|
|
type WaitForState = "attached" | "detached" | "visible" | "hidden";
|
|
|
|
type TabClipboardEntry = {
|
|
base64?: string;
|
|
mimeType: string;
|
|
text?: string;
|
|
};
|
|
|
|
interface CapabilityInfo {
|
|
description: string;
|
|
id: string;
|
|
}
|
|
|
|
type FinalizeTabStatus = "handoff" | "deliverable";
|
|
```
|