18 KiB
18 KiB
API Reference
Treat this reference as the supported `agent.browsers.*` API surface.
// Installed by setupBrowserRuntime({ globals: globalThis }).
// Example: `agent.browsers.get()` accepts a browser id or a client type; "iab" is one of the ClientType values.
const browser = await agent.browsers.get("iab");
// Root API object: entry point to browser selection and packaged documentation.
interface Agent {
browsers: Browsers; // API for finding and selecting browsers.
documentation: Documentation; // API for reading packaged browser-use documentation by name.
}
// Finds and selects browsers; reached through `agent.browsers`.
interface Browsers {
get(id: string): Promise<Browser>; // Get a browser by id or client type (see ClientType).
list(): Promise<Array<BrowserInfo>>; // List available browsers.
}
// A selected browser, as returned by `agent.browsers.get()`.
interface Browser {
browserId: string; // Browser id selected by `agent.browsers.get()`.
capabilities: BrowserCapabilityCollection; // Browser-scoped optional capabilities advertised by the connected backend; discover IDs with `await browser.capabilities.list()`, then call `await (await browser.capabilities.get(id)).documentation()` for method details.
tabs: Tabs; // API for interacting with browser tabs.
user: BrowserUser; // Readonly context about tabs in the user's browser windows.
documentation(): Promise<string>; // Read browser guidance and the core API reference.
nameSession(name: string): Promise<void>; // Name the current browser automation session.
}
// Readonly view of the user's own browser tabs; reached through `browser.user`.
interface BrowserUser {
openTabs(): Promise<Array<BrowserUserTabInfo>>; // List open top-level tabs across the user's browser windows ordered by `lastOpened` descending.
}
// Tab management for one browser; reached through `browser.tabs`.
interface Tabs {
get(id: string): Promise<Tab>; // Get a tab by id.
list(): Promise<Array<TabInfo>>; // List open tabs in the browser.
// The name is quoted so this declares a method called as `browser.tabs.new()`; an unquoted `new(): ...` member inside an interface is a construct signature instead.
"new"(): Promise<Tab>; // Create and return a new tab in the browser.
selected(): Promise<undefined | Tab>; // Return the currently selected tab, if any.
}
// A single browser tab, as returned by `Tabs.get()`, `Tabs.new()`, or `Tabs.selected()`.
interface Tab {
capabilities: TabCapabilityCollection; // Tab-scoped optional capabilities advertised by the connected backend; discover IDs with `await tab.capabilities.list()`, then call `await (await tab.capabilities.get(id)).documentation()` for method details.
clipboard: TabClipboardAPI; // API for interacting with clipboard content in this tab.
cua: CUAAPI; // API for interacting with the tab via the CUA API (addressed by coordinates).
dev: TabDevAPI; // API for developer-oriented tab inspection.
dom_cua: DomCUAAPI; // API for interacting with the tab via the DOM-based CUA API (addressed by node ids from `get_visible_dom()`).
id: string; // This tab's unique identifier.
playwright: PlaywrightAPI; // API for interacting with the tab via the Playwright API.
back(): Promise<void>; // Navigate this tab back in history.
close(): Promise<void>; // Close this tab.
forward(): Promise<void>; // Navigate this tab forward in history.
goto(url: string): Promise<void>; // Open a URL in this tab.
reload(): Promise<void>; // Reload this tab.
screenshot(options: ScreenshotOptions): Promise<Uint8Array>; // Capture a screenshot of this tab. The image encoding of the returned bytes is not stated in this reference.
title(): Promise<undefined | string>; // Get the current title for this tab.
url(): Promise<undefined | string>; // Get the current URL for this tab.
}
// Pointer and keyboard input addressed by coordinates; reached through `tab.cua`.
interface CUAAPI {
click(options: ClickOptions): Promise<void>; // Click at a coordinate in the current viewport.
double_click(options: DoubleClickOptions): Promise<void>; // Double click at a coordinate in the current viewport.
drag(options: DragOptions): Promise<void>; // Drag from a point to a point by the provided path.
keypress(options: KeypressOptions): Promise<void>; // Press control characters at the current focused element (focus it first via click/dblclick).
move(options: MoveOptions): Promise<void>; // Move the mouse to a point by the provided x and y coordinates.
scroll(options: ScrollOptions): Promise<void>; // Scroll by a delta from a specific viewport coordinate.
type(options: TypeOptions): Promise<void>; // Type text at the current focus.
}
// Input API addressed by DOM node ids taken from `get_visible_dom()`; reached through `tab.dom_cua`.
interface DomCUAAPI {
click(options: DomClickOptions): Promise<void>; // Click a DOM node by its id from the visible DOM snapshot.
double_click(options: DomClickOptions): Promise<void>; // Double-click a DOM node by its id.
get_visible_dom(): Promise<unknown>; // Return a filtered DOM with node ids for interactable elements. Typed as `unknown`: the result shape is not declared in this reference.
keypress(options: DomKeypressOptions): Promise<void>; // Press control characters at the currently focused element (focus it first via click/dblclick).
scroll(options: DomScrollOptions): Promise<void>; // Scroll either the page or a specific node (if node_id provided) by deltas.
type(options: DomTypeOptions): Promise<void>; // Type text into the currently focused element (focus via click first).
}
// Playwright-style page API for one tab; reached through `tab.playwright`.
interface PlaywrightAPI {
domSnapshot(): Promise<string>; // Return a snapshot of the current DOM as a string, including expanded iframe body content when available.
evaluate<TResult, TArg>(pageFunction: PlaywrightEvaluateFunction<TArg, TResult>, arg?: TArg, options?: PlaywrightEvaluateOptions): Promise<TResult>; // Evaluate JavaScript in a read-only page scope.
expectNavigation<T>(action: () => Promise<T>, options: { timeoutMs?: number; url?: string; waitUntil?: LoadState }): Promise<T>; // Expect a navigation triggered by an action.
frameLocator(frameSelector: string): PlaywrightFrameLocator; // Create a frame-scoped locator builder.
getByLabel(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by label text within the page.
getByPlaceholder(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by placeholder text within the page.
getByRole(role: string, options: { exact?: boolean; name?: TextMatcher }): PlaywrightLocator; // Find elements by ARIA role within the page.
getByTestId(testId: string): PlaywrightLocator; // Find elements by test id within the page.
getByText(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by text within the page.
locator(selector: string): PlaywrightLocator; // Create a locator scoped to this tab.
waitForEvent(event: "download", options?: WaitForEventOptions): Promise<PlaywrightDownload>; // Wait for the next event on the page. Only the "download" event is declared here.
waitForLoadState(options: PageWaitForLoadStateOptions): Promise<void>; // Wait for the page to reach a specific load state.
waitForTimeout(timeoutMs: number): Promise<void>; // Wait for a fixed duration.
waitForURL(url: string, options: PageWaitForURLOptions): Promise<void>; // Wait for the page URL to match the provided value.
}
// Locator builder scoped to a frame; returned by `frameLocator()`.
interface PlaywrightFrameLocator {
frameLocator(frameSelector: string): PlaywrightFrameLocator; // Create a locator scoped to a nested frame.
getByLabel(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by label within this frame.
getByPlaceholder(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by placeholder within this frame.
getByRole(role: string, options: { exact?: boolean; name?: TextMatcher }): PlaywrightLocator; // Find elements by ARIA role within this frame.
getByTestId(testId: string): PlaywrightLocator; // Find elements by test id within this frame.
getByText(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by text within this frame.
locator(selector: string): PlaywrightLocator; // Create a locator scoped to this frame.
}
// Handle for the element(s) matched by a selector or getBy* query; offers actions, reads, and further narrowing.
interface PlaywrightLocator {
all(): Promise<Array<PlaywrightLocator>>; // Resolve to a list of locators for each matched element.
allTextContents(options: { timeoutMs?: number }): Promise<Array<string>>; // Return `textContent` for *all* elements matched by this locator.
and(locator: PlaywrightLocator): PlaywrightLocator; // Return a locator matching elements that satisfy both this locator and `locator`.
check(options: LocatorCheckOptions): Promise<void>; // Check a checkbox or switch-like control.
click(options: LocatorClickOptions): Promise<void>; // Click the element matched by this locator.
count(): Promise<number>; // Number of elements matching this locator.
dblclick(options: LocatorClickOptions): Promise<void>; // Double-click the element matched by this locator.
fill(value: string, options: { timeoutMs?: number }): Promise<void>; // Replace the element's value with the provided text.
filter(options: LocatorFilterOptions): PlaywrightLocator; // Narrow this locator by additional constraints.
first(): PlaywrightLocator; // Return a locator pointing at the first matched element.
getAttribute(name: string, options: { timeoutMs?: number }): Promise<null | string>; // Return an attribute value from the first matched element.
getByLabel(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by label text, scoped to this locator.
getByPlaceholder(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by placeholder text, scoped to this locator.
getByRole(role: string, options: { exact?: boolean; name?: TextMatcher }): PlaywrightLocator; // Find elements by ARIA role, scoped to this locator.
getByTestId(testId: string): PlaywrightLocator; // Find elements by test id, scoped to this locator.
getByText(text: TextMatcher, options: { exact?: boolean }): PlaywrightLocator; // Find elements by text content, scoped to this locator.
innerText(options: { timeoutMs?: number }): Promise<string>; // Return the rendered (visible) text of the first matched element.
isEnabled(): Promise<boolean>; // Whether the first matched element is currently enabled.
isVisible(): Promise<boolean>; // Whether the first matched element is currently visible.
last(): PlaywrightLocator; // Return a locator pointing at the last matched element.
locator(selector: string, options: LocatorLocatorOptions): PlaywrightLocator; // Create a descendant locator scoped to this locator.
nth(index: number): PlaywrightLocator; // Return a locator pointing at the Nth matched element. NOTE(review): whether `index` is zero-based is not stated here — confirm.
or(locator: PlaywrightLocator): PlaywrightLocator; // Return a locator matching elements that satisfy either this locator or `locator`.
press(value: string, options: { timeoutMs?: number }): Promise<void>; // Press a keyboard key while this locator is focused.
selectOption(value: SelectOptionInput | Array<SelectOptionInput>, options: { timeoutMs?: number }): Promise<void>; // Select one or more options on a native `<select>` element.
setChecked(checked: boolean, options: LocatorCheckOptions): Promise<void>; // Set a checkbox or switch-like control to a checked/unchecked state.
textContent(options: { timeoutMs?: number }): Promise<null | string>; // Return the raw textContent of the first matched element (or null if missing).
type(value: string, options: { timeoutMs?: number }): Promise<void>; // Type text into the element without clearing existing content.
uncheck(options: LocatorCheckOptions): Promise<void>; // Uncheck a checkbox or switch-like control.
waitFor(options: LocatorWaitForOptions): Promise<void>; // Wait for the element to reach a specific state.
}
// Value resolved by `PlaywrightAPI.waitForEvent("download")`.
// NOTE(review): no members are declared in this reference, so nothing can be read from it through this typed surface — confirm whether members were omitted.
interface PlaywrightDownload {
}
// Clipboard access for a tab; reached through `tab.clipboard`.
interface TabClipboardAPI {
read(): Promise<Array<TabClipboardItem>>; // Read clipboard items, including text and binary payloads.
readText(): Promise<string>; // Read plain text from the browser clipboard.
write(items: Array<TabClipboardItem>): Promise<void>; // Write clipboard items.
writeText(text: string): Promise<void>; // Write plain text to the browser clipboard.
}
// Developer-oriented inspection for a tab; reached through `tab.dev`.
interface TabDevAPI {
logs(options: TabDevLogsOptions): Promise<Array<TabDevLogEntry>>; // Read console log messages captured for this tab.
}
// Reads packaged browser-use documentation; reached through `agent.documentation`.
interface Documentation {
get(name: string): Promise<string>; // Read packaged documentation by its extensionless relative path.
}
// Entry returned by `Browsers.list()` describing one available browser.
interface BrowserInfo {
capabilities: ClientCapabilities; // Optional capabilities, grouped into browser-scoped and tab-scoped lists.
id: string; // Browser id — presumably the value accepted by `Browsers.get()`; confirm.
metadata?: Record<string, string>; // Optional string key/value metadata; the keys are not specified in this reference.
name: string; // Browser name.
type: ClientType; // Client type of this browser (see ClientType).
}
// Browser-scoped capability lookup; reached through `browser.capabilities`.
type BrowserCapabilityCollection = {
get(id: string): Promise<unknown>; // Get a capability by id. Typed as `unknown`; the `Browser.capabilities` note says to call `documentation()` on the result for method details.
list(): Promise<Array<{ id: string; description: string }>>; // List the advertised capability ids with their descriptions.
};
// Entry returned by `BrowserUser.openTabs()` describing one of the user's open tabs.
interface BrowserUserTabInfo {
id: string; // Opaque identifier for this browser tab.
lastOpened?: string; // ISO 8601 timestamp for the last time the tab was opened or focused.
tabGroup?: string; // User-visible tab group name when the tab belongs to one.
title?: string; // User-visible tab title.
url?: string; // Current tab URL.
}
// Options for loading a list of URLs in temporary background tabs.
// NOTE(review): no method in this section takes this type — confirm which call consumes it.
interface TabsContentOptions {
timeoutMs?: number; // Maximum time to wait for each page load, in milliseconds.
urls: Array<string>; // URLs to load in temporary background tabs.
}
// Resolved title and URL for one loaded page.
// NOTE(review): presumably the per-URL result paired with TabsContentOptions; no method in this section returns it — confirm.
interface TabsContentResult {
title: null | string; // The resolved page title when available.
url: string; // The resolved page URL when available, otherwise the requested URL.
}
// Options for a tab cleanup step that can keep selected tabs open.
// NOTE(review): no method in this section takes this type — confirm the entry point.
interface FinalizeTabsOptions {
keep?: Array<FinalizeTabsKeep>; // Explicit tab dispositions to preserve after cleanup.
}
// Metadata describing an open tab; entry returned by `Tabs.list()`.
interface TabInfo {
id: string; // Tab id — presumably the value accepted by `Tabs.get()`; confirm.
title?: string; // Tab title; may be absent.
url?: string; // Tab URL; may be absent.
}
// Tab-scoped capability lookup; reached through `tab.capabilities`.
type TabCapabilityCollection = {
get(id: string): Promise<unknown>; // Get a capability by id. Typed as `unknown`; the `Tab.capabilities` note says to call `documentation()` on the result for method details.
list(): Promise<Array<{ id: string; description: string }>>; // List the advertised capability ids with their descriptions.
};
// Options for `Tab.screenshot()`. How `clip` and `fullPage` combine when both are set is not stated in this reference.
type ScreenshotOptions = {
clip?: ClipRect; // Crop to a specific rectangle instead of the full viewport.
fullPage?: boolean; // Capture the full page instead of the viewport.
};
// Options for `CUAAPI.click()`.
type ClickOptions = {
button?: number; // Mouse button (1-left, 2-middle/wheel, 3-right, 4-back, 5-forward). The default when omitted is not stated in this reference.
keypress?: Array<string>; // Modifier keys held during the click.
x: number; // Viewport x coordinate of the click.
y: number; // Viewport y coordinate of the click.
};
// Options for `CUAAPI.double_click()`.
type DoubleClickOptions = {
keypress?: Array<string>; // Modifier keys held during the double click.
x: number; // Viewport x coordinate of the double click.
y: number; // Viewport y coordinate of the double click.
};
// Options for `CUAAPI.drag()`.
type DragOptions = {
keys?: Array<string>; // Optional modifier keys held during the drag.
path: Array<{ x: number; y: number }>; // Drag path as a list of points.
};
// Options for `CUAAPI.keypress()`.
type KeypressOptions = {
keys: Array<string>; // Key combination to press.
};
// Options for `CUAAPI.move()`.
type MoveOptions = {
keys?: Array<string>; // Optional modifier keys held while moving.
x: number; // x coordinate of the point to move the mouse to.
y: number; // y coordinate of the point to move the mouse to.
};
// Options for `CUAAPI.scroll()`, which scrolls by a delta from a specific viewport coordinate.
type ScrollOptions = {
keypress?: Array<string>; // Modifier keys held during scroll.
scrollX: number; // Horizontal scroll delta. Units and sign convention are not stated in this reference.
scrollY: number; // Vertical scroll delta. Units and sign convention are not stated in this reference.
x: number; // Viewport x coordinate to scroll from.
y: number; // Viewport y coordinate to scroll from.
};
// Options for `CUAAPI.type()`.
type TypeOptions = {
text: string; // Text to type at the current focus.
};
// Options for `DomCUAAPI.click()` and `DomCUAAPI.double_click()`.
type DomClickOptions = {
node_id: string; // Node id from `get_visible_dom()`.
};
// Options for `DomCUAAPI.keypress()`.
type DomKeypressOptions = {
keys: Array<string>; // Key combination to press.
};
// Options for `DomCUAAPI.scroll()`; scrolls the page unless `node_id` is provided.
type DomScrollOptions = {
node_id?: string; // Optional node id to scroll within.
x: number; // Horizontal scroll delta.
y: number; // Vertical scroll delta.
};
// Options for `DomCUAAPI.type()`.
type DomTypeOptions = {
text: string; // Text to type into the currently focused element.
};
// Options for an element lookup at a point.
// NOTE(review): no method in this section takes this type — confirm which call consumes it.
type ElementInfoOptions = {
includeNonInteractable?: boolean; // When true, include non-interactable elements in addition to interactable targets.
x: number; // x coordinate of the point — presumably in screenshot coordinates like `ElementInfo.boundingBox`; confirm.
y: number; // y coordinate of the point — presumably in screenshot coordinates like `ElementInfo.boundingBox`; confirm.
};
// Description of one element: tag, accessibility data, bounds, text, and selector hints.
// NOTE(review): no method in this section returns this type — confirm which call produces it.
type ElementInfo = {
ariaName?: string | null; // Accessible name if available.
boundingBox?: ElementInfoRect | null; // Element bounds in screenshot coordinates.
nodeId?: number | null; // Backend node id that can be passed to DOM-inspection APIs when available.
preview: string; // Compact human-readable node preview.
role?: string | null; // Computed ARIA role if available.
selector: ElementInfoSelector; // Suggested selector data for this element.
tagName: string; // Lowercased HTML tag name.
testId?: string | null; // Configured test id attribute if present.
visibleText?: string | null; // Rendered visible text, selected option text, or visible form value when available.
};
// Options for a screenshot that highlights elements at a point.
// NOTE(review): no method in this section takes this type — confirm which call consumes it.
type ElementScreenshotOptions = {
includeNonInteractable?: boolean; // When true, highlight non-interactable elements in addition to interactable targets.
x: number; // x coordinate of the point — coordinate space is not stated in this reference; confirm.
y: number; // y coordinate of the point — coordinate space is not stated in this reference; confirm.
};
// Script accepted by `PlaywrightAPI.evaluate()`: either a string (presumably JavaScript source — confirm) or a function of `arg` that returns the result or a promise of it.
// The function alternative is parenthesized because TypeScript rejects bare function type notation inside a union type.
type PlaywrightEvaluateFunction<TArg, TResult> = string | ((arg: TArg) => TResult | Promise<TResult>);
// Options for `PlaywrightAPI.evaluate()`.
type PlaywrightEvaluateOptions = {
timeoutMs?: number; // Maximum time to spend setting up the read-only DOM scope and running the script.
};
// Page load states; used by `expectNavigation`'s `waitUntil`, by `PageWaitForLoadStateOptions.state`, and as part of `WaitUntil`.
type LoadState = "load" | "domcontentloaded" | "networkidle";
// Text to match: a string or a regular expression. Used by the getBy* finders and the `hasText`/`hasNotText` filters.
type TextMatcher = string | RegExp;
// Options for `PlaywrightAPI.waitForEvent()`.
type WaitForEventOptions = {
timeoutMs?: number; // Timeout in milliseconds (per the `Ms` suffix); the default is not stated in this reference.
};
// Options for `PlaywrightAPI.waitForLoadState()`.
type PageWaitForLoadStateOptions = {
state?: LoadState; // Load state to wait for; the default when omitted is not stated in this reference.
timeoutMs?: number; // Timeout in milliseconds (per the `Ms` suffix); the default is not stated in this reference.
};
// Options for `PlaywrightAPI.waitForURL()`.
type PageWaitForURLOptions = {
timeoutMs?: number; // Timeout in milliseconds (per the `Ms` suffix); the default is not stated in this reference.
waitUntil?: WaitUntil; // Load state or "commit" to wait for — presumably as in Playwright's `waitUntil`; confirm.
};
// Options for `PlaywrightLocator.check()`, `setChecked()`, and `uncheck()`.
type LocatorCheckOptions = {
force?: boolean; // NOTE(review): presumably bypasses pre-action checks as Playwright's `force` does — confirm.
timeoutMs?: number; // Timeout in milliseconds (per the `Ms` suffix); the default is not stated in this reference.
};
// Options for `PlaywrightLocator.click()` and `dblclick()`.
type LocatorClickOptions = {
button?: MouseButton; // Mouse button by name. Note that `ClickOptions.button` on the CUA API uses numbers instead.
force?: boolean; // NOTE(review): presumably bypasses pre-action checks as Playwright's `force` does — confirm.
modifiers?: Array<KeyboardModifier>; // Modifier keys — presumably held during the click; confirm.
timeoutMs?: number; // Timeout in milliseconds (per the `Ms` suffix); the default is not stated in this reference.
};
// Constraints for `PlaywrightLocator.filter()`.
// NOTE(review): field semantics are not described here; presumably they mirror Playwright's `locator.filter()` options — confirm.
type LocatorFilterOptions = {
has?: PlaywrightLocator;
hasNot?: PlaywrightLocator;
hasNotText?: TextMatcher;
hasText?: TextMatcher;
visible?: boolean;
};
// Options for `PlaywrightLocator.locator()`; the same fields as LocatorFilterOptions except `visible`.
// NOTE(review): field semantics are not described here; presumably they mirror Playwright's `locator.locator()` options — confirm.
type LocatorLocatorOptions = {
has?: PlaywrightLocator;
hasNot?: PlaywrightLocator;
hasNotText?: TextMatcher;
hasText?: TextMatcher;
};
// One option to select in `PlaywrightLocator.selectOption()`: a string or a descriptor object. Whether a bare string matches by value or by label is not stated in this reference.
type SelectOptionInput = string | SelectOptionDescriptor;
// Options for `PlaywrightLocator.waitFor()`.
type LocatorWaitForOptions = {
state: WaitForState; // Element state to wait for; required.
timeoutMs?: number; // Timeout in milliseconds (per the `Ms` suffix); the default is not stated in this reference.
};
// One clipboard item, as read by `TabClipboardAPI.read()` and written by `TabClipboardAPI.write()`.
type TabClipboardItem = {
entries: Array<TabClipboardEntry>; // Entries of this item, each carrying a MIME type.
presentationStyle?: "unspecified" | "inline" | "attachment"; // NOTE(review): presumably mirrors the web ClipboardItem `presentationStyle` — confirm.
};
// Options for `TabDevAPI.logs()`.
interface TabDevLogsOptions {
filter?: string; // Optional substring filter applied to the rendered log message.
levels?: Array<"debug" | "info" | "log" | "warn" | "error" | "warning">; // Optional levels to include. NOTE(review): accepts "warning", which `TabDevLogEntry.level` never reports — presumably an alias for "warn"; confirm.
limit?: number; // Maximum number of logs to return.
}
// One console log message captured for a tab; entry returned by `TabDevAPI.logs()`.
interface TabDevLogEntry {
level: "debug" | "info" | "log" | "warn" | "error"; // Console log level.
message: string; // Rendered log message text.
timestamp: string; // ISO 8601 timestamp for when the runtime captured the log.
url?: string; // Source URL reported by the browser runtime, when available.
}
// Capabilities reported in `BrowserInfo.capabilities`, grouped by scope.
interface ClientCapabilities {
browser?: Array<CapabilityInfo>; // Browser-scoped capabilities, if any are reported.
tab?: Array<CapabilityInfo>; // Tab-scoped capabilities, if any are reported.
}
// Kinds of browser client; reported in `BrowserInfo.type` and accepted by `Browsers.get()` in place of an id.
type ClientType = "iab" | "extension" | "cdp";
// Content formats for page content. NOTE(review): nothing in this section references this type — confirm where it is used.
type TabsContentType = "html" | "text" | "domSnapshot";
// One tab to preserve after cleanup; element of `FinalizeTabsOptions.keep`.
interface FinalizeTabsKeep {
status: FinalizeTabStatus; // Where the kept tab belongs after cleanup.
tab: string | Tab | TabInfo; // Tab to keep open after browser cleanup: a Tab, a TabInfo, or a string (presumably a tab id — confirm).
}
// Rectangle used by `ScreenshotOptions.clip`. Units and coordinate origin are not stated in this reference.
type ClipRect = {
height: number; // Rectangle height.
width: number; // Rectangle width.
x: number; // x coordinate of the rectangle — presumably its left edge; confirm.
y: number; // y coordinate of the rectangle — presumably its top edge; confirm.
};
// Rectangle used by `ElementInfo.boundingBox`, which is expressed in screenshot coordinates.
type ElementInfoRect = {
height: number; // Rectangle height.
width: number; // Rectangle width.
x: number; // x coordinate of the rectangle — presumably its left edge; confirm.
y: number; // y coordinate of the rectangle — presumably its top edge; confirm.
};
// Selector hints for an element; value of `ElementInfo.selector`.
type ElementInfoSelector = {
candidates: Array<string>; // Ranked selector candidates for the element.
frameSelectors?: Array<string>; // Frame selectors to enter before using the element selector.
primary?: string | null; // The preferred selector for the element when available.
};
// Any LoadState plus "commit"; used by `PageWaitForURLOptions.waitUntil`.
type WaitUntil = LoadState | "commit";
// Named mouse buttons; used by `LocatorClickOptions.button`.
type MouseButton = "left" | "right" | "middle";
// Modifier key names; used by `LocatorClickOptions.modifiers`. NOTE(review): "ControlOrMeta" presumably resolves per platform as in Playwright — confirm.
type KeyboardModifier = "Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift";
// Object form of SelectOptionInput, identifying a `<select>` option. All fields are optional; how multiple fields combine is not stated in this reference.
type SelectOptionDescriptor = {
index?: number; // Option index — presumably zero-based; confirm.
label?: string; // Option label.
value?: string; // Option value.
};
// Element states that `PlaywrightLocator.waitFor()` can wait for; used by `LocatorWaitForOptions.state`.
type WaitForState = "attached" | "detached" | "visible" | "hidden";
// One typed payload inside a clipboard item; element of `TabClipboardItem.entries`.
type TabClipboardEntry = {
base64?: string; // Presumably the payload base64-encoded, for binary content — confirm.
mimeType: string; // MIME type of this entry.
text?: string; // Presumably the payload as plain text, for textual content — confirm.
};
// Summary of one optional capability; element of the `ClientCapabilities` lists.
interface CapabilityInfo {
description: string; // Description of the capability.
id: string; // Capability id.
}
// Disposition of a kept tab after cleanup; used by `FinalizeTabsKeep.status`. The meaning of each value is not described in this reference.
type FinalizeTabStatus = "handoff" | "deliverable";