Skip to content

Commit 4ad9c25

Browse files
authored
Merge pull request #80 from ScrapeGraphAI/feat/add-wait-ms-crawler
feat: add wait_ms parameter to Crawler endpoint
2 parents 9eb21ac + 906c1e6 commit 4ad9c25

File tree

3 files changed

+17
-0
lines changed

3 files changed

+17
-0
lines changed

scrapegraph-py/scrapegraph_py/async_client.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,7 @@ async def crawl(
884884
include_paths: Optional[list[str]] = None,
885885
exclude_paths: Optional[list[str]] = None,
886886
webhook_url: Optional[str] = None,
887+
wait_ms: Optional[int] = None,
887888
return_toon: bool = False,
888889
):
889890
"""Send a crawl request with support for both AI extraction and
@@ -911,6 +912,7 @@ async def crawl(
911912
exclude_paths: List of path patterns to exclude (e.g., ['/admin/*', '/api/*'])
912913
Supports wildcards and takes precedence over include_paths
913914
webhook_url: URL to receive webhook notifications when the crawl completes
915+
wait_ms: Number of milliseconds to wait before scraping each page
914916
return_toon: If True, return response in TOON format (reduces token usage by 30-60%)
915917
"""
916918
logger.info("🔍 Starting crawl request")
@@ -944,6 +946,8 @@ async def crawl(
944946
logger.debug(f"❌ Exclude paths: {exclude_paths}")
945947
if webhook_url:
946948
logger.debug(f"🔔 Webhook URL: {webhook_url}")
949+
if wait_ms is not None:
950+
logger.debug(f"⏱️ Wait ms: {wait_ms}")
947951
if return_toon:
948952
logger.debug("🎨 TOON format output enabled")
949953

@@ -977,6 +981,8 @@ async def crawl(
977981
request_data["exclude_paths"] = exclude_paths
978982
if webhook_url is not None:
979983
request_data["webhook_url"] = webhook_url
984+
if wait_ms is not None:
985+
request_data["wait_ms"] = wait_ms
980986

981987
request = CrawlRequest(**request_data)
982988
logger.debug("✅ Request validation passed")

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,6 +894,7 @@ def crawl(
894894
include_paths: Optional[list[str]] = None,
895895
exclude_paths: Optional[list[str]] = None,
896896
webhook_url: Optional[str] = None,
897+
wait_ms: Optional[int] = None,
897898
return_toon: bool = False,
898899
):
899900
"""Send a crawl request with support for both AI extraction and
@@ -921,6 +922,7 @@ def crawl(
921922
exclude_paths: List of path patterns to exclude (e.g., ['/admin/*', '/api/*'])
922923
Supports wildcards and takes precedence over include_paths
923924
webhook_url: URL to receive webhook notifications when the crawl completes
925+
wait_ms: Number of milliseconds to wait before scraping each page
924926
return_toon: If True, return response in TOON format (reduces token usage by 30-60%)
925927
"""
926928
logger.info("🔍 Starting crawl request")
@@ -954,6 +956,8 @@ def crawl(
954956
logger.debug(f"❌ Exclude paths: {exclude_paths}")
955957
if webhook_url:
956958
logger.debug(f"🔔 Webhook URL: {webhook_url}")
959+
if wait_ms is not None:
960+
logger.debug(f"⏱️ Wait ms: {wait_ms}")
957961
if return_toon:
958962
logger.debug("🎨 TOON format output enabled")
959963

@@ -987,6 +991,8 @@ def crawl(
987991
request_data["exclude_paths"] = exclude_paths
988992
if webhook_url is not None:
989993
request_data["webhook_url"] = webhook_url
994+
if wait_ms is not None:
995+
request_data["wait_ms"] = wait_ms
990996

991997
request = CrawlRequest(**request_data)
992998
logger.debug("✅ Request validation passed")

scrapegraph-py/scrapegraph_py/models/crawl.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@ class CrawlRequest(BaseModel):
108108
"The webhook will receive a POST request with the crawl results.",
109109
example="https://example.com/webhook"
110110
)
111+
wait_ms: Optional[int] = Field(
112+
default=None,
113+
description="Number of milliseconds to wait before scraping each page. "
114+
"Useful for pages with heavy JavaScript rendering that need extra time to load.",
115+
)
111116

112117
@model_validator(mode="after")
113118
def validate_url(self) -> "CrawlRequest":

0 commit comments

Comments (0)