Skip to content

Commit 4ad9c25

Browse files
authored
Merge pull request #80 from ScrapeGraphAI/feat/add-wait-ms-crawler
feat: add wait_ms parameter to Crawler endpoint
2 parents 9eb21ac + 906c1e6 commit 4ad9c25

File tree

3 files changed

+17
-0
lines changed

3 files changed

+17
-0
lines changed

scrapegraph-py/scrapegraph_py/async_client.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,7 @@ async def crawl(
884884
include_paths: Optional[list[str]] = None,
885885
exclude_paths: Optional[list[str]] = None,
886886
webhook_url: Optional[str] = None,
887+
wait_ms: Optional[int] = None,
887888
return_toon: bool = False,
888889
):
889890
"""Send a crawl request with support for both AI extraction and
@@ -911,6 +912,7 @@ async def crawl(
911912
exclude_paths: List of path patterns to exclude (e.g., ['/admin/*', '/api/*'])
912913
Supports wildcards and takes precedence over include_paths
913914
webhook_url: URL to receive webhook notifications when the crawl completes
915+
wait_ms: Number of milliseconds to wait before scraping each page
914916
return_toon: If True, return response in TOON format (reduces token usage by 30-60%)
915917
"""
916918
logger.info("🔍 Starting crawl request")
@@ -944,6 +946,8 @@ async def crawl(
944946
logger.debug(f"❌ Exclude paths: {exclude_paths}")
945947
if webhook_url:
946948
logger.debug(f"🔔 Webhook URL: {webhook_url}")
949+
if wait_ms is not None:
950+
logger.debug(f"⏱️ Wait ms: {wait_ms}")
947951
if return_toon:
948952
logger.debug("🎨 TOON format output enabled")
949953

@@ -977,6 +981,8 @@ async def crawl(
977981
request_data["exclude_paths"] = exclude_paths
978982
if webhook_url is not None:
979983
request_data["webhook_url"] = webhook_url
984+
if wait_ms is not None:
985+
request_data["wait_ms"] = wait_ms
980986

981987
request = CrawlRequest(**request_data)
982988
logger.debug("✅ Request validation passed")

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,6 +894,7 @@ def crawl(
894894
include_paths: Optional[list[str]] = None,
895895
exclude_paths: Optional[list[str]] = None,
896896
webhook_url: Optional[str] = None,
897+
wait_ms: Optional[int] = None,
897898
return_toon: bool = False,
898899
):
899900
"""Send a crawl request with support for both AI extraction and
@@ -921,6 +922,7 @@ def crawl(
921922
exclude_paths: List of path patterns to exclude (e.g., ['/admin/*', '/api/*'])
922923
Supports wildcards and takes precedence over include_paths
923924
webhook_url: URL to receive webhook notifications when the crawl completes
925+
wait_ms: Number of milliseconds to wait before scraping each page
924926
return_toon: If True, return response in TOON format (reduces token usage by 30-60%)
925927
"""
926928
logger.info("🔍 Starting crawl request")
@@ -954,6 +956,8 @@ def crawl(
954956
logger.debug(f"❌ Exclude paths: {exclude_paths}")
955957
if webhook_url:
956958
logger.debug(f"🔔 Webhook URL: {webhook_url}")
959+
if wait_ms is not None:
960+
logger.debug(f"⏱️ Wait ms: {wait_ms}")
957961
if return_toon:
958962
logger.debug("🎨 TOON format output enabled")
959963

@@ -987,6 +991,8 @@ def crawl(
987991
request_data["exclude_paths"] = exclude_paths
988992
if webhook_url is not None:
989993
request_data["webhook_url"] = webhook_url
994+
if wait_ms is not None:
995+
request_data["wait_ms"] = wait_ms
990996

991997
request = CrawlRequest(**request_data)
992998
logger.debug("✅ Request validation passed")

scrapegraph-py/scrapegraph_py/models/crawl.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@ class CrawlRequest(BaseModel):
108108
"The webhook will receive a POST request with the crawl results.",
109109
example="https://example.com/webhook"
110110
)
111+
wait_ms: Optional[int] = Field(
112+
default=None,
113+
description="Number of milliseconds to wait before scraping each page. "
114+
"Useful for pages with heavy JavaScript rendering that need extra time to load.",
115+
)
111116

112117
@model_validator(mode="after")
113118
def validate_url(self) -> "CrawlRequest":

0 commit comments

Comments (0)