diff --git a/CHANGELOG.md b/CHANGELOG.md index ba11f45d..3644ad38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +## [1.62.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.61.0...v1.62.0) (2025-08-13) + + +### Features + +* update pr ([c07b3c0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c07b3c08cd6a87c3f7acd2d4d560b7a17d6c02eb)) + + +### Docs + +* removed duplicated line ([c2abb9f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c2abb9fd5df9b5b3a1d9158a2b607f9646c9211d)) + ## [1.61.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.60.0...v1.61.0) (2025-07-03) diff --git a/README.md b/README.md index f8a5cbad..6f2586c4 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [English](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/README.md) | [中文](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/chinese.md) | [日本語](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/japanese.md) | [한국어](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/korean.md) | [Русский](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/russian.md) | [Türkçe](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/turkish.md) -| [Deutsch](https://www.readme-i18n.com/ScrapeGraphAI/Scrapegraph-ai?lang=de) +| [Deutsch](https://www.readme-i18n.com/ScrapeGraphAI/Scrapegraph-ai?lang=de) | [Español](https://www.readme-i18n.com/ScrapeGraphAI/Scrapegraph-ai?lang=es) | [français](https://www.readme-i18n.com/ScrapeGraphAI/Scrapegraph-ai?lang=fr) | [Português](https://www.readme-i18n.com/ScrapeGraphAI/Scrapegraph-ai?lang=pt) @@ -38,13 +38,11 @@ ScrapeGraphAI offers seamless integration with popular frameworks and tools to e You can find more informations at the following [link](https://scrapegraphai.com) -**Integrations**: - **Integrations**: - **API**: [Documentation](https://docs.scrapegraphai.com/introduction) -- **SDKs**: [Python](https://docs.scrapegraphai.com/sdks/python), 
[Node](https://docs.scrapegraphai.com/sdks/javascript) +- **SDKs**: [Python](https://docs.scrapegraphai.com/sdks/python), [Node](https://docs.scrapegraphai.com/sdks/javascript) - **LLM Frameworks**: [Langchain](https://docs.scrapegraphai.com/integrations/langchain), [Llama Index](https://docs.scrapegraphai.com/integrations/llamaindex), [Crew.ai](https://docs.scrapegraphai.com/integrations/crewai), [Agno](https://docs.scrapegraphai.com/integrations/agno), [CamelAI](https://github.com/camel-ai/camel) -- **Low-code Frameworks**: [Pipedream](https://pipedream.com/apps/scrapegraphai), [Bubble](https://bubble.io/plugin/scrapegraphai-1745408893195x213542371433906180), [Zapier](https://zapier.com/apps/scrapegraphai/integrations), [n8n](http://localhost:5001/dashboard), [Dify](https://dify.ai) +- **Low-code Frameworks**: [Pipedream](https://pipedream.com/apps/scrapegraphai), [Bubble](https://bubble.io/plugin/scrapegraphai-1745408893195x213542371433906180), [Zapier](https://zapier.com/apps/scrapegraphai/integrations), [n8n](https://n8n.io), [Dify](https://dify.ai), [Toolhouse](https://app.toolhouse.ai/mcp-servers/scrapegraph_smartscraper) - **MCP server**: [Link](https://smithery.ai/server/@ScrapeGraphAI/scrapegraph-mcp) ## 🚀 Quick install @@ -191,7 +189,6 @@ The Official API Documentation can be found [here](https://docs.scrapegraphai.co ## 📈 Telemetry We collect anonymous usage metrics to enhance our package's quality and user experience. The data helps us prioritize improvements and ensure compatibility. If you wish to opt-out, set the environment variable SCRAPEGRAPHAI_TELEMETRY_ENABLED=false. For more information, please refer to the documentation [here](https://scrapegraph-ai.readthedocs.io/en/latest/scrapers/telemetry.html). 
- ## ❤️ Contributors [![Contributors](https://contrib.rocks/image?repo=VinciGit00/Scrapegraph-ai)](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors) diff --git a/pyproject.toml b/pyproject.toml index 5e6898df..ed00c5db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.61.0" +version = "1.62.0" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py index 16964d84..8cf11f35 100644 --- a/scrapegraphai/graphs/smart_scraper_graph.py +++ b/scrapegraphai/graphs/smart_scraper_graph.py @@ -2,10 +2,14 @@ SmartScraperGraph Module """ +import logging from typing import Optional, Type from pydantic import BaseModel +# Initialize logger +logger = logging.getLogger(__name__) + from ..nodes import ( ConditionalNode, FetchNode, @@ -92,9 +96,12 @@ def _create_graph(self) -> BaseGraph: user_prompt=self.prompt, ) - # Print the response - print(f"Request ID: {response['request_id']}") - print(f"Result: {response['result']}") + # Use logging instead of print for better production practices + if 'request_id' in response and 'result' in response: + logger.info(f"Request ID: {response['request_id']}") + logger.info(f"Result: {response['result']}") + else: + logger.warning("Missing expected keys in response.") sgai_client.close() diff --git a/scrapegraphai/utils/__init__.py b/scrapegraphai/utils/__init__.py index df9118c1..3a4e7b13 100644 --- a/scrapegraphai/utils/__init__.py +++ b/scrapegraphai/utils/__init__.py @@ -43,7 +43,7 @@ from .proxy_rotation import Proxy, parse_or_search_proxy, search_proxy_servers from .save_audio_from_bytes import save_audio_from_bytes from .save_code_to_file import save_code_to_file -from .schema_trasform import transform_schema +from .schema_trasform import transform_schema # Note: filename has typo 
but kept for compatibility from .screenshot_scraping.screenshot_preparation import ( crop_image, select_area_with_ipywidget, diff --git a/scrapegraphai/utils/schema_trasform.py b/scrapegraphai/utils/schema_trasform.py index 8ac968a0..91f4c033 100644 --- a/scrapegraphai/utils/schema_trasform.py +++ b/scrapegraphai/utils/schema_trasform.py @@ -1,5 +1,5 @@ """ -This utility function trasfrom the pydantic schema into a more comprehensible schema. +This utility function transforms the pydantic schema into a more comprehensible schema. """ @@ -19,15 +19,20 @@ def process_properties(properties): for key, value in properties.items(): if "type" in value: if value["type"] == "array": - if "$ref" in value["items"]: + if "items" in value and "$ref" in value["items"]: ref_key = value["items"]["$ref"].split("/")[-1] - result[key] = [ - process_properties( - pydantic_schema["$defs"][ref_key]["properties"] - ) - ] - else: + if "$defs" in pydantic_schema and ref_key in pydantic_schema["$defs"]: + result[key] = [ + process_properties( + pydantic_schema["$defs"][ref_key].get("properties", {}) + ) + ] + else: + result[key] = ["object"] # fallback for missing reference + elif "items" in value and "type" in value["items"]: result[key] = [value["items"]["type"]] + else: + result[key] = ["unknown"] # fallback for malformed array else: result[key] = { "type": value["type"], @@ -35,9 +40,14 @@ def process_properties(properties): } elif "$ref" in value: ref_key = value["$ref"].split("/")[-1] - result[key] = process_properties( - pydantic_schema["$defs"][ref_key]["properties"] - ) + if "$defs" in pydantic_schema and ref_key in pydantic_schema["$defs"]: + result[key] = process_properties( + pydantic_schema["$defs"][ref_key].get("properties", {}) + ) + else: + result[key] = {"type": "object", "description": "Missing reference"} # fallback return result + if "properties" not in pydantic_schema: + raise ValueError("Invalid pydantic schema: missing 'properties' key") return 
process_properties(pydantic_schema["properties"])