diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..5894edd --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,48 @@ +name: CI +on: + push: + branches-ignore: + - 'generated' + - 'codegen/**' + - 'integrated/**' + - 'stl-preview-head/**' + - 'stl-preview-base/**' + pull_request: + branches-ignore: + - 'stl-preview-head/**' + - 'stl-preview-base/**' + +jobs: + lint: + timeout-minutes: 10 + name: lint + runs-on: ${{ github.repository == 'stainless-sdks/scrapegraphai-ruby' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} + if: github.event_name == 'push' || github.event.pull_request.head.repo.fork + + steps: + - uses: actions/checkout@v4 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + bundler-cache: false + - run: |- + bundle install + + - name: Run lints + run: ./scripts/lint + test: + timeout-minutes: 10 + name: test + runs-on: ${{ github.repository == 'stainless-sdks/scrapegraphai-ruby' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} + if: github.event_name == 'push' || github.event.pull_request.head.repo.fork + steps: + - uses: actions/checkout@v4 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + bundler-cache: false + - run: |- + bundle install + + - name: Run tests + run: ./scripts/test diff --git a/.github/workflows/publish-gem.yml b/.github/workflows/publish-gem.yml new file mode 100644 index 0000000..db6ea75 --- /dev/null +++ b/.github/workflows/publish-gem.yml @@ -0,0 +1,31 @@ +# This workflow is triggered when a GitHub release is created. +# It can also be run manually to re-publish to rubygems.org in case it failed for some reason. 
+# You can run this workflow by navigating to https://www.github.com/ScrapeGraphAI/scrapegraphai-ruby/actions/workflows/publish-gem.yml +name: Publish Gem +on: + workflow_dispatch: + + release: + types: [published] + +jobs: + publish: + name: publish + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + bundler-cache: false + - run: |- + bundle install + + - name: Publish to RubyGems.org + run: | + bash ./bin/publish-gem + env: + # `RUBYGEMS_HOST` is only required for private gem repositories, not https://rubygems.org + RUBYGEMS_HOST: ${{ secrets.SCRAPEGRAPHAI_RUBYGEMS_HOST || secrets.RUBYGEMS_HOST }} + GEM_HOST_API_KEY: ${{ secrets.SCRAPEGRAPHAI_GEM_HOST_API_KEY || secrets.GEM_HOST_API_KEY }} diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml new file mode 100644 index 0000000..c8e8fe8 --- /dev/null +++ b/.github/workflows/release-doctor.yml @@ -0,0 +1,22 @@ +name: Release Doctor +on: + pull_request: + branches: + - main + workflow_dispatch: + +jobs: + release_doctor: + name: release doctor + runs-on: ubuntu-latest + if: github.repository == 'ScrapeGraphAI/scrapegraphai-ruby' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') + + steps: + - uses: actions/checkout@v4 + + - name: Check release environment + run: | + bash ./bin/check-release-environment + env: + RUBYGEMS_HOST: ${{ secrets.SCRAPEGRAPHAI_RUBYGEMS_HOST || secrets.RUBYGEMS_HOST }} + GEM_HOST_API_KEY: ${{ secrets.SCRAPEGRAPHAI_GEM_HOST_API_KEY || secrets.GEM_HOST_API_KEY }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3d26cee --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +*.gem +.idea/ +.ignore +.prism.log +.ruby-lsp/ +.yardoc/ +bin/tapioca +Brewfile.lock.json +doc/ +sorbet/tapioca/* diff --git a/.release-please-manifest.json b/.release-please-manifest.json new file 
mode 100644 index 0000000..1332969 --- /dev/null +++ b/.release-please-manifest.json @@ -0,0 +1,3 @@ +{ + ".": "0.0.1" +} \ No newline at end of file diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000..5707b00 --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,296 @@ +# yaml-language-server: $schema=https://www.rubyschema.org/rubocop.json +--- +# Explicitly disable pending cops for now. This is the default behaviour but +# this avoids a large warning every time we run it. +# Stop RuboCop nagging about rubocop-rake. +# Ensure that RuboCop validates according to the lowest version of Ruby that we support. +AllCops: + Exclude: + - "bin/*" + NewCops: enable + SuggestExtensions: false + TargetRubyVersion: 3.2 + +# Whether MFA is required or not should be left to the token configuration. +Gemspec/RequireMFA: + Enabled: false + +# Don't require this extra line break, it can be excessive. +Layout/EmptyLineAfterGuardClause: + Enabled: false + +# Don't leave complex assignment values hanging off to the right. +Layout/EndAlignment: + EnforcedStyleAlignWith: variable + +Layout/FirstArrayElementLineBreak: + Enabled: true + +Layout/FirstHashElementLineBreak: + Enabled: true + +Layout/FirstMethodArgumentLineBreak: + Enabled: true + +Layout/FirstMethodParameterLineBreak: + Enabled: true + +# Set a reasonable line length; rely on other cops to correct long lines. +Layout/LineLength: + AllowedPatterns: + - "^\\s*#.*$" + - ^require(_relative)? + - "Scrapegraphai::Internal::Type::BaseModel$" + - "^\\s*[A-Z0-9_]+ = :" + - "Scrapegraphai::(Models|Resources|Test)::" + Max: 110 + +Layout/MultilineArrayLineBreaks: + Enabled: true + +# Start the assignment on the same line variable is mentioned. +Layout/MultilineAssignmentLayout: + EnforcedStyle: same_line + +Layout/MultilineHashKeyLineBreaks: + Enabled: true + +Layout/MultilineMethodArgumentLineBreaks: + Enabled: true + +Layout/MultilineMethodParameterLineBreaks: + Enabled: true + +# Prefer compact hash literals. 
+Layout/SpaceInsideHashLiteralBraces: + EnforcedStyle: no_space + Exclude: + - "**/*.rbi" + +Lint/BooleanSymbol: + Enabled: false + +# This option occasionally mangles identifier names +Lint/DeprecatedConstants: + Exclude: + - "**/*.rbi" + +# We use pattern assertion in tests to ensure correctness. +Lint/DuplicateMatchPattern: + Exclude: + - "test/**/*" + +# Fairly useful in tests for pattern assertions. +Lint/EmptyInPattern: + Exclude: + - "test/**/*" + +Lint/MissingCopEnableDirective: + Exclude: + - "examples/**/*.rb" + +Lint/MissingSuper: + Exclude: + - "**/*.rbi" + +Lint/SymbolConversion: + Exclude: + - "**/*.rbi" + +# Disabled for safety reasons, this option changes code semantics. +Lint/UnusedMethodArgument: + AutoCorrect: false + +# This option is prone to causing accidental bugs. +Lint/UselessAssignment: + AutoCorrect: false + Exclude: + - "examples/**/*.rb" + +Metrics/AbcSize: + Enabled: false + +Metrics/BlockLength: + AllowedPatterns: + - assert_pattern + - type_alias + - define_sorbet_constant! + Exclude: + - "**/*.rbi" + +Metrics/ClassLength: + Enabled: false + +Metrics/CollectionLiteralLength: + Exclude: + - "test/**/*" + +Metrics/CyclomaticComplexity: + Enabled: false + +Metrics/MethodLength: + Enabled: false + +Metrics/ModuleLength: + Enabled: false + +Metrics/ParameterLists: + Enabled: false + +Metrics/PerceivedComplexity: + Enabled: false + +Naming/AccessorMethodName: + Enabled: false + +# Need to preserve block identifier for documentation. +Naming/BlockForwarding: + Enabled: false + +# Underscores are generally useful for disambiguation. +Naming/ClassAndModuleCamelCase: + Enabled: false + +Naming/MethodParameterName: + Enabled: false + +Naming/PredicatePrefix: + Exclude: + - "**/*.rbi" + +Naming/VariableNumber: + Enabled: false + +# Nothing wrong with inline private methods. +Style/AccessModifierDeclarations: + Enabled: false + +Style/AccessorGrouping: + Exclude: + - "**/*.rbi" + +# Behaviour of alias_method is more predictable. 
+Style/Alias: + EnforcedStyle: prefer_alias_method + +# And/or have confusing precedence, avoid them. +Style/AndOr: + EnforcedStyle: always + +Style/ArgumentsForwarding: + Enabled: false + +Style/BisectedAttrAccessor: + Exclude: + - "**/*.rbi" + +# We prefer nested modules in lib/, but are currently using compact style for tests. +Style/ClassAndModuleChildren: + Exclude: + - "test/**/*" + +Style/CommentAnnotation: + Enabled: false + +# We should go back and add these docs, but ignore for now. +Style/Documentation: + Enabled: false + +# Allow explicit empty elses, for clarity. +Style/EmptyElse: + Enabled: false + +Style/EmptyMethod: + Exclude: + - "**/*.rbi" + +# We commonly use ENV['KEY'], it's OK. +Style/FetchEnvVar: + Enabled: false + +# Just to be safe, ensure nobody is mutating our internal strings. +Style/FrozenStringLiteralComment: + EnforcedStyle: always + Exclude: + - "**/*.rbi" + +# Nothing wrong with clear if statements. +Style/IfUnlessModifier: + Enabled: false + +# Rubocop is pretty bad about mangling single line lambdas. +Style/Lambda: + Enabled: false + +# Prefer consistency in method calling syntax. +Style/MethodCallWithArgsParentheses: + AllowedMethods: + - raise + Enabled: true + Exclude: + - "**/*.gemspec" + +Style/MultilineBlockChain: + Enabled: false + +# Perfectly fine. +Style/MultipleComparison: + Enabled: false + +Style/MutableConstant: + Exclude: + - "**/*.rbi" + +# Not all parameters should be named. +Style/NumberedParameters: + Enabled: false + +Style/NumberedParametersLimit: + Max: 2 + +# Reasonable to use brackets for errors with long messages. +Style/RaiseArgs: + Enabled: false + +# Be explicit about `RuntimeError`s. +Style/RedundantException: + Enabled: false + +Style/RedundantInitialize: + Exclude: + - "**/*.rbi" + +Style/RedundantParentheses: + Exclude: + - "**/*.rbi" + +# Prefer slashes for regex literals. +Style/RegexpLiteral: + EnforcedStyle: slashes + +# Allow explicit ifs, especially for imperative use. 
+Style/SafeNavigation: + Enabled: false + +Style/SignalException: + Exclude: + - Rakefile + - "**/*.rake" + +# We use these sparingly, where we anticipate future branches for the +# inner conditional. +Style/SoleNestedConditional: + Enabled: false + +# Prefer double quotes so that interpolation can be easily added. +Style/StringLiterals: + EnforcedStyle: double_quotes + +# Prefer explicit symbols for clarity; you can search for `:the_symbol`. +Style/SymbolArray: + EnforcedStyle: brackets + +# This option makes examples harder to read for ruby novices. +Style/SymbolProc: + Exclude: + - "examples/**/*.rb" diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 0000000..944880f --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +3.2.0 diff --git a/.solargraph.yml b/.solargraph.yml new file mode 100644 index 0000000..18edb2a --- /dev/null +++ b/.solargraph.yml @@ -0,0 +1,11 @@ +--- +max_files: 0 +include: + - '*.gemspec' + - 'Rakefile' + - 'examples/**/*.rb' + - 'lib/**/*.rb' + - 'test/scrapegraphai/resource_namespaces.rb' + - 'test/scrapegraphai/test_helper.rb' +exclude: + - 'rbi/**/*' diff --git a/.stats.yml b/.stats.yml new file mode 100644 index 0000000..6804ffb --- /dev/null +++ b/.stats.yml @@ -0,0 +1,4 @@ +configured_endpoints: 15 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/scrapegraphai%2Fscrapegraphai-633fdeab6abaefbe666099e8f86ce6b2acc9dacff1c33a80813bb04e8e437229.yml +openapi_spec_hash: f41ec90694ca8e7233bd20cc7ff1afbf +config_hash: 6889576ba0fdc14f2c71cea09a60a0f6 diff --git a/.yardopts b/.yardopts new file mode 100644 index 0000000..84c12f2 --- /dev/null +++ b/.yardopts @@ -0,0 +1,6 @@ +--type-name-tag generic:Generic +--default-return void +--markup markdown +--markup-provider redcarpet +--exclude /rbi +--exclude /sig diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..ec5addf --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,125 @@ +## Setting up the environment + +This repository 
contains a `.ruby-version` file, which should work with either [rbenv](https://github.com/rbenv/rbenv) or [asdf](https://github.com/asdf-vm/asdf) with the [ruby plugin](https://github.com/asdf-vm/asdf-ruby). + +Please follow the instructions for your preferred version manager to install the Ruby version specified in the `.ruby-version` file. + +To set up the repository, run: + +```bash +$ ./scripts/bootstrap +``` + +This will install all the required dependencies. + +## Modifying/Adding code + +Most of the SDK is generated code. Modifications to code will be persisted between generations, but may result in merge conflicts between manual patches and changes from the generator. The generator will never modify the contents of `lib/scrapegraphai/helpers/` and `examples/` directory. + +## Adding and running examples + +All files in the `examples/` directory are not modified by the generator and can be freely edited or added to. + +```ruby +#!/usr/bin/env ruby +# frozen_string_literal: true + +require_relative "../lib/scrapegraphai" + +# ... +``` + +```bash +$ chmod +x './examples/<your-example>.rb' + +# run the example against your api +$ ruby './examples/<your-example>.rb' +``` + +## Using the repository from source + +If you’d like to use the repository from source, you can either install from git or reference a cloned repository: + +To install via git in your `Gemfile`: + +```ruby +gem "scrapegraphai", git: "https://www.github.com/ScrapeGraphAI/scrapegraphai-ruby" +``` + +Alternatively, reference a local copy of the repo: + +```bash +$ git clone -- 'https://www.github.com/ScrapeGraphAI/scrapegraphai-ruby' '<path-to-local-repo>' +``` + +```ruby +gem "scrapegraphai", path: "<path-to-local-repo>" +``` + +## Running commands + +Running `rake` by itself will show all runnable commands. + +```bash +$ bundle exec rake +``` + +## Running tests + +Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests. 
 + +```bash +$ npx prism mock path/to/your/openapi.yml +``` + +```bash +$ bundle exec rake test +``` + +## Linting and formatting + +This repository uses [rubocop](https://github.com/rubocop/rubocop) for linting and formatting of `*.rb` files; and [syntax_tree](https://github.com/ruby-syntax-tree/syntax_tree) is used for formatting of both `*.rbi` and `*.rbs` files. + +There are two separate type checkers supported by this library: [sorbet](https://github.com/sorbet/sorbet) and [steep](https://github.com/soutaro/steep) are used for verifying `*.rbi` and `*.rbs` files respectively. + +To lint and typecheck: + +```bash +$ bundle exec rake lint +``` + +To format and fix all lint issues automatically: + +```bash +$ bundle exec rake format +``` + +## Editor Support + +### Ruby LSP + +[Ruby LSP](https://github.com/Shopify/ruby-lsp) has quite good support for go to definition, but not auto-completion. + +This can be installed alongside Solargraph. + +### Solargraph + +[Solargraph](https://solargraph.org) has quite good support for auto-completion, but not go to definition. + +This can be installed alongside Ruby LSP. + +### Sorbet + +[Sorbet](https://sorbet.org) should mostly work out of the box when editing this library directly. However, there are some caveats due to the colocation of `*.rb` and `*.rbi` files in the same project. These issues should not otherwise manifest when this library is used as a dependency. + +1. For go to definition usages, sorbet might get confused and may not always navigate to the correct location. + +2. For each generic type in `*.rbi` files, a spurious "Duplicate type member" error is present. 
+ +## Documentation Preview + +To preview the documentation, run: + +```bash +$ bundle exec rake docs:preview [PORT=8808] +``` diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..0d76364 --- /dev/null +++ b/Gemfile @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +source "https://rubygems.org" + +gemspec + +group :development do + gem "rake" + gem "rbs" + gem "rubocop" + gem "sorbet" + gem "steep" + gem "syntax_tree" + # TODO: using a fork for now, the prettier below has a bug + gem "syntax_tree-rbs", github: "stainless-api/syntax_tree-rbs", branch: "main" + gem "tapioca" +end + +group :development, :test do + gem "async" + gem "minitest" + gem "minitest-focus" + gem "minitest-hooks" + gem "minitest-proveit" + gem "minitest-rg" + gem "webmock" +end + +group :development, :docs do + gem "redcarpet" + gem "webrick" + gem "yard" +end diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..3d0f47c --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,226 @@ +GIT + remote: https://github.com/stainless-api/syntax_tree-rbs.git + revision: c30b50219918be7cfe3ef803a00b59d1e77fcada + branch: main + specs: + syntax_tree-rbs (1.0.0) + prettier_print + rbs + syntax_tree (>= 2.0.1) + +PATH + remote: . 
+ specs: + scrapegraphai (0.0.1) + connection_pool + +GEM + remote: https://rubygems.org/ + specs: + activesupport (8.0.2.1) + base64 + benchmark (>= 0.3) + bigdecimal + concurrent-ruby (~> 1.0, >= 1.3.1) + connection_pool (>= 2.2.5) + drb + i18n (>= 1.6, < 2) + logger (>= 1.4.2) + minitest (>= 5.1) + securerandom (>= 0.3) + tzinfo (~> 2.0, >= 2.0.5) + uri (>= 0.13.1) + addressable (2.8.7) + public_suffix (>= 2.0.2, < 7.0) + ast (2.4.3) + async (2.27.3) + console (~> 1.29) + fiber-annotation + io-event (~> 1.11) + metrics (~> 0.12) + traces (~> 0.15) + base64 (0.3.0) + benchmark (0.4.1) + bigdecimal (3.2.2) + concurrent-ruby (1.3.5) + connection_pool (2.5.3) + console (1.33.0) + fiber-annotation + fiber-local (~> 1.1) + json + crack (1.0.0) + bigdecimal + rexml + csv (3.3.5) + drb (2.2.3) + erubi (1.13.1) + ffi (1.17.2-aarch64-linux-gnu) + ffi (1.17.2-aarch64-linux-musl) + ffi (1.17.2-arm64-darwin) + ffi (1.17.2-x86_64-darwin) + ffi (1.17.2-x86_64-linux-gnu) + ffi (1.17.2-x86_64-linux-musl) + fiber-annotation (0.2.0) + fiber-local (1.1.0) + fiber-storage + fiber-storage (1.0.1) + fileutils (1.7.3) + hashdiff (1.2.0) + i18n (1.14.7) + concurrent-ruby (~> 1.0) + io-event (1.11.2) + json (2.13.2) + language_server-protocol (3.17.0.5) + lint_roller (1.1.0) + listen (3.9.0) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) + logger (1.7.0) + metrics (0.13.0) + minitest (5.25.5) + minitest-focus (1.4.0) + minitest (>= 4, < 6) + minitest-hooks (1.5.2) + minitest (> 5.3) + minitest-proveit (1.0.0) + minitest (> 5, < 7) + minitest-rg (5.3.0) + minitest (~> 5.0) + mutex_m (0.3.0) + netrc (0.11.0) + parallel (1.27.0) + parser (3.3.9.0) + ast (~> 2.4.1) + racc + prettier_print (1.2.1) + prism (1.4.0) + public_suffix (6.0.2) + racc (1.8.1) + rainbow (3.1.1) + rake (13.3.0) + rb-fsevent (0.11.2) + rb-inotify (0.11.1) + ffi (~> 1.0) + rbi (0.3.6) + prism (~> 1.0) + rbs (>= 3.4.4) + rbs (3.9.4) + logger + redcarpet (3.6.1) + regexp_parser (2.11.2) + rexml (3.4.1) 
+ rubocop (1.79.2) + json (~> 2.3) + language_server-protocol (~> 3.17.0.2) + lint_roller (~> 1.1.0) + parallel (~> 1.10) + parser (>= 3.3.0.2) + rainbow (>= 2.2.2, < 4.0) + regexp_parser (>= 2.9.3, < 3.0) + rubocop-ast (>= 1.46.0, < 2.0) + ruby-progressbar (~> 1.7) + unicode-display_width (>= 2.4.0, < 4.0) + rubocop-ast (1.46.0) + parser (>= 3.3.7.2) + prism (~> 1.4) + ruby-progressbar (1.13.0) + securerandom (0.4.1) + sorbet (0.5.12424) + sorbet-static (= 0.5.12424) + sorbet-runtime (0.5.12424) + sorbet-static (0.5.12424-aarch64-linux) + sorbet-static (0.5.12424-universal-darwin) + sorbet-static (0.5.12424-x86_64-linux) + sorbet-static-and-runtime (0.5.12424) + sorbet (= 0.5.12424) + sorbet-runtime (= 0.5.12424) + spoom (1.6.3) + erubi (>= 1.10.0) + prism (>= 0.28.0) + rbi (>= 0.3.3) + rexml (>= 3.2.6) + sorbet-static-and-runtime (>= 0.5.10187) + thor (>= 0.19.2) + steep (1.10.0) + activesupport (>= 5.1) + concurrent-ruby (>= 1.1.10) + csv (>= 3.0.9) + fileutils (>= 1.1.0) + json (>= 2.1.0) + language_server-protocol (>= 3.17.0.4, < 4.0) + listen (~> 3.0) + logger (>= 1.3.0) + mutex_m (>= 0.3.0) + parser (>= 3.1) + rainbow (>= 2.2.2, < 4.0) + rbs (~> 3.9) + securerandom (>= 0.1) + strscan (>= 1.0.0) + terminal-table (>= 2, < 5) + uri (>= 0.12.0) + strscan (3.1.5) + syntax_tree (6.3.0) + prettier_print (>= 1.2.0) + tapioca (0.16.11) + benchmark + bundler (>= 2.2.25) + netrc (>= 0.11.0) + parallel (>= 1.21.0) + rbi (~> 0.2) + sorbet-static-and-runtime (>= 0.5.11087) + spoom (>= 1.2.0) + thor (>= 1.2.0) + yard-sorbet + terminal-table (4.0.0) + unicode-display_width (>= 1.1.1, < 4) + thor (1.4.0) + traces (0.17.0) + tzinfo (2.0.6) + concurrent-ruby (~> 1.0) + unicode-display_width (3.1.5) + unicode-emoji (~> 4.0, >= 4.0.4) + unicode-emoji (4.0.4) + uri (1.0.3) + webmock (3.25.1) + addressable (>= 2.8.0) + crack (>= 0.3.2) + hashdiff (>= 0.4.0, < 2.0.0) + webrick (1.9.1) + yard (0.9.37) + yard-sorbet (0.9.0) + sorbet-runtime + yard + +PLATFORMS + aarch64-linux + 
aarch64-linux-gnu + aarch64-linux-musl + arm64-darwin + universal-darwin + x86_64-darwin + x86_64-linux-gnu + x86_64-linux-musl + +DEPENDENCIES + async + minitest + minitest-focus + minitest-hooks + minitest-proveit + minitest-rg + rake + rbs + redcarpet + rubocop + scrapegraphai! + sorbet + steep + syntax_tree + syntax_tree-rbs! + tapioca + webmock + webrick + yard + +BUNDLED WITH + 2.4.1 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3088710 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2025 Scrapegraphai + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index aee6b86..c401391 100644 --- a/README.md +++ b/README.md @@ -1 +1,242 @@ -# scrapegraphai-ruby \ No newline at end of file +# Scrapegraphai Ruby API library + +The Scrapegraphai Ruby library provides convenient access to the Scrapegraphai REST API from any Ruby 3.2.0+ application. It ships with comprehensive types & docstrings in Yard, RBS, and RBI – [see below](https://github.com/ScrapeGraphAI/scrapegraphai-ruby#Sorbet) for usage with Sorbet. The standard library's `net/http` is used as the HTTP transport, with connection pooling via the `connection_pool` gem. + +It is generated with [Stainless](https://www.stainless.com/). + +## Documentation + +Documentation for releases of this gem can be found [on RubyDoc](https://gemdocs.org/gems/scrapegraphai). + +The REST API documentation can be found on [scrapegraphai.com](https://scrapegraphai.com). 
+ +## Installation + +To use this gem, install via Bundler by adding the following to your application's `Gemfile`: + + + +```ruby +gem "scrapegraphai", "~> 0.0.1" +``` + + + +## Usage + +```ruby +require "bundler/setup" +require "scrapegraphai" + +scrapegraphai = Scrapegraphai::Client.new( + api_key: ENV["SCRAPEGRAPHAI_API_KEY"], # This is the default and can be omitted + environment: "environment_1" # defaults to "production" +) + +completed_smartscraper = scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") + +puts(completed_smartscraper.request_id) +``` + +### Handling errors + +When the library is unable to connect to the API, or if the API returns a non-success status code (i.e., 4xx or 5xx response), a subclass of `Scrapegraphai::Errors::APIError` will be thrown: + +```ruby +begin + smartscraper = scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") +rescue Scrapegraphai::Errors::APIConnectionError => e + puts("The server could not be reached") + puts(e.cause) # an underlying Exception, likely raised within `net/http` +rescue Scrapegraphai::Errors::RateLimitError => e + puts("A 429 status code was received; we should back off a bit.") +rescue Scrapegraphai::Errors::APIStatusError => e + puts("Another non-200-range status code was received") + puts(e.status) +end +``` + +Error codes are as follows: + +| Cause | Error Type | +| ---------------- | -------------------------- | +| HTTP 400 | `BadRequestError` | +| HTTP 401 | `AuthenticationError` | +| HTTP 403 | `PermissionDeniedError` | +| HTTP 404 | `NotFoundError` | +| HTTP 409 | `ConflictError` | +| HTTP 422 | `UnprocessableEntityError` | +| HTTP 429 | `RateLimitError` | +| HTTP >= 500 | `InternalServerError` | +| Other HTTP error | `APIStatusError` | +| Timeout | `APITimeoutError` | +| Network error | `APIConnectionError` | + +### Retries + +Certain errors will be automatically retried 2 times by default, with a short 
exponential backoff. + +Connection errors (for example, due to a network connectivity problem), 408 Request Timeout, 409 Conflict, 429 Rate Limit, >=500 Internal errors, and timeouts will all be retried by default. + +You can use the `max_retries` option to configure or disable this: + +```ruby +# Configure the default for all requests: +scrapegraphai = Scrapegraphai::Client.new( + max_retries: 0 # default is 2 +) + +# Or, configure per-request: +scrapegraphai.smartscraper.create( + user_prompt: "Extract the product name, price, and description", + request_options: {max_retries: 5} +) +``` + +### Timeouts + +By default, requests will time out after 60 seconds. You can use the timeout option to configure or disable this: + +```ruby +# Configure the default for all requests: +scrapegraphai = Scrapegraphai::Client.new( + timeout: nil # default is 60 +) + +# Or, configure per-request: +scrapegraphai.smartscraper.create( + user_prompt: "Extract the product name, price, and description", + request_options: {timeout: 5} +) +``` + +On timeout, `Scrapegraphai::Errors::APITimeoutError` is raised. + +Note that requests that time out are retried by default. + +## Advanced concepts + +### BaseModel + +All parameter and response objects inherit from `Scrapegraphai::Internal::Type::BaseModel`, which provides several conveniences, including: + +1. All fields, including unknown ones, are accessible with `obj[:prop]` syntax, and can be destructured with `obj => {prop: prop}` or pattern-matching syntax. + +2. Structural equivalence for equality; if two API calls return the same values, comparing the responses with == will return true. + +3. Both instances and the classes themselves can be pretty-printed. + +4. Helpers such as `#to_h`, `#deep_to_h`, `#to_json`, and `#to_yaml`. 
+ +### Making custom or undocumented requests + +#### Undocumented properties + +You can send undocumented parameters to any endpoint, and read undocumented response properties, like so: + +Note: the `extra_` parameters of the same name override the documented parameters. + +```ruby +completed_smartscraper = + scrapegraphai.smartscraper.create( + user_prompt: "Extract the product name, price, and description", + request_options: { + extra_query: {my_query_parameter: value}, + extra_body: {my_body_parameter: value}, + extra_headers: {"my-header": value} + } + ) + +puts(completed_smartscraper[:my_undocumented_property]) +``` + +#### Undocumented request params + +If you want to explicitly send an extra param, you can do so with the `extra_query`, `extra_body`, and `extra_headers` under the `request_options:` parameter when making a request, as seen in the examples above. + +#### Undocumented endpoints + +To make requests to undocumented endpoints while retaining the benefit of auth, retries, and so on, you can make requests using `client.request`, like so: + +```ruby +response = client.request( + method: :post, + path: '/undocumented/endpoint', + query: {"dog": "woof"}, + headers: {"useful-header": "interesting-value"}, + body: {"hello": "world"} +) +``` + +### Concurrency & connection pooling + +The `Scrapegraphai::Client` instances are threadsafe, but are only fork-safe when there are no in-flight HTTP requests. + +Each instance of `Scrapegraphai::Client` has its own HTTP connection pool with a default size of 99. As such, we recommend instantiating the client once per application in most settings. + +When all available connections from the pool are checked out, requests wait for a new connection to become available, with queue time counting towards the request timeout. + +Unless otherwise specified, other classes in the SDK do not have locks protecting their underlying data structure. 
+ +## Sorbet + +This library provides comprehensive [RBI](https://sorbet.org/docs/rbi) definitions, and has no dependency on sorbet-runtime. + +You can provide typesafe request parameters like so: + +```ruby +scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") +``` + +Or, equivalently: + +```ruby +# Hashes work, but are not typesafe: +scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") + +# You can also splat a full Params class: +params = Scrapegraphai::SmartscraperCreateParams.new( + user_prompt: "Extract the product name, price, and description" +) +scrapegraphai.smartscraper.create(**params) +``` + +### Enums + +Since this library does not depend on `sorbet-runtime`, it cannot provide [`T::Enum`](https://sorbet.org/docs/tenum) instances. Instead, we provide "tagged symbols", which are always primitives at runtime: + +```ruby +# :queued +puts(Scrapegraphai::CompletedSmartscraper::Status::QUEUED) + +# Revealed type: `T.all(Scrapegraphai::CompletedSmartscraper::Status, Symbol)` +T.reveal_type(Scrapegraphai::CompletedSmartscraper::Status::QUEUED) +``` + +Enum parameters have a "relaxed" type, so you can either pass in enum constants or their literal value: + +```ruby +Scrapegraphai::CompletedSmartscraper.new( + status: Scrapegraphai::CompletedSmartscraper::Status::QUEUED, + # … +) + +Scrapegraphai::CompletedSmartscraper.new( + status: :queued, + # … +) +``` + +## Versioning + +This package follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions. As the library is in initial development and has a major version of `0`, APIs may change at any time. + +This package considers improvements to the (non-runtime) `*.rbi` and `*.rbs` type definitions to be non-breaking changes. + +## Requirements + +Ruby 3.2.0 or higher. + +## Contributing + +See [the contributing documentation](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/tree/main/CONTRIBUTING.md). 
diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..ebbcdc5 --- /dev/null +++ b/Rakefile @@ -0,0 +1,172 @@ +# frozen_string_literal: true + +require "pathname" +require "securerandom" +require "shellwords" + +require "minitest/test_task" +require "rake/clean" +require "rubocop/rake_task" + +tapioca = "sorbet/tapioca" +examples = "examples" +ignore_file = ".ignore" + +FILES_ENV = "FORMAT_FILE" + +CLEAN.push(*%w[.idea/ .ruby-lsp/ .yardoc/ doc/], *FileList["*.gem"], ignore_file) + +CLOBBER.push(*%w[sorbet/rbi/annotations/ sorbet/rbi/gems/], tapioca) + +multitask(:default) do + sh(*%w[rake --tasks]) +end + +desc("Preview docs; use `PORT=` to change the port") +multitask(:"docs:preview") do + sh(*%w[yard server --reload --quiet --bind [::] --port], ENV.fetch("PORT", "8808")) +end + +desc("Run test suites; use `TEST=path/to/test.rb` to run a specific test file") +multitask(:test) do + rb = + FileList[ENV.fetch("TEST", "./test/**/*_test.rb")] + .map { "require_relative(#{_1.dump});" } + .join + + ruby(*%w[-w -e], rb, verbose: false) { fail unless _1 } +end + +xargs = %w[xargs --no-run-if-empty --null --max-procs=0 --max-args=300 --] +ruby_opt = {"RUBYOPT" => [ENV["RUBYOPT"], "--encoding=UTF-8"].compact.join(" ")} + +filtered = ->(ext, dirs) do + if ENV.key?(FILES_ENV) + %w[sed -E -n -e] << "/\\.#{ext}$/p" << "--" << ENV.fetch(FILES_ENV) + else + (%w[find] + dirs + %w[-type f -and -name]) << "*.#{ext}" << "-print0" + end +end + +desc("Lint `*.rb(i)`") +multitask(:"lint:rubocop") do + find = %w[find ./lib ./test ./rbi ./examples -type f -and ( -name *.rb -or -name *.rbi ) -print0] + + rubocop = %w[rubocop] + rubocop += %w[--format github] if ENV.key?("CI") + + # some lines cannot be shortened + rubocop += %w[--except Lint/RedundantCopDisableDirective,Layout/LineLength] + + lint = xargs + rubocop + sh("#{find.shelljoin} | #{lint.shelljoin}") +end + +norm_lines = %w[tr -- \n \0].shelljoin + +desc("Format `*.rb`") +multitask(:"format:rb") do + # while 
`syntax_tree` is much faster than `rubocop`, `rubocop` is the only formatter with full syntax support + files = filtered["rb", %w[./lib ./test ./examples]] + fmt = xargs + %w[rubocop --fail-level F --autocorrect --format simple --] + sh("#{files.shelljoin} | #{norm_lines} | #{fmt.shelljoin}") +end + +desc("Format `*.rbi`") +multitask(:"format:rbi") do + files = filtered["rbi", %w[./rbi]] + fmt = xargs + %w[stree write --] + sh(ruby_opt, "#{files.shelljoin} | #{norm_lines} | #{fmt.shelljoin}") +end + +desc("Format `*.rbs`") +multitask(:"format:rbs") do + files = filtered["rbs", %w[./sig]] + inplace = /darwin|bsd/ =~ RUBY_PLATFORM ? ["-i", ""] : %w[-i] + uuid = SecureRandom.uuid + + # `syntax_tree` has trouble with `rbs`'s class & module aliases + + sed_bin = /darwin/ =~ RUBY_PLATFORM ? "/usr/bin/sed" : "sed" + sed = xargs + [sed_bin, "-E", *inplace, "-e"] + # annotate unprocessable aliases with a unique comment + pre = sed + ["s/(class|module) ([^ ]+) = (.+$)/# \\1 #{uuid}\\n\\2: \\3/", "--"] + fmt = xargs + %w[stree write --plugin=rbs --] + # remove the unique comment and unprocessable aliases to type aliases + subst = <<~SED + s/# (class|module) #{uuid}/\\1/ + t l1 + b + + : l1 + N + s/\\n *([^:]+): (.+)$/ \\1 = \\2/ + SED + # for each line: + # 1. try transform the unique comment into `class | module`, if successful, branch to label `l1`. + # 2. at label `l1`, join previously annotated line with `class | module` information. 
+ pst = sed + [subst, "--"] + + success = false + + # transform class aliases to type aliases, which syntax tree has no trouble with + sh("#{files.shelljoin} | #{norm_lines} | #{pre.shelljoin}") + # run syntax tree to format `*.rbs` files + sh(ruby_opt, "#{files.shelljoin} | #{norm_lines} | #{fmt.shelljoin}") do + success = _1 + end + # transform type aliases back to class aliases + sh("#{files.shelljoin} | #{norm_lines} | #{pst.shelljoin}") + + # always run post-processing to remove comment marker + fail unless success +end + +desc("Format everything") +multitask(format: [:"format:rb", :"format:rbi", :"format:rbs"]) + +desc("Typecheck `*.rbs`") +multitask(:"typecheck:steep") do + sh(*%w[steep check]) +end + +directory(examples) + +desc("Typecheck `*.rbi`") +multitask("typecheck:sorbet": examples) do + sh(*%w[srb typecheck --dir], examples) +end + +directory(tapioca) do + sh(*%w[tapioca init]) +end + +desc("Typecheck everything") +multitask(typecheck: [:"typecheck:steep", :"typecheck:sorbet"]) + +desc("Lint and typecheck") +multitask(lint: [:"lint:rubocop", :typecheck]) + +desc("Build yard docs") +multitask(:"build:docs") do + sh(*%w[yard]) +end + +desc("Build ruby gem") +multitask(:"build:gem") do + # optimizing for grepping through the gem bundle: many tools honour `.ignore` files, including VSCode + # + # both `rbi` and `sig` directories are navigable by their respective tool chains and therefore can be ignored by tools such as `rg` + Pathname(ignore_file).write(<<~GLOB) + rbi/* + sig/* + GLOB + + sh(*%w[gem build -- scrapegraphai.gemspec]) + rm_rf(ignore_file) +end + +desc("Release ruby gem") +multitask(release: [:"build:gem"]) do + sh(*%w[gem push], *FileList["*.gem"]) +end diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..a37bf27 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,23 @@ +# Security Policy + +## Reporting Security Issues + +This SDK is generated by [Stainless Software Inc](http://stainless.com). 
Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. + +To report a security issue, please contact the Stainless team at security@stainless.com. + +## Responsible Disclosure + +We appreciate the efforts of security researchers and individuals who help us maintain the security of +SDKs we generate. If you believe you have found a security vulnerability, please adhere to responsible +disclosure practices by allowing us a reasonable amount of time to investigate and address the issue +before making any information public. + +## Reporting Non-SDK Related Security Issues + +If you encounter security issues that are not directly related to SDKs but pertain to the services +or products provided by Scrapegraphai, please follow the respective company's security reporting guidelines. + +--- + +Thank you for helping us keep the SDKs and systems they interact with secure. diff --git a/Steepfile b/Steepfile new file mode 100644 index 0000000..528b48c --- /dev/null +++ b/Steepfile @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require "yaml" + +target(:lib) do + configure_code_diagnostics(Steep::Diagnostic::Ruby.strict) + + signature("sig") + + YAML.safe_load_file("./manifest.yaml", symbolize_names: true) => {dependencies:} + # currently these libraries lack the `*.rbs` annotations required by `steep` + stdlibs = dependencies - %w[English etc net/http rbconfig set stringio] + + stdlibs.each { library(_1) } +end diff --git a/bin/check-release-environment b/bin/check-release-environment new file mode 100644 index 0000000..c05436e --- /dev/null +++ b/bin/check-release-environment @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +errors=() + +if [ -z "${GEM_HOST_API_KEY}" ]; then + errors+=("The GEM_HOST_API_KEY secret has not been set. 
Please set it in either this repository's secrets or your organization secrets") +fi + +lenErrors=${#errors[@]} + +if [[ lenErrors -gt 0 ]]; then + echo -e "Found the following errors in the release environment:\n" + + for error in "${errors[@]}"; do + echo -e "- $error\n" + done + + exit 1 +fi + +echo "The environment is ready to push releases!" diff --git a/bin/publish-gem b/bin/publish-gem new file mode 100644 index 0000000..8444af2 --- /dev/null +++ b/bin/publish-gem @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +cd -- "$(dirname -- "$0")/.." + +bundle +find . -maxdepth 1 -type f -name "*.gem" -delete +rake release \ No newline at end of file diff --git a/examples/.keep b/examples/.keep new file mode 100644 index 0000000..d8c73e9 --- /dev/null +++ b/examples/.keep @@ -0,0 +1,4 @@ +File generated from our OpenAPI spec by Stainless. + +This directory can be used to store example files demonstrating usage of this SDK. +It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. \ No newline at end of file diff --git a/lib/scrapegraphai.rb b/lib/scrapegraphai.rb new file mode 100644 index 0000000..f51e4b5 --- /dev/null +++ b/lib/scrapegraphai.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +# Standard libraries. +# rubocop:disable Lint/RedundantRequireStatement +require "English" +require "cgi" +require "date" +require "erb" +require "etc" +require "json" +require "net/http" +require "pathname" +require "rbconfig" +require "securerandom" +require "set" +require "stringio" +require "time" +require "uri" +# rubocop:enable Lint/RedundantRequireStatement + +# We already ship the preferred sorbet manifests in the package itself. +# `tapioca` currently does not offer us a way to opt out of unnecessary compilation. +if Object.const_defined?(:Tapioca) && + caller.chain([$PROGRAM_NAME]).chain(ARGV).any?(/tapioca/) && + ARGV.none?(/dsl/) + return +end + +# Gems. +require "connection_pool" + +# Package files. 
+require_relative "scrapegraphai/version" +require_relative "scrapegraphai/internal/util" +require_relative "scrapegraphai/internal/type/converter" +require_relative "scrapegraphai/internal/type/unknown" +require_relative "scrapegraphai/internal/type/boolean" +require_relative "scrapegraphai/internal/type/file_input" +require_relative "scrapegraphai/internal/type/enum" +require_relative "scrapegraphai/internal/type/union" +require_relative "scrapegraphai/internal/type/array_of" +require_relative "scrapegraphai/internal/type/hash_of" +require_relative "scrapegraphai/internal/type/base_model" +require_relative "scrapegraphai/internal/type/base_page" +require_relative "scrapegraphai/internal/type/request_parameters" +require_relative "scrapegraphai/internal" +require_relative "scrapegraphai/request_options" +require_relative "scrapegraphai/file_part" +require_relative "scrapegraphai/errors" +require_relative "scrapegraphai/internal/transport/base_client" +require_relative "scrapegraphai/internal/transport/pooled_net_requester" +require_relative "scrapegraphai/client" +require_relative "scrapegraphai/models/completed_markdownify" +require_relative "scrapegraphai/models/completed_search_scraper" +require_relative "scrapegraphai/models/completed_smartscraper" +require_relative "scrapegraphai/models/crawl_retrieve_results_params" +require_relative "scrapegraphai/models/crawl_retrieve_results_response" +require_relative "scrapegraphai/models/crawl_start_params" +require_relative "scrapegraphai/models/crawl_start_response" +require_relative "scrapegraphai/models/credit_retrieve_params" +require_relative "scrapegraphai/models/credit_retrieve_response" +require_relative "scrapegraphai/models/failed_smartscraper" +require_relative "scrapegraphai/models/feedback_submit_params" +require_relative "scrapegraphai/models/feedback_submit_response" +require_relative "scrapegraphai/models/generate_schema_create_params" +require_relative 
"scrapegraphai/models/generate_schema_create_response" +require_relative "scrapegraphai/models/generate_schema_retrieve_params" +require_relative "scrapegraphai/models/generate_schema_retrieve_response" +require_relative "scrapegraphai/models/healthz_check_params" +require_relative "scrapegraphai/models/healthz_check_response" +require_relative "scrapegraphai/models/markdownify_convert_params" +require_relative "scrapegraphai/models/markdownify_retrieve_status_params" +require_relative "scrapegraphai/models/markdownify_retrieve_status_response" +require_relative "scrapegraphai/models/searchscraper_create_params" +require_relative "scrapegraphai/models/searchscraper_retrieve_status_params" +require_relative "scrapegraphai/models/searchscraper_retrieve_status_response" +require_relative "scrapegraphai/models/smartscraper_create_params" +require_relative "scrapegraphai/models/smartscraper_list_params" +require_relative "scrapegraphai/models/smartscraper_list_response" +require_relative "scrapegraphai/models/smartscraper_retrieve_params" +require_relative "scrapegraphai/models/smartscraper_retrieve_response" +require_relative "scrapegraphai/models/validate_api_key_params" +require_relative "scrapegraphai/models/validate_api_key_response" +require_relative "scrapegraphai/models" +require_relative "scrapegraphai/resources/crawl" +require_relative "scrapegraphai/resources/credits" +require_relative "scrapegraphai/resources/feedback" +require_relative "scrapegraphai/resources/generate_schema" +require_relative "scrapegraphai/resources/healthz" +require_relative "scrapegraphai/resources/markdownify" +require_relative "scrapegraphai/resources/searchscraper" +require_relative "scrapegraphai/resources/smartscraper" +require_relative "scrapegraphai/resources/validate" diff --git a/lib/scrapegraphai/client.rb b/lib/scrapegraphai/client.rb new file mode 100644 index 0000000..108982e --- /dev/null +++ b/lib/scrapegraphai/client.rb @@ -0,0 +1,122 @@ +# frozen_string_literal: true + 
+module Scrapegraphai + class Client < Scrapegraphai::Internal::Transport::BaseClient + # Default max number of retries to attempt after a failed retryable request. + DEFAULT_MAX_RETRIES = 2 + + # Default per-request timeout. + DEFAULT_TIMEOUT_IN_SECONDS = 60.0 + + # Default initial retry delay in seconds. + # Overall delay is calculated using exponential backoff + jitter. + DEFAULT_INITIAL_RETRY_DELAY = 0.5 + + # Default max retry delay in seconds. + DEFAULT_MAX_RETRY_DELAY = 8.0 + + # rubocop:disable Style/MutableConstant + # @type [Hash{Symbol=>String}] + ENVIRONMENTS = + {production: "https://api.scrapegraphai.com/v1", environment_1: "http://localhost:8001/v1"} + # rubocop:enable Style/MutableConstant + + # API key for authentication + # @return [String] + attr_reader :api_key + + # @return [Scrapegraphai::Resources::Smartscraper] + attr_reader :smartscraper + + # @return [Scrapegraphai::Resources::Markdownify] + attr_reader :markdownify + + # @return [Scrapegraphai::Resources::Searchscraper] + attr_reader :searchscraper + + # @return [Scrapegraphai::Resources::GenerateSchema] + attr_reader :generate_schema + + # @return [Scrapegraphai::Resources::Crawl] + attr_reader :crawl + + # @return [Scrapegraphai::Resources::Credits] + attr_reader :credits + + # @return [Scrapegraphai::Resources::Validate] + attr_reader :validate + + # @return [Scrapegraphai::Resources::Feedback] + attr_reader :feedback + + # @return [Scrapegraphai::Resources::Healthz] + attr_reader :healthz + + # @api private + # + # @return [Hash{String=>String}] + private def auth_headers + {"sgai-apikey" => @api_key} + end + + # Creates and returns a new client for interacting with the API. + # + # @param api_key [String, nil] API key for authentication Defaults to `ENV["SCRAPEGRAPHAI_API_KEY"]` + # + # @param environment [:production, :environment_1, nil] Specifies the environment to use for the API. 
+ # + # Each environment maps to a different base URL: + # + # - `production` corresponds to `https://api.scrapegraphai.com/v1` + # - `environment_1` corresponds to `http://localhost:8001/v1` + # + # @param base_url [String, nil] Override the default base URL for the API, e.g., + # `"https://api.example.com/v2/"`. Defaults to `ENV["SCRAPEGRAPHAI_BASE_URL"]` + # + # @param max_retries [Integer] Max number of retries to attempt after a failed retryable request. + # + # @param timeout [Float] + # + # @param initial_retry_delay [Float] + # + # @param max_retry_delay [Float] + def initialize( + api_key: ENV["SCRAPEGRAPHAI_API_KEY"], + environment: nil, + base_url: ENV["SCRAPEGRAPHAI_BASE_URL"], + max_retries: self.class::DEFAULT_MAX_RETRIES, + timeout: self.class::DEFAULT_TIMEOUT_IN_SECONDS, + initial_retry_delay: self.class::DEFAULT_INITIAL_RETRY_DELAY, + max_retry_delay: self.class::DEFAULT_MAX_RETRY_DELAY + ) + base_url ||= Scrapegraphai::Client::ENVIRONMENTS.fetch(environment&.to_sym || :production) do + message = "environment must be one of #{Scrapegraphai::Client::ENVIRONMENTS.keys}, got #{environment}" + raise ArgumentError.new(message) + end + + if api_key.nil? 
+ raise ArgumentError.new("api_key is required, and can be set via environ: \"SCRAPEGRAPHAI_API_KEY\"") + end + + @api_key = api_key.to_s + + super( + base_url: base_url, + timeout: timeout, + max_retries: max_retries, + initial_retry_delay: initial_retry_delay, + max_retry_delay: max_retry_delay + ) + + @smartscraper = Scrapegraphai::Resources::Smartscraper.new(client: self) + @markdownify = Scrapegraphai::Resources::Markdownify.new(client: self) + @searchscraper = Scrapegraphai::Resources::Searchscraper.new(client: self) + @generate_schema = Scrapegraphai::Resources::GenerateSchema.new(client: self) + @crawl = Scrapegraphai::Resources::Crawl.new(client: self) + @credits = Scrapegraphai::Resources::Credits.new(client: self) + @validate = Scrapegraphai::Resources::Validate.new(client: self) + @feedback = Scrapegraphai::Resources::Feedback.new(client: self) + @healthz = Scrapegraphai::Resources::Healthz.new(client: self) + end + end +end diff --git a/lib/scrapegraphai/errors.rb b/lib/scrapegraphai/errors.rb new file mode 100644 index 0000000..05d0547 --- /dev/null +++ b/lib/scrapegraphai/errors.rb @@ -0,0 +1,228 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Errors + class Error < StandardError + # @!attribute cause + # + # @return [StandardError, nil] + end + + class ConversionError < Scrapegraphai::Errors::Error + # @return [StandardError, nil] + def cause = @cause.nil? ? 
super : @cause + + # @api private + # + # @param on [Class] + # @param method [Symbol] + # @param target [Object] + # @param value [Object] + # @param cause [StandardError, nil] + def initialize(on:, method:, target:, value:, cause: nil) + cls = on.name.split("::").last + + message = [ + "Failed to parse #{cls}.#{method} from #{value.class} to #{target.inspect}.", + "To get the unparsed API response, use #{cls}[#{method.inspect}].", + cause && "Cause: #{cause.message}" + ].filter(&:itself).join(" ") + + @cause = cause + super(message) + end + end + + class APIError < Scrapegraphai::Errors::Error + # @return [URI::Generic] + attr_accessor :url + + # @return [Integer, nil] + attr_accessor :status + + # @return [Hash{String=>String}, nil] + attr_accessor :headers + + # @return [Object, nil] + attr_accessor :body + + # @api private + # + # @param url [URI::Generic] + # @param status [Integer, nil] + # @param headers [Hash{String=>String}, nil] + # @param body [Object, nil] + # @param request [nil] + # @param response [nil] + # @param message [String, nil] + def initialize(url:, status: nil, headers: nil, body: nil, request: nil, response: nil, message: nil) + @url = url + @status = status + @headers = headers + @body = body + @request = request + @response = response + super(message) + end + end + + class APIConnectionError < Scrapegraphai::Errors::APIError + # @!attribute status + # + # @return [nil] + + # @!attribute body + # + # @return [nil] + + # @api private + # + # @param url [URI::Generic] + # @param status [nil] + # @param headers [Hash{String=>String}, nil] + # @param body [nil] + # @param request [nil] + # @param response [nil] + # @param message [String, nil] + def initialize( + url:, + status: nil, + headers: nil, + body: nil, + request: nil, + response: nil, + message: "Connection error." 
+ ) + super + end + end + + class APITimeoutError < Scrapegraphai::Errors::APIConnectionError + # @api private + # + # @param url [URI::Generic] + # @param status [nil] + # @param headers [Hash{String=>String}, nil] + # @param body [nil] + # @param request [nil] + # @param response [nil] + # @param message [String, nil] + def initialize( + url:, + status: nil, + headers: nil, + body: nil, + request: nil, + response: nil, + message: "Request timed out." + ) + super + end + end + + class APIStatusError < Scrapegraphai::Errors::APIError + # @api private + # + # @param url [URI::Generic] + # @param status [Integer] + # @param headers [Hash{String=>String}, nil] + # @param body [Object, nil] + # @param request [nil] + # @param response [nil] + # @param message [String, nil] + # + # @return [self] + def self.for(url:, status:, headers:, body:, request:, response:, message: nil) + kwargs = + { + url: url, + status: status, + headers: headers, + body: body, + request: request, + response: response, + message: message + } + + case status + in 400 + Scrapegraphai::Errors::BadRequestError.new(**kwargs) + in 401 + Scrapegraphai::Errors::AuthenticationError.new(**kwargs) + in 403 + Scrapegraphai::Errors::PermissionDeniedError.new(**kwargs) + in 404 + Scrapegraphai::Errors::NotFoundError.new(**kwargs) + in 409 + Scrapegraphai::Errors::ConflictError.new(**kwargs) + in 422 + Scrapegraphai::Errors::UnprocessableEntityError.new(**kwargs) + in 429 + Scrapegraphai::Errors::RateLimitError.new(**kwargs) + in (500..) 
+ Scrapegraphai::Errors::InternalServerError.new(**kwargs) + else + Scrapegraphai::Errors::APIStatusError.new(**kwargs) + end + end + + # @!parse + # # @return [Integer] + # attr_accessor :status + + # @api private + # + # @param url [URI::Generic] + # @param status [Integer] + # @param headers [Hash{String=>String}, nil] + # @param body [Object, nil] + # @param request [nil] + # @param response [nil] + # @param message [String, nil] + def initialize(url:, status:, headers:, body:, request:, response:, message: nil) + message ||= {url: url.to_s, status: status, body: body} + super( + url: url, + status: status, + headers: headers, + body: body, + request: request, + response: response, + message: message&.to_s + ) + end + end + + class BadRequestError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 400 + end + + class AuthenticationError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 401 + end + + class PermissionDeniedError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 403 + end + + class NotFoundError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 404 + end + + class ConflictError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 409 + end + + class UnprocessableEntityError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 422 + end + + class RateLimitError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 429 + end + + class InternalServerError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = (500..) 
+ end + end +end diff --git a/lib/scrapegraphai/file_part.rb b/lib/scrapegraphai/file_part.rb new file mode 100644 index 0000000..1f8949f --- /dev/null +++ b/lib/scrapegraphai/file_part.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module Scrapegraphai + class FilePart + # @return [Pathname, StringIO, IO, String] + attr_reader :content + + # @return [String, nil] + attr_reader :content_type + + # @return [String, nil] + attr_reader :filename + + # @api private + # + # @return [String] + private def read + case content + in Pathname + content.read(binmode: true) + in StringIO + content.string + in IO + content.read + in String + content + end + end + + # @param a [Object] + # + # @return [String] + def to_json(*a) = read.to_json(*a) + + # @param a [Object] + # + # @return [String] + def to_yaml(*a) = read.to_yaml(*a) + + # @param content [Pathname, StringIO, IO, String] + # @param filename [Pathname, String, nil] + # @param content_type [String, nil] + def initialize(content, filename: nil, content_type: nil) + @content_type = content_type + @filename = + case [filename, (@content = content)] + in [String | Pathname, _] + ::File.basename(filename) + in [nil, Pathname] + content.basename.to_path + in [nil, IO] + content.to_path + else + filename + end + end + end +end diff --git a/lib/scrapegraphai/internal.rb b/lib/scrapegraphai/internal.rb new file mode 100644 index 0000000..54fda1b --- /dev/null +++ b/lib/scrapegraphai/internal.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Internal + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + OMIT = + Object.new.tap do + _1.define_singleton_method(:inspect) { "#<#{Scrapegraphai::Internal}::OMIT>" } + end + .freeze + + define_sorbet_constant!(:AnyHash) do + T.type_alias { T::Hash[Symbol, T.anything] } + end + define_sorbet_constant!(:FileInput) do + T.type_alias { T.any(Pathname, StringIO, IO, String, Scrapegraphai::FilePart) } + end + end +end diff --git 
a/lib/scrapegraphai/internal/transport/base_client.rb b/lib/scrapegraphai/internal/transport/base_client.rb new file mode 100644 index 0000000..e889550 --- /dev/null +++ b/lib/scrapegraphai/internal/transport/base_client.rb @@ -0,0 +1,571 @@
# frozen_string_literal: true

module Scrapegraphai
  module Internal
    module Transport
      # @api private
      #
      # @abstract
      #
      # Transport-agnostic HTTP client core: request building, redirect
      # following, retry/backoff policy, and response decoding. Concrete
      # clients subclass this and supply `#auth_headers`.
      class BaseClient
        extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport

        # from whatwg fetch spec
        MAX_REDIRECTS = 20

        # rubocop:disable Style/MutableConstant
        PLATFORM_HEADERS =
          {
            "x-stainless-arch" => Scrapegraphai::Internal::Util.arch,
            "x-stainless-lang" => "ruby",
            "x-stainless-os" => Scrapegraphai::Internal::Util.os,
            "x-stainless-package-version" => Scrapegraphai::VERSION,
            "x-stainless-runtime" => ::RUBY_ENGINE,
            "x-stainless-runtime-version" => ::RUBY_ENGINE_VERSION
          }
        # rubocop:enable Style/MutableConstant

        class << self
          # @api private
          #
          # Rejects unknown keys in a request spec early, before any network
          # work happens.
          #
          # @param req [Hash{Symbol=>Object}]
          #
          # @raise [ArgumentError]
          def validate!(req)
            keys = [:method, :path, :query, :headers, :body, :unwrap, :page, :stream, :model, :options]
            case req
            in Hash
              req.each_key do |k|
                unless keys.include?(k)
                  raise ArgumentError.new("Request `req` keys must be one of #{keys}, got #{k.inspect}")
                end
              end
            else
              raise ArgumentError.new("Request `req` must be a Hash or RequestOptions, got #{req.inspect}")
            end
          end

          # @api private
          #
          # An explicit `x-should-retry` response header overrides the default
          # status-based policy.
          #
          # @param status [Integer]
          # @param headers [Hash{String=>String}]
          #
          # @return [Boolean]
          def should_retry?(status, headers:)
            coerced = Scrapegraphai::Internal::Util.coerce_boolean(headers["x-should-retry"])
            case [coerced, status]
            in [true | false, _]
              coerced
            in [_, 408 | 409 | 429 | (500..)]
              # retry on:
              # 408: timeouts
              # 409: locks
              # 429: rate limits
              # 500+: unknown errors
              true
            else
              false
            end
          end

          # @api private
          #
          # Computes the follow-up request for a 3xx response, per the WHATWG
          # fetch spec: rejects missing/invalid Location and https->http
          # downgrades, demotes the method/body on 301/302 POST and on 303,
          # and strips credentials when the redirect crosses origins.
          #
          # @param request [Hash{Symbol=>Object}] .
          # @option request [Symbol] :method
          # @option request [URI::Generic] :url
          # @option request [Hash{String=>String}] :headers
          # @option request [Object] :body
          # @option request [Integer] :max_retries
          # @option request [Float] :timeout
          #
          # @param status [Integer]
          # @param response_headers [Hash{String=>String}]
          #
          # @return [Hash{Symbol=>Object}]
          def follow_redirect(request, status:, response_headers:)
            method, url, headers = request.fetch_values(:method, :url, :headers)
            location =
              # `Kernel.then` is used as an expression-valued begin/rescue.
              Kernel.then do
                URI.join(url, response_headers["location"])
              rescue ArgumentError
                message = "Server responded with status #{status} but no valid location header."
                raise Scrapegraphai::Errors::APIConnectionError.new(
                  url: url,
                  response: response_headers,
                  message: message
                )
              end

            request = {**request, url: location}

            case [url.scheme, location.scheme]
            in ["https", "http"]
              # NOTE(review): message has a typo ("a insecure") — left as-is to
              # preserve the runtime string.
              message = "Tried to redirect to a insecure URL"
              raise Scrapegraphai::Errors::APIConnectionError.new(
                url: url,
                response: response_headers,
                message: message
              )
            else
              nil
            end

            # from whatwg fetch spec
            case [status, method]
            in [301 | 302, :post] | [303, _]
              drop = %w[content-encoding content-language content-length content-location content-type]
              request = {
                **request,
                method: method == :head ? :head : :get,
                headers: headers.except(*drop),
                body: nil
              }
            else
            end

            # from undici
            if Scrapegraphai::Internal::Util.uri_origin(url) != Scrapegraphai::Internal::Util.uri_origin(location)
              drop = %w[authorization cookie host proxy-authorization]
              request = {**request, headers: request.fetch(:headers).except(*drop)}
            end

            request
          end

          # @api private
          #
          # Returns a reusable connection to its pool: drains the body on
          # non-retryable statuses so keep-alive can continue, or hard-closes
          # the fused stream on connection errors / 5xx.
          #
          # @param status [Integer, Scrapegraphai::Errors::APIConnectionError]
          # @param stream [Enumerable, nil]
          def reap_connection!(status, stream:)
            case status
            in (..199) | (300..499)
              stream&.each { next }
            in Scrapegraphai::Errors::APIConnectionError | (500..)
              Scrapegraphai::Internal::Util.close_fused!(stream)
            else
            end
          end
        end

        # @return [URI::Generic]
        attr_reader :base_url

        # @return [Float]
        attr_reader :timeout

        # @return [Integer]
        attr_reader :max_retries

        # @return [Float]
        attr_reader :initial_retry_delay

        # @return [Float]
        attr_reader :max_retry_delay

        # @return [Hash{String=>String}]
        attr_reader :headers

        # @return [String, nil]
        attr_reader :idempotency_header

        # @api private
        # @return [Scrapegraphai::Internal::Transport::PooledNetRequester]
        attr_reader :requester

        # @api private
        #
        # @param base_url [String]
        # @param timeout [Float]
        # @param max_retries [Integer]
        # @param initial_retry_delay [Float]
        # @param max_retry_delay [Float]
        # @param headers [Hash{String=>String, Integer, Array, nil}]
        # @param idempotency_header [String, nil]
        def initialize(
          base_url:,
          timeout: 0.0,
          max_retries: 0,
          initial_retry_delay: 0.0,
          max_retry_delay: 0.0,
          headers: {},
          idempotency_header: nil
        )
          @requester = Scrapegraphai::Internal::Transport::PooledNetRequester.new
          # Later header sources win; platform headers are the base layer.
          @headers = Scrapegraphai::Internal::Util.normalized_headers(
            self.class::PLATFORM_HEADERS,
            {
              "accept" => "application/json",
              "content-type" => "application/json"
            },
            headers
          )
          @base_url_components = Scrapegraphai::Internal::Util.parse_uri(base_url)
          @base_url = Scrapegraphai::Internal::Util.unparse_uri(@base_url_components)
          @idempotency_header = idempotency_header&.to_s&.downcase
          @timeout = timeout
          @max_retries = max_retries
          @initial_retry_delay = initial_retry_delay
          @max_retry_delay = max_retry_delay
        end

        # @api private
        #
        # Subclass hook for per-request authentication headers.
        #
        # @return [Hash{String=>String}]
        private def auth_headers = {}

        # @api private
        #
        # @return [String]
        private def generate_idempotency_key = "stainless-ruby-retry-#{SecureRandom.uuid}"

        # @api private
        #
        # Merges a request spec with per-call options into the concrete wire
        # request: interpolated path, merged query/headers, idempotency key
        # for non-idempotent methods, and an encoded body.
        #
        # @param req [Hash{Symbol=>Object}] .
        # @option req [Symbol] :method
        # @option req [String, Array] :path
        # @option req [Hash{String=>Array, String, nil}, nil] :query
        # @option req [Hash{String=>String, Integer, Array, nil}, nil] :headers
        # @option req [Object, nil] :body
        # @option req [Symbol, Integer, Array, Proc, nil] :unwrap
        # @option req [Class, nil] :page
        # @option req [Class, nil] :stream
        # @option req [Scrapegraphai::Internal::Type::Converter, Class, nil] :model
        #
        # @param opts [Hash{Symbol=>Object}] .
        # @option opts [String, nil] :idempotency_key
        # @option opts [Hash{String=>Array, String, nil}, nil] :extra_query
        # @option opts [Hash{String=>String, nil}, nil] :extra_headers
        # @option opts [Object, nil] :extra_body
        # @option opts [Integer, nil] :max_retries
        # @option opts [Float, nil] :timeout
        #
        # @return [Hash{Symbol=>Object}]
        private def build_request(req, opts)
          method, uninterpolated_path = req.fetch_values(:method, :path)

          path = Scrapegraphai::Internal::Util.interpolate_path(uninterpolated_path)

          query = Scrapegraphai::Internal::Util.deep_merge(req[:query].to_h, opts[:extra_query].to_h)

          headers = Scrapegraphai::Internal::Util.normalized_headers(
            @headers,
            auth_headers,
            req[:headers].to_h,
            opts[:extra_headers].to_h
          )

          # Attach an idempotency key for methods that are not inherently
          # idempotent, or whenever the caller supplied one explicitly.
          if @idempotency_header &&
             !headers.key?(@idempotency_header) &&
             (!Net::HTTP::IDEMPOTENT_METHODS_.include?(method.to_s.upcase) || opts.key?(:idempotency_key))
            headers[@idempotency_header] = opts.fetch(:idempotency_key) { generate_idempotency_key }
          end

          unless headers.key?("x-stainless-retry-count")
            headers["x-stainless-retry-count"] = "0"
          end

          timeout = opts.fetch(:timeout, @timeout).to_f.clamp(0..)
          unless headers.key?("x-stainless-timeout") || timeout.zero?
            headers["x-stainless-timeout"] = timeout.to_s
          end

          # Empty header values act as deletions of earlier-layer defaults.
          headers.reject! { |_, v| v.to_s.empty? }

          body =
            case method
            in :get | :head | :options | :trace
              nil
            else
              Scrapegraphai::Internal::Util.deep_merge(*[req[:body], opts[:extra_body]].compact)
            end

          url = Scrapegraphai::Internal::Util.join_parsed_uri(
            @base_url_components,
            {**req, path: path, query: query}
          )
          headers, encoded = Scrapegraphai::Internal::Util.encode_content(headers, body)
          {
            method: method,
            url: url,
            headers: headers,
            body: encoded,
            max_retries: opts.fetch(:max_retries, @max_retries),
            timeout: timeout
          }
        end

        # @api private
        #
        # Computes the backoff before the next retry: honors `retry-after-ms`
        # / `retry-after` (seconds or HTTP-date) when present, else uses
        # jittered exponential backoff capped at `@max_retry_delay`.
        #
        # @param headers [Hash{String=>String}]
        # @param retry_count [Integer]
        #
        # @return [Float]
        private def retry_delay(headers, retry_count:)
          # Non-standard extension
          span = Float(headers["retry-after-ms"], exception: false)&.then { _1 / 1000 }
          return span if span

          retry_header = headers["retry-after"]
          return span if (span = Float(retry_header, exception: false))

          span = retry_header&.then do
            Time.httpdate(_1) - Time.now
          rescue ArgumentError
            nil
          end
          return span if span

          scale = retry_count**2
          jitter = 1 - (0.25 * rand)
          (@initial_retry_delay * scale * jitter).clamp(0, @max_retry_delay)
        end

        # @api private
        #
        # Executes one attempt and recurses for redirects (bounded by
        # MAX_REDIRECTS) and retries (bounded by :max_retries). Connection
        # errors are funneled through the same `status` variable as integer
        # statuses so one `case` drives all outcomes.
        #
        # @param request [Hash{Symbol=>Object}] .
        # @option request [Symbol] :method
        # @option request [URI::Generic] :url
        # @option request [Hash{String=>String}] :headers
        # @option request [Object] :body
        # @option request [Integer] :max_retries
        # @option request [Float] :timeout
        #
        # @param redirect_count [Integer]
        # @param retry_count [Integer]
        # @param send_retry_header [Boolean]
        #
        # @raise [Scrapegraphai::Errors::APIError]
        # @return [Array(Integer, Net::HTTPResponse, Enumerable)]
        def send_request(request, redirect_count:, retry_count:, send_retry_header:)
          url, headers, max_retries, timeout = request.fetch_values(:url, :headers, :max_retries, :timeout)
          input = {**request.except(:timeout), deadline: Scrapegraphai::Internal::Util.monotonic_secs + timeout}

          if send_retry_header
            headers["x-stainless-retry-count"] = retry_count.to_s
          end

          begin
            status, response, stream = @requester.execute(input)
          rescue Scrapegraphai::Errors::APIConnectionError => e
            status = e
          end
          # From here on `headers` refers to the RESPONSE headers.
          headers = Scrapegraphai::Internal::Util.normalized_headers(response&.each_header&.to_h)

          case status
          in ..299
            [status, response, stream]
          in 300..399 if redirect_count >= self.class::MAX_REDIRECTS
            self.class.reap_connection!(status, stream: stream)

            message = "Failed to complete the request within #{self.class::MAX_REDIRECTS} redirects."
            raise Scrapegraphai::Errors::APIConnectionError.new(
              url: url,
              response: response,
              message: message
            )
          in 300..399
            self.class.reap_connection!(status, stream: stream)

            request = self.class.follow_redirect(request, status: status, response_headers: headers)
            send_request(
              request,
              redirect_count: redirect_count + 1,
              retry_count: retry_count,
              send_retry_header: send_retry_header
            )
          in Scrapegraphai::Errors::APIConnectionError if retry_count >= max_retries
            raise status
          in (400..) if retry_count >= max_retries || !self.class.should_retry?(status, headers: headers)
            decoded = Kernel.then do
              Scrapegraphai::Internal::Util.decode_content(headers, stream: stream, suppress_error: true)
            ensure
              self.class.reap_connection!(status, stream: stream)
            end

            raise Scrapegraphai::Errors::APIStatusError.for(
              url: url,
              status: status,
              headers: headers,
              body: decoded,
              request: nil,
              response: response
            )
          in (400..) | Scrapegraphai::Errors::APIConnectionError
            self.class.reap_connection!(status, stream: stream)

            # NOTE(review): `response` (Net::HTTPResponse) is passed where
            # `retry_delay` names its parameter `headers`; this relies on
            # Net::HTTPResponse#[] doing case-insensitive header lookup.
            delay = retry_delay(response || {}, retry_count: retry_count)
            sleep(delay)

            send_request(
              request,
              redirect_count: redirect_count,
              retry_count: retry_count + 1,
              send_retry_header: send_retry_header
            )
          end
        end

        # Execute the request specified by `req`. This is the method that all resource
        # methods call into.
        #
        # @overload request(method, path, query: {}, headers: {}, body: nil, unwrap: nil, page: nil, stream: nil, model: Scrapegraphai::Internal::Type::Unknown, options: {})
        #
        # @param method [Symbol]
        # @param path [String, Array]
        # @param query [Hash{String=>Array, String, nil}, nil]
        # @param headers [Hash{String=>String, Integer, Array, nil}, nil]
        # @param body [Object, nil]
        # @param unwrap [Symbol, Integer, Array, Proc, nil]
        # @param page [Class, nil]
        # @param stream [Class, nil]
        # @param model [Scrapegraphai::Internal::Type::Converter, Class, nil]
        # @param options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] .
        # @option options [String, nil] :idempotency_key
        # @option options [Hash{String=>Array, String, nil}, nil] :extra_query
        # @option options [Hash{String=>String, nil}, nil] :extra_headers
        # @option options [Object, nil] :extra_body
        # @option options [Integer, nil] :max_retries
        # @option options [Float, nil] :timeout
        #
        # @raise [Scrapegraphai::Errors::APIError]
        # @return [Object]
        def request(req)
          self.class.validate!(req)
          model = req.fetch(:model) { Scrapegraphai::Internal::Type::Unknown }
          opts = req[:options].to_h
          unwrap = req[:unwrap]
          Scrapegraphai::RequestOptions.validate!(opts)
          request = build_request(req.except(:options), opts)
          url = request.fetch(:url)

          # Don't send the current retry count in the headers if the caller modified the header defaults.
          send_retry_header = request.fetch(:headers)["x-stainless-retry-count"] == "0"
          status, response, stream = send_request(
            request,
            redirect_count: 0,
            retry_count: 0,
            send_retry_header: send_retry_header
          )

          headers = Scrapegraphai::Internal::Util.normalized_headers(response.each_header.to_h)
          decoded = Scrapegraphai::Internal::Util.decode_content(headers, stream: stream)
          # Dispatch on the request spec: streaming wrapper, pagination
          # wrapper, or plain (optionally unwrapped) model coercion.
          case req
          in {stream: Class => st}
            st.new(
              model: model,
              url: url,
              status: status,
              headers: headers,
              response: response,
              unwrap: unwrap,
              stream: decoded
            )
          in {page: Class => page}
            page.new(client: self, req: req, headers: headers, page_data: decoded)
          else
            unwrapped = Scrapegraphai::Internal::Util.dig(decoded, unwrap)
            Scrapegraphai::Internal::Type::Converter.coerce(model, unwrapped)
          end
        end

        # @api private
        #
        # @return [String]
        def inspect
          # rubocop:disable Layout/LineLength
          "#<#{self.class.name}:0x#{object_id.to_s(16)} base_url=#{@base_url} max_retries=#{@max_retries} timeout=#{@timeout}>"
          # rubocop:enable Layout/LineLength
        end

        define_sorbet_constant!(:RequestComponents) do
          T.type_alias do
            {
              method: Symbol,
              path: T.any(String, T::Array[String]),
              query: T.nilable(T::Hash[String, T.nilable(T.any(T::Array[String], String))]),
              headers: T.nilable(
                T::Hash[String,
                        T.nilable(
                          T.any(
                            String,
                            Integer,
                            T::Array[T.nilable(T.any(String, Integer))]
                          )
                        )]
              ),
              body: T.nilable(T.anything),
              unwrap: T.nilable(
                T.any(
                  Symbol,
                  Integer,
                  T::Array[T.any(Symbol, Integer)],
                  T.proc.params(arg0: T.anything).returns(T.anything)
                )
              ),
              page: T.nilable(T::Class[Scrapegraphai::Internal::Type::BasePage[Scrapegraphai::Internal::Type::BaseModel]]),
              stream: T.nilable(T::Class[T.anything]),
              model: T.nilable(Scrapegraphai::Internal::Type::Converter::Input),
              options: T.nilable(Scrapegraphai::RequestOptions::OrHash)
            }
          end
        end
        define_sorbet_constant!(:RequestInput) do
          T.type_alias do
            {
              method: Symbol,
              url: URI::Generic,
              headers: T::Hash[String, String],
              body: T.anything,
              max_retries: Integer,
              timeout: Float
            }
          end
        end
      end
    end
  end
end
diff --git a/lib/scrapegraphai/internal/transport/pooled_net_requester.rb b/lib/scrapegraphai/internal/transport/pooled_net_requester.rb new file mode 100644 index 0000000..2968d6a --- /dev/null +++ b/lib/scrapegraphai/internal/transport/pooled_net_requester.rb @@ -0,0 +1,198 @@
# frozen_string_literal: true

module Scrapegraphai
  module Internal
    module Transport
      # @api private
      #
      # Net::HTTP-backed requester that keeps one connection pool per origin
      # and exposes response bodies as lazily-consumed enumerators.
      class PooledNetRequester
        extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport

        # from the golang stdlib
        # https://github.com/golang/go/blob/c8eced8580028328fde7c03cbfcb720ce15b2358/src/net/http/transport.go#L49
        KEEP_ALIVE_TIMEOUT = 30

        DEFAULT_MAX_CONNECTIONS = [Etc.nprocessors, 99].max

        class << self
          # @api private
          #
          # Builds an unstarted Net::HTTP connection for the URL's origin.
          #
          # @param url [URI::Generic]
          #
          # @return [Net::HTTP]
          def connect(url)
            port =
              case [url.port, url.scheme]
              in [Integer, _]
                url.port
              in [nil, "http" | "ws"]
                Net::HTTP.http_default_port
              in [nil, "https" | "wss"]
                Net::HTTP.https_default_port
              end

            Net::HTTP.new(url.host, port).tap do
              _1.use_ssl = %w[https wss].include?(url.scheme)
              # Retrying is handled at the BaseClient layer, not here.
              _1.max_retries = 0
            end
          end

          # @api private
          #
          # Re-derives the per-operation socket timeouts from the remaining
          # time until the overall request deadline.
          #
          # @param conn [Net::HTTP]
          # @param deadline [Float]
          def calibrate_socket_timeout(conn, deadline)
            timeout = deadline - Scrapegraphai::Internal::Util.monotonic_secs
            conn.open_timeout = conn.read_timeout = conn.write_timeout = conn.continue_timeout = timeout
          end

          # @api private
          #
          # Translates the request hash into a Net::HTTPGenericRequest with a
          # streaming body adapter; `blk` is invoked as body chunks are read
          # (used by the caller to refresh socket timeouts).
          #
          # @param request [Hash{Symbol=>Object}] .
          # @option request [Symbol] :method
          # @option request [URI::Generic] :url
          # @option request [Hash{String=>String}] :headers
          #
          # @param blk [Proc]
          #
          # @yieldparam [String]
          # @return [Array(Net::HTTPGenericRequest, Proc)]
          def build_request(request, &blk)
            method, url, headers, body = request.fetch_values(:method, :url, :headers, :body)
            req = Net::HTTPGenericRequest.new(
              method.to_s.upcase,
              !body.nil?,
              method != :head,
              URI(url.to_s) # ensure we construct a URI class of the right scheme
            )

            headers.each { req[_1] = _2 }

            case body
            in nil
              nil
            in String
              req["content-length"] ||= body.bytesize.to_s unless req["transfer-encoding"]
              req.body_stream = Scrapegraphai::Internal::Util::ReadIOAdapter.new(body, &blk)
            in StringIO
              req["content-length"] ||= body.size.to_s unless req["transfer-encoding"]
              req.body_stream = Scrapegraphai::Internal::Util::ReadIOAdapter.new(body, &blk)
            in Pathname | IO | Enumerator
              # Size is unknown up front, so fall back to chunked encoding.
              req["transfer-encoding"] ||= "chunked" unless req["content-length"]
              req.body_stream = Scrapegraphai::Internal::Util::ReadIOAdapter.new(body, &blk)
            end

            [req, req.body_stream&.method(:close)]
          end
        end

        # @api private
        #
        # Checks out a pooled connection for the URL's origin, creating the
        # per-origin pool lazily under the mutex.
        #
        # @param url [URI::Generic]
        # @param deadline [Float]
        # @param blk [Proc]
        #
        # @raise [Timeout::Error]
        # @yieldparam [Net::HTTP]
        private def with_pool(url, deadline:, &blk)
          origin = Scrapegraphai::Internal::Util.uri_origin(url)
          timeout = deadline - Scrapegraphai::Internal::Util.monotonic_secs
          pool =
            @mutex.synchronize do
              @pools[origin] ||= ConnectionPool.new(size: @size) do
                self.class.connect(url)
              end
            end

          pool.with(timeout: timeout, &blk)
        end

        # @api private
        #
        # Performs the request, returning the status, raw response, and the
        # body as a fused enumerator. The connection is held until the body
        # enumerator is exhausted or closed; `finished`/`eof` flags coordinate
        # early termination between the consumer and the socket loop.
        #
        # @param request [Hash{Symbol=>Object}] .
        # @option request [Symbol] :method
        # @option request [URI::Generic] :url
        # @option request [Hash{String=>String}] :headers
        # @option request [Object] :body
        # @option request [Float] :deadline
        #
        # @return [Array(Integer, Net::HTTPResponse, Enumerable)]
        def execute(request)
          url, deadline = request.fetch_values(:url, :deadline)

          req = nil
          eof = false
          finished = false
          closing = nil

          # rubocop:disable Metrics/BlockLength
          enum = Enumerator.new do |y|
            next if finished

            with_pool(url, deadline: deadline) do |conn|
              req, closing = self.class.build_request(request) do
                self.class.calibrate_socket_timeout(conn, deadline)
              end

              self.class.calibrate_socket_timeout(conn, deadline)
              unless conn.started?
                conn.keep_alive_timeout = self.class::KEEP_ALIVE_TIMEOUT
                conn.start
              end

              self.class.calibrate_socket_timeout(conn, deadline)
              conn.request(req) do |rsp|
                y << [req, rsp]
                break if finished

                rsp.read_body do |bytes|
                  y << bytes.force_encoding(Encoding::BINARY)
                  break if finished

                  self.class.calibrate_socket_timeout(conn, deadline)
                end
                eof = true
              end
            ensure
              # A body that was not fully read leaves the socket dirty; close
              # it instead of returning it to the pool in a bad state.
              conn.finish if !eof && conn&.started?
            end
          rescue Timeout::Error
            raise Scrapegraphai::Errors::APITimeoutError.new(url: url, request: req)
          rescue StandardError
            raise Scrapegraphai::Errors::APIConnectionError.new(url: url, request: req)
          end
          # rubocop:enable Metrics/BlockLength

          _, response = enum.next
          body = Scrapegraphai::Internal::Util.fused_enum(enum, external: true) do
            finished = true
            loop { enum.next }
          ensure
            closing&.call
          end
          [Integer(response.code), response, body]
        end

        # @api private
        #
        # @param size [Integer]
        def initialize(size: self.class::DEFAULT_MAX_CONNECTIONS)
          @mutex = Mutex.new
          @size = size
          @pools = {}
        end

        define_sorbet_constant!(:Request) do
          T.type_alias { {method: Symbol, url: URI::Generic, headers: T::Hash[String, String], body: T.anything, deadline: Float} }
        end
      end
    end
  end
end
diff --git a/lib/scrapegraphai/internal/type/array_of.rb b/lib/scrapegraphai/internal/type/array_of.rb new file mode 100644 index 0000000..dd87527 --- /dev/null +++ b/lib/scrapegraphai/internal/type/array_of.rb @@ -0,0 +1,168 @@
# frozen_string_literal: true

module Scrapegraphai
  module Internal
    module Type
      # @api private
      #
      # @abstract
      #
      # @generic Elem
      #
      # Array of items of a given type.
      class ArrayOf
        include Scrapegraphai::Internal::Type::Converter
        include Scrapegraphai::Internal::Util::SorbetRuntimeSupport

        # Instances are only built through the `[]` factory below.
        private_class_method :new

        # @overload [](type_info, spec = {})
        #
        # @param type_info [Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class]
        # @param spec [Hash{Symbol=>Object}] .
        # @option spec [NilClass, TrueClass, FalseClass, Integer, Float, Symbol] :const
        # @option spec [Proc] :enum
        # @option spec [Proc] :union
        # @option spec [Boolean] :"nil?"
        #
        # @return [self]
        def self.[](...) = new(...)

        # @api public
        #
        # @param other [Object]
        #
        # @return [Boolean]
        def ===(other) = other.is_a?(Array) && other.all?(item_type)

        # @api public
        #
        # Equality is structural: same wrapper class, same nilability, same
        # element type.
        #
        # @param other [Object]
        #
        # @return [Boolean]
        def ==(other)
          # rubocop:disable Layout/LineLength
          other.is_a?(Scrapegraphai::Internal::Type::ArrayOf) && other.nilable? == nilable? && other.item_type == item_type
          # rubocop:enable Layout/LineLength
        end

        # @api public
        #
        # @return [Integer]
        def hash = [self.class, item_type].hash

        # @api private
        #
        # Coerces each element via the item type, recording exactness tallies
        # in `state`; non-Array input is returned unchanged and counted as a
        # mismatch.
        #
        # @param value [Array, Object]
        # @param state [Hash{Symbol=>Object}] .
        # @option state [Boolean] :translate_names
        # @option state [Boolean] :strictness
        # @option state [Hash{Symbol=>Object}] :exactness
        # @option state [Class] :error
        # @option state [Integer] :branched
        #
        # @return [Array, Object]
        def coerce(value, state:)
          exactness = state.fetch(:exactness)

          unless value.is_a?(Array)
            exactness[:no] += 1
            state[:error] = TypeError.new("#{value.class} can't be coerced into #{Array}")
            return value
          end

          target = item_type
          exactness[:yes] += 1
          value
            .map do |item|
              case [nilable?, item]
              in [true, nil]
                exactness[:yes] += 1
                nil
              else
                Scrapegraphai::Internal::Type::Converter.coerce(target, item, state: state)
              end
            end
        end

        # @api private
        #
        # Serializes each element via the item type; non-Array input defers to
        # the generic Converter#dump behavior.
        #
        # @param value [Array, Object]
        # @param state [Hash{Symbol=>Object}] .
        # @option state [Boolean] :can_retry
        #
        # @return [Array, Object]
        def dump(value, state:)
          target = item_type
          if value.is_a?(Array)
            value.map do
              Scrapegraphai::Internal::Type::Converter.dump(target, _1, state: state)
            end
          else
            super
          end
        end

        # @api private
        #
        # @return [Object]
        def to_sorbet_type
          T::Array[Scrapegraphai::Internal::Util::SorbetRuntimeSupport.to_sorbet_type(item_type)]
        end

        # @api private
        #
        # The element type is stored as a thunk to allow forward references.
        #
        # @return [generic]
        protected def item_type = @item_type_fn.call

        # @api private
        #
        # @return [Boolean]
        protected def nilable? = @nilable

        # @api private
        #
        # @param type_info [Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class]
        # @param spec [Hash{Symbol=>Object}] .
        # @option spec [NilClass, TrueClass, FalseClass, Integer, Float, Symbol] :const
        # @option spec [Proc] :enum
        # @option spec [Proc] :union
        # @option spec [Boolean] :"nil?"
        def initialize(type_info, spec = {})
          @item_type_fn = Scrapegraphai::Internal::Type::Converter.type_info(type_info || spec)
          @meta = Scrapegraphai::Internal::Type::Converter.meta_info(type_info, spec)
          @nilable = spec.fetch(:nil?, false)
        end

        # @api private
        #
        # @param depth [Integer]
        #
        # @return [String]
        def inspect(depth: 0)
          items = Scrapegraphai::Internal::Type::Converter.inspect(item_type, depth: depth.succ)

          "#{self.class}[#{[items, nilable? ? 'nil' : nil].compact.join(' | ')}]"
        end
      end
    end
  end
end
diff --git a/lib/scrapegraphai/internal/type/base_model.rb b/lib/scrapegraphai/internal/type/base_model.rb new file mode 100644 index 0000000..7d0c770 --- /dev/null +++ b/lib/scrapegraphai/internal/type/base_model.rb @@ -0,0 +1,534 @@
# frozen_string_literal: true

module Scrapegraphai
  module Internal
    module Type
      # @abstract
      class BaseModel
        extend Scrapegraphai::Internal::Type::Converter
        extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport

        class << self
          # @api private
          #
          # Assumes superclass fields are totally defined before fields are accessed /
          # defined on subclasses.
          #
          # @param child [Class]
          def inherited(child)
            super
            child.known_fields.replace(known_fields.dup)
          end

          # @api private
          #
          # @return [Hash{Symbol=>Hash{Symbol=>Object}}]
          def known_fields = @known_fields ||= {}

          # @api private
          #
          # Like `known_fields`, but with each field's type thunk resolved to
          # its concrete type.
          #
          # @return [Hash{Symbol=>Hash{Symbol=>Object}}]
          def fields
            known_fields.transform_values do |field|
              {**field.except(:type_fn), type: field.fetch(:type_fn).call}
            end
          end

          # @api private
          #
          # Registers a field: records its metadata in `known_fields` and
          # defines a lazily-coercing reader and an eagerly-coercing writer.
          #
          # @param name_sym [Symbol]
          # @param required [Boolean]
          # @param type_info [Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class]
          # @param spec [Hash{Symbol=>Object}] .
          # @option spec [NilClass, TrueClass, FalseClass, Integer, Float, Symbol] :const
          # @option spec [Proc] :enum
          # @option spec [Proc] :union
          # @option spec [Boolean] :"nil?"
          private def add_field(name_sym, required:, type_info:, spec:)
            meta = Scrapegraphai::Internal::Type::Converter.meta_info(type_info, spec)
            type_fn, info =
              case type_info
              in Proc | Scrapegraphai::Internal::Type::Converter | Class
                [Scrapegraphai::Internal::Type::Converter.type_info({**spec, union: type_info}), spec]
              in Hash
                [Scrapegraphai::Internal::Type::Converter.type_info(type_info), type_info]
              end

            setter = :"#{name_sym}="
            api_name = info.fetch(:api_name, name_sym)
            nilable = info.fetch(:nil?, false)
            # A const default only applies to required, non-nilable fields.
            const = required && !nilable ? info.fetch(:const, Scrapegraphai::Internal::OMIT) : Scrapegraphai::Internal::OMIT

            # Redefining a field (e.g. in a subclass) first removes the old
            # accessor pair to avoid redefinition warnings.
            [name_sym, setter].each { undef_method(_1) } if known_fields.key?(name_sym)

            known_fields[name_sym] =
              {
                mode: @mode,
                api_name: api_name,
                required: required,
                nilable: nilable,
                const: const,
                type_fn: type_fn,
                meta: meta
              }

            # Writer: coerce eagerly and remember success/failure in @coerced;
            # the raw value is kept when coercion did not produce a converter
            # result.
            define_method(setter) do |value|
              target = type_fn.call
              state = Scrapegraphai::Internal::Type::Converter.new_coerce_state(translate_names: false)
              coerced = Scrapegraphai::Internal::Type::Converter.coerce(target, value, state: state)
              status = @coerced.store(name_sym, state.fetch(:error) || true)
              stored =
                case [target, status]
                in [Scrapegraphai::Internal::Type::Converter | Symbol, true]
                  coerced
                else
                  value
                end
              @data.store(name_sym, stored)
            end

            # Reader: return the stored value when a prior coercion already
            # succeeded; surface recorded coercion errors as ConversionError;
            # otherwise coerce on demand.
            # rubocop:disable Style/CaseEquality
            # rubocop:disable Metrics/BlockLength
            define_method(name_sym) do
              target = type_fn.call

              case @coerced[name_sym]
              in true | false if Scrapegraphai::Internal::Type::Converter === target
                @data.fetch(name_sym)
              in ::StandardError => e
                raise Scrapegraphai::Errors::ConversionError.new(
                  on: self.class,
                  method: __method__,
                  target: target,
                  value: @data.fetch(name_sym),
                  cause: e
                )
              else
                Kernel.then do
                  value = @data.fetch(name_sym) { const == Scrapegraphai::Internal::OMIT ? nil : const }
                  state = Scrapegraphai::Internal::Type::Converter.new_coerce_state(translate_names: false)
                  if (nilable || !required) && value.nil?
                    nil
                  else
                    Scrapegraphai::Internal::Type::Converter.coerce(
                      target, value, state: state
                    )
                  end
                rescue StandardError => e
                  raise Scrapegraphai::Errors::ConversionError.new(
                    on: self.class,
                    method: __method__,
                    target: target,
                    value: value,
                    cause: e
                  )
                end
              end
            end
            # rubocop:enable Metrics/BlockLength
            # rubocop:enable Style/CaseEquality
          end

          # @api private
          #
          # Declares a required field.
          #
          # @param name_sym [Symbol]
          # @param type_info [Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class]
          # @param spec [Hash{Symbol=>Object}] .
          # @option spec [NilClass, TrueClass, FalseClass, Integer, Float, Symbol] :const
          # @option spec [Proc] :enum
          # @option spec [Proc] :union
          # @option spec [Boolean] :"nil?"
          def required(name_sym, type_info, spec = {})
            add_field(name_sym, required: true, type_info: type_info, spec: spec)
          end

          # @api private
          #
          # Declares an optional field.
          #
          # @param name_sym [Symbol]
          # @param type_info [Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class]
          # @param spec [Hash{Symbol=>Object}] .
          # @option spec [NilClass, TrueClass, FalseClass, Integer, Float, Symbol] :const
          # @option spec [Proc] :enum
          # @option spec [Proc] :union
          # @option spec [Boolean] :"nil?"
+ def optional(name_sym, type_info, spec = {}) + add_field(name_sym, required: false, type_info: type_info, spec: spec) + end + + # @api private + # + # `request_only` attributes not excluded from `.#coerce` when receiving responses + # even if well behaved servers should not send them + # + # @param blk [Proc] + private def request_only(&blk) + @mode = :dump + blk.call + ensure + @mode = nil + end + + # @api private + # + # `response_only` attributes are omitted from `.#dump` when making requests + # + # @param blk [Proc] + private def response_only(&blk) + @mode = :coerce + blk.call + ensure + @mode = nil + end + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def ==(other) + other.is_a?(Class) && other <= Scrapegraphai::Internal::Type::BaseModel && other.fields == fields + end + + # @api public + # + # @return [Integer] + def hash = fields.hash + end + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def ==(other) = self.class == other.class && @data == other.to_h + + # @api public + # + # @return [Integer] + def hash = [self.class, @data].hash + + class << self + # @api private + # + # @param value [Scrapegraphai::Internal::Type::BaseModel, Hash{Object=>Object}, Object] + # + # @param state [Hash{Symbol=>Object}] . 
+ # + # @option state [Boolean] :translate_names + # + # @option state [Boolean] :strictness + # + # @option state [Hash{Symbol=>Object}] :exactness + # + # @option state [Class] :error + # + # @option state [Integer] :branched + # + # @return [self, Object] + def coerce(value, state:) + exactness = state.fetch(:exactness) + + if value.is_a?(self) + exactness[:yes] += 1 + return value + end + + unless (val = Scrapegraphai::Internal::Util.coerce_hash(value)).is_a?(Hash) + exactness[:no] += 1 + state[:error] = TypeError.new("#{value.class} can't be coerced into #{Hash}") + return value + end + exactness[:yes] += 1 + + keys = val.keys.to_set + instance = new + data = instance.to_h + status = instance.instance_variable_get(:@coerced) + + # rubocop:disable Metrics/BlockLength + fields.each do |name, field| + mode, required, target = field.fetch_values(:mode, :required, :type) + api_name, nilable, const = field.fetch_values(:api_name, :nilable, :const) + src_name = state.fetch(:translate_names) ? api_name : name + + unless val.key?(src_name) + if required && mode != :dump && const == Scrapegraphai::Internal::OMIT + exactness[nilable ? :maybe : :no] += 1 + else + exactness[:yes] += 1 + end + next + end + + item = val.fetch(src_name) + keys.delete(src_name) + + state[:error] = nil + converted = + if item.nil? && (nilable || !required) + exactness[nilable ? :yes : :maybe] += 1 + nil + else + coerced = Scrapegraphai::Internal::Type::Converter.coerce(target, item, state: state) + case target + in Scrapegraphai::Internal::Type::Converter | Symbol + coerced + else + item + end + end + + status.store(name, state.fetch(:error) || true) + data.store(name, converted) + end + # rubocop:enable Metrics/BlockLength + + keys.each { data.store(_1, val.fetch(_1)) } + instance + end + + # @api private + # + # @param value [self, Object] + # + # @param state [Hash{Symbol=>Object}] . 
+ # + # @option state [Boolean] :can_retry + # + # @return [Hash{Object=>Object}, Object] + def dump(value, state:) + unless (coerced = Scrapegraphai::Internal::Util.coerce_hash(value)).is_a?(Hash) + return super + end + + acc = {} + + coerced.each do |key, val| + name = key.is_a?(String) ? key.to_sym : key + case (field = known_fields[name]) + in nil + acc.store(name, super(val, state: state)) + else + api_name, mode, type_fn = field.fetch_values(:api_name, :mode, :type_fn) + case mode + in :coerce + next + else + target = type_fn.call + acc.store( + api_name, + Scrapegraphai::Internal::Type::Converter.dump(target, val, state: state) + ) + end + end + end + + known_fields.each_value do |field| + api_name, mode, const = field.fetch_values(:api_name, :mode, :const) + next if mode == :coerce || acc.key?(api_name) || const == Scrapegraphai::Internal::OMIT + acc.store(api_name, const) + end + + acc + end + + # @api private + # + # @return [Object] + def to_sorbet_type + self + end + end + + class << self + # @api private + # + # @param model [Scrapegraphai::Internal::Type::BaseModel] + # @param convert [Boolean] + # + # @return [Hash{Symbol=>Object}] + def recursively_to_h(model, convert:) + rec = ->(x) do + case x + in Scrapegraphai::Internal::Type::BaseModel + if convert + fields = x.class.known_fields + x.to_h.to_h do |key, val| + [key, rec.call(fields.key?(key) ? x.public_send(key) : val)] + rescue Scrapegraphai::Errors::ConversionError + [key, rec.call(val)] + end + else + rec.call(x.to_h) + end + in Hash + x.transform_values(&rec) + in Array + x.map(&rec) + else + x + end + end + rec.call(model) + end + end + + # @api public + # + # Returns the raw value associated with the given key, if found. Otherwise, nil is + # returned. + # + # It is valid to lookup keys that are not in the API spec, for example to access + # undocumented features. This method does not parse response data into + # higher-level types. 
Lookup by anything other than a Symbol is an ArgumentError.
+ #
+ # @param key [Symbol]
+ #
+ # @return [Object, nil]
+ def [](key)
+ unless key.instance_of?(Symbol)
+ raise ArgumentError.new("Expected symbol key for lookup, got #{key.inspect}")
+ end
+
+ @data[key]
+ end
+
+ # @api public
+ #
+ # Returns a Hash of the data underlying this object. O(1)
+ #
+ # Keys are Symbols and values are the raw values from the response. The return
+ # value indicates which values were ever set on the object. i.e. there will be a
+ # key in this hash if they ever were, even if the set value was nil.
+ #
+ # This method is not recursive. The returned value is shared by the object, so it
+ # should not be mutated.
+ #
+ # @return [Hash{Symbol=>Object}]
+ def to_h = @data
+
+ alias_method :to_hash, :to_h
+
+ # @api public
+ #
+ # In addition to the behaviour of `#to_h`, this method will recursively call
+ # `#to_h` on nested models.
+ #
+ # @return [Hash{Symbol=>Object}]
+ def deep_to_h = self.class.recursively_to_h(@data, convert: false)
+
+ # @param keys [Array<Symbol>, nil]
+ #
+ # @return [Hash{Symbol=>Object}]
+ #
+ # @example
+ # # `completed_smartscraper` is a `Scrapegraphai::CompletedSmartscraper`
+ # completed_smartscraper => {
+ # error: error,
+ # request_id: request_id,
+ # result: result
+ # }
+ def deconstruct_keys(keys)
+ (keys || self.class.known_fields.keys)
+ .filter_map do |k|
+ unless self.class.known_fields.key?(k)
+ next
+ end
+
+ [k, public_send(k)]
+ end
+ .to_h
+ end
+
+ # @api public
+ #
+ # @param a [Object]
+ #
+ # @return [String]
+ def to_json(*a) = Scrapegraphai::Internal::Type::Converter.dump(self.class, self).to_json(*a)
+
+ # @api public
+ #
+ # @param a [Object]
+ #
+ # @return [String]
+ def to_yaml(*a) = Scrapegraphai::Internal::Type::Converter.dump(self.class, self).to_yaml(*a)
+
+ # Create a new instance of a model.
+ # + # @param data [Hash{Symbol=>Object}, self] + def initialize(data = {}) + @data = {} + @coerced = {} + Scrapegraphai::Internal::Util.coerce_hash!(data).each do + if self.class.known_fields.key?(_1) + public_send(:"#{_1}=", _2) + else + @data.store(_1, _2) + @coerced.store(_1, false) + end + end + end + + class << self + # @api private + # + # @param depth [Integer] + # + # @return [String] + def inspect(depth: 0) + return super() if depth.positive? + + depth = depth.succ + deferred = fields.transform_values do |field| + type, required, nilable = field.fetch_values(:type, :required, :nilable) + inspected = [ + Scrapegraphai::Internal::Type::Converter.inspect(type, depth: depth), + !required || nilable ? "nil" : nil + ].compact.join(" | ") + -> { inspected }.tap { _1.define_singleton_method(:inspect) { call } } + end + + "#{name}[#{deferred.inspect}]" + end + end + + # @api public + # + # @return [String] + def to_s = deep_to_h.to_s + + # @api private + # + # @return [String] + def inspect + converted = self.class.recursively_to_h(self, convert: true) + "#<#{self.class}:0x#{object_id.to_s(16)} #{converted}>" + end + + define_sorbet_constant!(:KnownField) do + T.type_alias { {mode: T.nilable(Symbol), required: T::Boolean, nilable: T::Boolean} } + end + end + end + end +end diff --git a/lib/scrapegraphai/internal/type/base_page.rb b/lib/scrapegraphai/internal/type/base_page.rb new file mode 100644 index 0000000..dcfdaa4 --- /dev/null +++ b/lib/scrapegraphai/internal/type/base_page.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Internal + module Type + # @api private + # + # @generic Elem + # + # This module provides a base implementation for paginated responses in the SDK. + module BasePage + # rubocop:disable Lint/UnusedMethodArgument + + # @api public + # + # @return [Boolean] + def next_page? 
= (raise NotImplementedError)
+
+ # @api public
+ #
+ # @raise [Scrapegraphai::Errors::APIError]
+ # @return [self]
+ def next_page = (raise NotImplementedError)
+
+ # @api public
+ #
+ # @param blk [Proc]
+ #
+ # @yieldparam [generic<Elem>]
+ # @return [void]
+ def auto_paging_each(&blk) = (raise NotImplementedError)
+
+ # @return [Enumerable<generic<Elem>>]
+ def to_enum = super(:auto_paging_each)
+
+ alias_method :enum_for, :to_enum
+
+ # @api private
+ #
+ # @param client [Scrapegraphai::Internal::Transport::BaseClient]
+ # @param req [Hash{Symbol=>Object}]
+ # @param headers [Hash{String=>String}]
+ # @param page_data [Object]
+ def initialize(client:, req:, headers:, page_data:)
+ @client = client
+ @req = req
+ @model = req.fetch(:model)
+ super()
+ end
+
+ # rubocop:enable Lint/UnusedMethodArgument
+ end
+ end
+ end
+end
diff --git a/lib/scrapegraphai/internal/type/boolean.rb b/lib/scrapegraphai/internal/type/boolean.rb
new file mode 100644
index 0000000..47d94e2
--- /dev/null
+++ b/lib/scrapegraphai/internal/type/boolean.rb
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+module Scrapegraphai
+ module Internal
+ module Type
+ # @api private
+ #
+ # @abstract
+ #
+ # Ruby has no Boolean class; this is something for models to refer to.
+ class Boolean
+ extend Scrapegraphai::Internal::Type::Converter
+ extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport
+
+ private_class_method :new
+
+ # @api public
+ #
+ # @param other [Object]
+ #
+ # @return [Boolean]
+ def self.===(other) = other == true || other == false
+
+ # @api public
+ #
+ # @param other [Object]
+ #
+ # @return [Boolean]
+ def self.==(other) = other.is_a?(Class) && other <= Scrapegraphai::Internal::Type::Boolean
+
+ class << self
+ # @api private
+ #
+ # Coerce value to Boolean if possible, otherwise return the original value.
+ #
+ # @param value [Boolean, Object]
+ #
+ # @param state [Hash{Symbol=>Object}] .
+ # + # @option state [Boolean] :translate_names + # + # @option state [Boolean] :strictness + # + # @option state [Hash{Symbol=>Object}] :exactness + # + # @option state [Class] :error + # + # @option state [Integer] :branched + # + # @return [Boolean, Object] + def coerce(value, state:) + state.fetch(:exactness)[value == true || value == false ? :yes : :no] += 1 + value + end + + # @!method dump(value, state:) + # @api private + # + # @param value [Boolean, Object] + # + # @param state [Hash{Symbol=>Object}] . + # + # @option state [Boolean] :can_retry + # + # @return [Boolean, Object] + + # @api private + # + # @return [Object] + def to_sorbet_type + T::Boolean + end + end + end + end + end +end diff --git a/lib/scrapegraphai/internal/type/converter.rb b/lib/scrapegraphai/internal/type/converter.rb new file mode 100644 index 0000000..066e3a4 --- /dev/null +++ b/lib/scrapegraphai/internal/type/converter.rb @@ -0,0 +1,327 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Internal + module Type + # @api private + module Converter + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + # rubocop:disable Lint/UnusedMethodArgument + + # @api private + # + # @param value [Object] + # + # @param state [Hash{Symbol=>Object}] . + # + # @option state [Boolean] :translate_names + # + # @option state [Boolean] :strictness + # + # @option state [Hash{Symbol=>Object}] :exactness + # + # @option state [Class] :error + # + # @option state [Integer] :branched + # + # @return [Object] + def coerce(value, state:) = (raise NotImplementedError) + + # @api private + # + # @param value [Object] + # + # @param state [Hash{Symbol=>Object}] . 
+ # + # @option state [Boolean] :can_retry + # + # @return [Object] + def dump(value, state:) + case value + in Array + value.map { Scrapegraphai::Internal::Type::Unknown.dump(_1, state: state) } + in Hash + value.transform_values { Scrapegraphai::Internal::Type::Unknown.dump(_1, state: state) } + in Scrapegraphai::Internal::Type::BaseModel + value.class.dump(value, state: state) + in StringIO + value.string + in Pathname | IO + state[:can_retry] = false if value.is_a?(IO) + Scrapegraphai::FilePart.new(value) + in Scrapegraphai::FilePart + state[:can_retry] = false if value.content.is_a?(IO) + value + else + value + end + end + + # @api private + # + # @param depth [Integer] + # + # @return [String] + def inspect(depth: 0) + super() + end + + # rubocop:enable Lint/UnusedMethodArgument + + class << self + # @api private + # + # @param spec [Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class] . + # + # @option spec [NilClass, TrueClass, FalseClass, Integer, Float, Symbol] :const + # + # @option spec [Proc] :enum + # + # @option spec [Proc] :union + # + # @option spec [Boolean] :"nil?" + # + # @return [Proc] + def type_info(spec) + case spec + in Proc + spec + in Hash + type_info(spec.slice(:const, :enum, :union).first&.last) + in true | false + -> { Scrapegraphai::Internal::Type::Boolean } + in Scrapegraphai::Internal::Type::Converter | Class | Symbol + -> { spec } + in NilClass | Integer | Float + -> { spec.class } + end + end + + # @api private + # + # @param type_info [Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class] . + # + # @option type_info [NilClass, TrueClass, FalseClass, Integer, Float, Symbol] :const + # + # @option type_info [Proc] :enum + # + # @option type_info [Proc] :union + # + # @option type_info [Boolean] :"nil?" + # + # @param spec [Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class] . 
+ # + # @option spec [NilClass, TrueClass, FalseClass, Integer, Float, Symbol] :const + # + # @option spec [Proc] :enum + # + # @option spec [Proc] :union + # + # @option spec [Boolean] :"nil?" + # + # @return [Hash{Symbol=>Object}] + def meta_info(type_info, spec) + [spec, type_info].grep(Hash).first.to_h.except(:const, :enum, :union, :nil?) + end + + # @api private + # + # @param translate_names [Boolean] + # + # @return [Hash{Symbol=>Object}] + def new_coerce_state(translate_names: true) + { + translate_names: translate_names, + strictness: true, + exactness: {yes: 0, no: 0, maybe: 0}, + error: nil, + branched: 0 + } + end + + # @api private + # + # Based on `target`, transform `value` into `target`, to the extent possible: + # + # 1. if the given `value` conforms to `target` already, return the given `value` + # 2. if it's possible and safe to convert the given `value` to `target`, then the + # converted value + # 3. otherwise, the given `value` unaltered + # + # The coercion process is subject to improvement between minor release versions. + # See https://docs.pydantic.dev/latest/concepts/unions/#smart-mode + # + # @param target [Scrapegraphai::Internal::Type::Converter, Class] + # + # @param value [Object] + # + # @param state [Hash{Symbol=>Object}] The `strictness` is one of `true`, `false`. This informs the coercion strategy + # when we have to decide between multiple possible conversion targets: + # + # - `true`: the conversion must be exact, with minimum coercion. + # - `false`: the conversion can be approximate, with some coercion. + # + # The `exactness` is `Hash` with keys being one of `yes`, `no`, or `maybe`. For + # any given conversion attempt, the exactness will be updated based on how closely + # the value recursively matches the target type: + # + # - `yes`: the value can be converted to the target type with minimum coercion. + # - `maybe`: the value can be converted to the target type with some reasonable + # coercion. 
+ # - `no`: the value cannot be converted to the target type. + # + # See implementation below for more details. + # + # @option state [Boolean] :translate_names + # + # @option state [Boolean] :strictness + # + # @option state [Hash{Symbol=>Object}] :exactness + # + # @option state [Class] :error + # + # @option state [Integer] :branched + # + # @return [Object] + def coerce(target, value, state: Scrapegraphai::Internal::Type::Converter.new_coerce_state) + # rubocop:disable Metrics/BlockNesting + exactness = state.fetch(:exactness) + + case target + in Scrapegraphai::Internal::Type::Converter + return target.coerce(value, state: state) + in Class + if value.is_a?(target) + exactness[:yes] += 1 + return value + end + + case target + in -> { _1 <= NilClass } + exactness[value.nil? ? :yes : :maybe] += 1 + return nil + in -> { _1 <= Integer } + case value + in Integer + exactness[:yes] += 1 + return value + else + Kernel.then do + return Integer(value).tap { exactness[:maybe] += 1 } + rescue ArgumentError, TypeError => e + state[:error] = e + end + end + in -> { _1 <= Float } + if value.is_a?(Numeric) + exactness[:yes] += 1 + return Float(value) + else + Kernel.then do + return Float(value).tap { exactness[:maybe] += 1 } + rescue ArgumentError, TypeError => e + state[:error] = e + end + end + in -> { _1 <= String } + case value + in String | Symbol | Numeric + exactness[value.is_a?(Numeric) ? 
:maybe : :yes] += 1 + return value.to_s + in StringIO + exactness[:yes] += 1 + return value.string + else + state[:error] = TypeError.new("#{value.class} can't be coerced into #{String}") + end + in -> { _1 <= Date || _1 <= Time } + Kernel.then do + return target.parse(value).tap { exactness[:yes] += 1 } + rescue ArgumentError, TypeError => e + state[:error] = e + end + in -> { _1 <= StringIO } if value.is_a?(String) + exactness[:yes] += 1 + return StringIO.new(value.b) + else + end + in Symbol + case value + in Symbol | String + if value.to_sym == target + exactness[:yes] += 1 + return target + else + exactness[:maybe] += 1 + return value + end + else + message = "cannot convert non-matching #{value.class} into #{target.inspect}" + state[:error] = ArgumentError.new(message) + end + else + end + + exactness[:no] += 1 + value + # rubocop:enable Metrics/BlockNesting + end + + # @api private + # + # @param target [Scrapegraphai::Internal::Type::Converter, Class] + # + # @param value [Object] + # + # @param state [Hash{Symbol=>Object}] . 
+ # + # @option state [Boolean] :can_retry + # + # @return [Object] + def dump(target, value, state: {can_retry: true}) + case target + in Scrapegraphai::Internal::Type::Converter + target.dump(value, state: state) + else + Scrapegraphai::Internal::Type::Unknown.dump(value, state: state) + end + end + + # @api private + # + # @param target [Object] + # @param depth [Integer] + # + # @return [String] + def inspect(target, depth:) + case target + in Scrapegraphai::Internal::Type::Converter + target.inspect(depth: depth.succ) + else + target.inspect + end + end + end + + define_sorbet_constant!(:Input) do + T.type_alias { T.any(Scrapegraphai::Internal::Type::Converter, T::Class[T.anything]) } + end + define_sorbet_constant!(:CoerceState) do + T.type_alias do + { + translate_names: T::Boolean, + strictness: T::Boolean, + exactness: {yes: Integer, no: Integer, maybe: Integer}, + error: T::Class[StandardError], + branched: Integer + } + end + end + define_sorbet_constant!(:DumpState) do + T.type_alias { {can_retry: T::Boolean} } + end + end + end + end +end diff --git a/lib/scrapegraphai/internal/type/enum.rb b/lib/scrapegraphai/internal/type/enum.rb new file mode 100644 index 0000000..631b58f --- /dev/null +++ b/lib/scrapegraphai/internal/type/enum.rb @@ -0,0 +1,131 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Internal + module Type + # @api private + # + # A value from among a specified list of options. OpenAPI enum values map to Ruby + # values in the SDK as follows: + # + # 1. boolean => true | false + # 2. integer => Integer + # 3. float => Float + # 4. string => Symbol + # + # We can therefore convert string values to Symbols, but can't convert other + # values safely. + module Enum + include Scrapegraphai::Internal::Type::Converter + include Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + # All of the valid Symbol values for this enum. 
+ # + # @return [Array] + def values = constants.map { const_get(_1) } + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def ===(other) = values.include?(other) + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def ==(other) + # rubocop:disable Style/CaseEquality + Scrapegraphai::Internal::Type::Enum === other && other.values.to_set == values.to_set + # rubocop:enable Style/CaseEquality + end + + # @api public + # + # @return [Integer] + def hash = values.to_set.hash + + # @api private + # + # Unlike with primitives, `Enum` additionally validates that the value is a member + # of the enum. + # + # @param value [String, Symbol, Object] + # + # @param state [Hash{Symbol=>Object}] . + # + # @option state [Boolean] :translate_names + # + # @option state [Boolean] :strictness + # + # @option state [Hash{Symbol=>Object}] :exactness + # + # @option state [Class] :error + # + # @option state [Integer] :branched + # + # @return [Symbol, Object] + def coerce(value, state:) + exactness = state.fetch(:exactness) + val = value.is_a?(String) ? value.to_sym : value + + if values.include?(val) + exactness[:yes] += 1 + val + elsif values.first&.class == val.class + exactness[:maybe] += 1 + value + else + exactness[:no] += 1 + state[:error] = TypeError.new("#{value.class} can't be coerced into #{self}") + value + end + end + + # @!method dump(value, state:) + # @api private + # + # @param value [Symbol, Object] + # + # @param state [Hash{Symbol=>Object}] . + # + # @option state [Boolean] :can_retry + # + # @return [Symbol, Object] + + # @api private + # + # @return [Object] + def to_sorbet_type + types = values.map { Scrapegraphai::Internal::Util::SorbetRuntimeSupport.to_sorbet_type(_1) }.uniq + case types + in [] + T.noreturn + in [type] + type + else + T.any(*types) + end + end + + # @api private + # + # @param depth [Integer] + # + # @return [String] + def inspect(depth: 0) + if depth.positive? + return is_a?(Module) ? 
super() : self.class.name + end + + members = values.map { Scrapegraphai::Internal::Type::Converter.inspect(_1, depth: depth.succ) } + prefix = is_a?(Module) ? name : self.class.name + + "#{prefix}[#{members.join(' | ')}]" + end + end + end + end +end diff --git a/lib/scrapegraphai/internal/type/file_input.rb b/lib/scrapegraphai/internal/type/file_input.rb new file mode 100644 index 0000000..312923b --- /dev/null +++ b/lib/scrapegraphai/internal/type/file_input.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Internal + module Type + # @api private + # + # @abstract + # + # Either `Pathname` or `StringIO`, or `IO`, or + # `Scrapegraphai::Internal::Type::FileInput`. + # + # Note: when `IO` is used, all retries are disabled, since many IO` streams are + # not rewindable. + class FileInput + extend Scrapegraphai::Internal::Type::Converter + + private_class_method :new + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def self.===(other) + case other + in Pathname | StringIO | IO | String | Scrapegraphai::FilePart + true + else + false + end + end + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def self.==(other) = other.is_a?(Class) && other <= Scrapegraphai::Internal::Type::FileInput + + class << self + # @api private + # + # @param value [StringIO, String, Object] + # + # @param state [Hash{Symbol=>Object}] . 
+ # + # @option state [Boolean] :translate_names + # + # @option state [Boolean] :strictness + # + # @option state [Hash{Symbol=>Object}] :exactness + # + # @option state [Class] :error + # + # @option state [Integer] :branched + # + # @return [StringIO, Object] + def coerce(value, state:) + exactness = state.fetch(:exactness) + case value + in String + exactness[:yes] += 1 + StringIO.new(value) + in StringIO + exactness[:yes] += 1 + value + else + state[:error] = TypeError.new("#{value.class} can't be coerced into #{StringIO}") + exactness[:no] += 1 + value + end + end + + # @api private + # + # @param value [Pathname, StringIO, IO, String, Object] + # + # @param state [Hash{Symbol=>Object}] . + # + # @option state [Boolean] :can_retry + # + # @return [Pathname, StringIO, IO, String, Object] + def dump(value, state:) + case value + in StringIO | String + # https://datatracker.ietf.org/doc/html/rfc7578#section-4.2 + # while not required, a filename is recommended, and in practice many servers do expect this + Scrapegraphai::FilePart.new(value, filename: "upload") + in IO + state[:can_retry] = false + value.to_path.nil? ? Scrapegraphai::FilePart.new(value, filename: "upload") : value + in Scrapegraphai::FilePart if value.content.is_a?(IO) + state[:can_retry] = false + value + else + value + end + end + + # @api private + # + # @return [Object] + def to_sorbet_type + T.any(Pathname, StringIO, IO, String, Scrapegraphai::FilePart) + end + end + end + end + end +end diff --git a/lib/scrapegraphai/internal/type/hash_of.rb b/lib/scrapegraphai/internal/type/hash_of.rb new file mode 100644 index 0000000..f564a1c --- /dev/null +++ b/lib/scrapegraphai/internal/type/hash_of.rb @@ -0,0 +1,188 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Internal + module Type + # @api private + # + # @abstract + # + # @generic Elem + # + # Hash of items of a given type. 
+ class HashOf + include Scrapegraphai::Internal::Type::Converter + include Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + private_class_method :new + + # @overload [](type_info, spec = {}) + # + # @param type_info [Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class] + # + # @param spec [Hash{Symbol=>Object}] . + # + # @option spec [NilClass, TrueClass, FalseClass, Integer, Float, Symbol] :const + # + # @option spec [Proc] :enum + # + # @option spec [Proc] :union + # + # @option spec [Boolean] :"nil?" + # + # @return [self] + def self.[](...) = new(...) + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def ===(other) + type = item_type + case other + in Hash + other.all? do |key, val| + case [key, val] + in [Symbol | String, ^type] + true + else + false + end + end + else + false + end + end + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def ==(other) + # rubocop:disable Layout/LineLength + other.is_a?(Scrapegraphai::Internal::Type::HashOf) && other.nilable? == nilable? && other.item_type == item_type + # rubocop:enable Layout/LineLength + end + + # @api public + # + # @return [Integer] + def hash = [self.class, item_type].hash + + # @api private + # + # @param value [Hash{Object=>Object}, Object] + # + # @param state [Hash{Symbol=>Object}] . + # + # @option state [Boolean] :translate_names + # + # @option state [Boolean] :strictness + # + # @option state [Hash{Symbol=>Object}] :exactness + # + # @option state [Class] :error + # + # @option state [Integer] :branched + # + # @return [Hash{Symbol=>Object}, Object] + def coerce(value, state:) + exactness = state.fetch(:exactness) + + unless value.is_a?(Hash) + exactness[:no] += 1 + state[:error] = TypeError.new("#{value.class} can't be coerced into #{Hash}") + return value + end + + target = item_type + exactness[:yes] += 1 + value + .to_h do |key, val| + k = key.is_a?(String) ? 
key.to_sym : key
+ v =
+ case [nilable?, val]
+ in [true, nil]
+ exactness[:yes] += 1
+ nil
+ else
+ Scrapegraphai::Internal::Type::Converter.coerce(target, val, state: state)
+ end
+
+ exactness[:no] += 1 unless k.is_a?(Symbol)
+ [k, v]
+ end
+ end
+
+ # @api private
+ #
+ # @param value [Hash{Object=>Object}, Object]
+ #
+ # @param state [Hash{Symbol=>Object}] .
+ #
+ # @option state [Boolean] :can_retry
+ #
+ # @return [Hash{Symbol=>Object}, Object]
+ def dump(value, state:)
+ target = item_type
+ if value.is_a?(Hash)
+ value.transform_values do
+ Scrapegraphai::Internal::Type::Converter.dump(target, _1, state: state)
+ end
+ else
+ super
+ end
+ end
+
+ # @api private
+ #
+ # @return [Object]
+ def to_sorbet_type
+ T::Hash[Scrapegraphai::Internal::Util::SorbetRuntimeSupport.to_sorbet_type(item_type)]
+ end
+
+ # @api private
+ #
+ # @return [generic<Elem>]
+ protected def item_type = @item_type_fn.call
+
+ # @api private
+ #
+ # @return [Boolean]
+ protected def nilable? = @nilable
+
+ # @api private
+ #
+ # @param type_info [Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class]
+ #
+ # @param spec [Hash{Symbol=>Object}] .
+ #
+ # @option spec [NilClass, TrueClass, FalseClass, Integer, Float, Symbol] :const
+ #
+ # @option spec [Proc] :enum
+ #
+ # @option spec [Proc] :union
+ #
+ # @option spec [Boolean] :"nil?"
+ def initialize(type_info, spec = {})
+ @item_type_fn = Scrapegraphai::Internal::Type::Converter.type_info(type_info || spec)
+ @meta = Scrapegraphai::Internal::Type::Converter.meta_info(type_info, spec)
+ @nilable = spec.fetch(:nil?, false)
+ end
+
+ # @api private
+ #
+ # @param depth [Integer]
+ #
+ # @return [String]
+ def inspect(depth: 0)
+ items = Scrapegraphai::Internal::Type::Converter.inspect(item_type, depth: depth.succ)
+
+ "#{self.class}[#{[items, nilable? ?
'nil' : nil].compact.join(' | ')}]" + end + end + end + end +end diff --git a/lib/scrapegraphai/internal/type/request_parameters.rb b/lib/scrapegraphai/internal/type/request_parameters.rb new file mode 100644 index 0000000..067de0d --- /dev/null +++ b/lib/scrapegraphai/internal/type/request_parameters.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Internal + module Type + # @api private + module RequestParameters + # @!attribute request_options + # Options to specify HTTP behaviour for this request. + # + # @return [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + + # @param mod [Module] + def self.included(mod) + raise ArgumentError.new(mod) unless mod <= Scrapegraphai::Internal::Type::BaseModel + + mod.optional(:request_options, Scrapegraphai::RequestOptions) + end + + # @api private + module Converter + # @api private + # + # @param params [Object] + # + # @return [Array(Object, Hash{Symbol=>Object})] + def dump_request(params) + state = {can_retry: true} + case (dumped = dump(params, state: state)) + in Hash + options = Scrapegraphai::Internal::Util.coerce_hash!(dumped[:request_options]).to_h + request_options = state.fetch(:can_retry) ? 
options : {**options, max_retries: 0}
+ [dumped.except(:request_options), request_options]
+ else
+ [dumped, nil]
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/scrapegraphai/internal/type/union.rb b/lib/scrapegraphai/internal/type/union.rb
new file mode 100644
index 0000000..25b626b
--- /dev/null
+++ b/lib/scrapegraphai/internal/type/union.rb
@@ -0,0 +1,254 @@
+# frozen_string_literal: true
+
+module Scrapegraphai
+ module Internal
+ module Type
+ # @api private
+ #
+ # @example
+ # # `smartscraper_retrieve_response` is a `Scrapegraphai::Models::SmartscraperRetrieveResponse`
+ # case smartscraper_retrieve_response
+ # when Scrapegraphai::CompletedSmartscraper
+ # puts(smartscraper_retrieve_response.error)
+ # when Scrapegraphai::FailedSmartscraper
+ # puts(smartscraper_retrieve_response.request_id)
+ # else
+ # puts(smartscraper_retrieve_response)
+ # end
+ module Union
+ include Scrapegraphai::Internal::Type::Converter
+ include Scrapegraphai::Internal::Util::SorbetRuntimeSupport
+
+ # @api private
+ #
+ # All of the specified variant info for this union.
+ #
+ # @return [Array<(Symbol, Proc, Hash{Symbol=>Object})>]
+ private def known_variants = (@known_variants ||= [])
+
+ # @api private
+ #
+ # @return [Array<(Symbol, Object, Hash{Symbol=>Object})>]
+ protected def derefed_variants
+ known_variants.map { |key, variant_fn, meta| [key, variant_fn.call, meta] }
+ end
+
+ # All of the specified variants for this union.
+ #
+ # @return [Array<Object>]
+ def variants = derefed_variants.map { _2 }
+
+ # @api private
+ #
+ # @param property [Symbol]
+ private def discriminator(property)
+ case property
+ in Symbol
+ @discriminator = property
+ end
+ end
+
+ # @api private
+ #
+ # @param key [Symbol, Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class]
+ #
+ # @param spec [Hash{Symbol=>Object}, Proc, Scrapegraphai::Internal::Type::Converter, Class] .
+ # + # @option spec [NilClass, TrueClass, FalseClass, Integer, Float, Symbol] :const + # + # @option spec [Proc] :enum + # + # @option spec [Proc] :union + # + # @option spec [Boolean] :"nil?" + private def variant(key, spec = nil) + meta = Scrapegraphai::Internal::Type::Converter.meta_info(nil, spec) + variant_info = + case key + in Symbol + [key, Scrapegraphai::Internal::Type::Converter.type_info(spec), meta] + in Proc | Scrapegraphai::Internal::Type::Converter | Class | Hash + [nil, Scrapegraphai::Internal::Type::Converter.type_info(key), meta] + end + + known_variants << variant_info + end + + # @api private + # + # @param value [Object] + # + # @return [Scrapegraphai::Internal::Type::Converter, Class, nil] + private def resolve_variant(value) + case [@discriminator, value] + in [_, Scrapegraphai::Internal::Type::BaseModel] + value.class + in [Symbol, Hash] + key = value.fetch(@discriminator) do + value.fetch(@discriminator.to_s, Scrapegraphai::Internal::OMIT) + end + + return nil if key == Scrapegraphai::Internal::OMIT + + key = key.to_sym if key.is_a?(String) + _, found = known_variants.find { |k,| k == key } + found&.call + else + nil + end + end + + # rubocop:disable Style/HashEachMethods + # rubocop:disable Style/CaseEquality + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def ===(other) + known_variants.any? do |_, variant_fn| + variant_fn.call === other + end + end + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def ==(other) + Scrapegraphai::Internal::Type::Union === other && other.derefed_variants == derefed_variants + end + + # @api public + # + # @return [Integer] + def hash = variants.hash + + # @api private + # + # Tries to efficiently coerce the given value to one of the known variants. + # + # If the value cannot match any of the known variants, the coercion is considered + # non-viable and returns the original value. 
+ # + # @param value [Object] + # + # @param state [Hash{Symbol=>Object}] . + # + # @option state [Boolean] :translate_names + # + # @option state [Boolean] :strictness + # + # @option state [Hash{Symbol=>Object}] :exactness + # + # @option state [Class] :error + # + # @option state [Integer] :branched + # + # @return [Object] + def coerce(value, state:) + if (target = resolve_variant(value)) + return Scrapegraphai::Internal::Type::Converter.coerce(target, value, state: state) + end + + strictness = state.fetch(:strictness) + exactness = state.fetch(:exactness) + + alternatives = [] + known_variants.each do |_, variant_fn| + target = variant_fn.call + exact = state[:exactness] = {yes: 0, no: 0, maybe: 0} + state[:branched] += 1 + + coerced = Scrapegraphai::Internal::Type::Converter.coerce(target, value, state: state) + yes, no, maybe = exact.values + if (no + maybe).zero? || (!strictness && yes.positive?) + exact.each { exactness[_1] += _2 } + state[:exactness] = exactness + return coerced + elsif maybe.positive? + alternatives << [[-yes, -maybe, no], exact, coerced] + end + end + + case alternatives.sort_by!(&:first) + in [] + exactness[:no] += 1 + state[:error] = ArgumentError.new("no matching variant for #{value.inspect}") + value + in [[_, exact, coerced], *] + exact.each { exactness[_1] += _2 } + coerced + end + .tap { state[:exactness] = exactness } + ensure + state[:strictness] = strictness + end + + # @api private + # + # @param value [Object] + # + # @param state [Hash{Symbol=>Object}] . 
+ # + # @option state [Boolean] :can_retry + # + # @return [Object] + def dump(value, state:) + if (target = resolve_variant(value)) + return Scrapegraphai::Internal::Type::Converter.dump(target, value, state: state) + end + + known_variants.each do + target = _2.call + if target === value + return Scrapegraphai::Internal::Type::Converter.dump( + target, + value, + state: state + ) + end + end + + super + end + + # @api private + # + # @return [Object] + def to_sorbet_type + types = variants.map { Scrapegraphai::Internal::Util::SorbetRuntimeSupport.to_sorbet_type(_1) }.uniq + case types + in [] + T.noreturn + in [type] + type + else + T.any(*types) + end + end + + # rubocop:enable Style/CaseEquality + # rubocop:enable Style/HashEachMethods + + # @api private + # + # @param depth [Integer] + # + # @return [String] + def inspect(depth: 0) + if depth.positive? + return is_a?(Module) ? super() : self.class.name + end + + members = variants.map { Scrapegraphai::Internal::Type::Converter.inspect(_1, depth: depth.succ) } + prefix = is_a?(Module) ? name : self.class.name + + "#{prefix}[#{members.join(' | ')}]" + end + end + end + end +end diff --git a/lib/scrapegraphai/internal/type/unknown.rb b/lib/scrapegraphai/internal/type/unknown.rb new file mode 100644 index 0000000..0c0edd7 --- /dev/null +++ b/lib/scrapegraphai/internal/type/unknown.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Internal + module Type + # @api private + # + # @abstract + # + # When we don't know what to expect for the value. 
+ class Unknown + extend Scrapegraphai::Internal::Type::Converter + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + # rubocop:disable Lint/UnusedMethodArgument + + private_class_method :new + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def self.===(other) = true + + # @api public + # + # @param other [Object] + # + # @return [Boolean] + def self.==(other) = other.is_a?(Class) && other <= Scrapegraphai::Internal::Type::Unknown + + class << self + # @api private + # + # No coercion needed for Unknown type. + # + # @param value [Object] + # + # @param state [Hash{Symbol=>Object}] . + # + # @option state [Boolean] :translate_names + # + # @option state [Boolean] :strictness + # + # @option state [Hash{Symbol=>Object}] :exactness + # + # @option state [Class] :error + # + # @option state [Integer] :branched + # + # @return [Object] + def coerce(value, state:) + state.fetch(:exactness)[:yes] += 1 + value + end + + # @!method dump(value, state:) + # @api private + # + # @param value [Object] + # + # @param state [Hash{Symbol=>Object}] . 
+ # + # @option state [Boolean] :can_retry + # + # @return [Object] + + # @api private + # + # @return [Object] + def to_sorbet_type + T.anything + end + end + + # rubocop:enable Lint/UnusedMethodArgument + end + end + end +end diff --git a/lib/scrapegraphai/internal/util.rb b/lib/scrapegraphai/internal/util.rb new file mode 100644 index 0000000..91e588e --- /dev/null +++ b/lib/scrapegraphai/internal/util.rb @@ -0,0 +1,915 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Internal + # @api private + module Util + # @api private + # + # @return [Float] + def self.monotonic_secs = Process.clock_gettime(Process::CLOCK_MONOTONIC) + + # @api private + # + # @param ns [Module, Class] + # + # @return [Enumerable] + def self.walk_namespaces(ns) + ns.constants(false).lazy.flat_map do + case (c = ns.const_get(_1, false)) + in Module | Class + walk_namespaces(c) + else + [] + end + end + .chain([ns]) + end + + class << self + # @api private + # + # @return [String] + def arch + case (arch = RbConfig::CONFIG["arch"])&.downcase + in nil + "unknown" + in /aarch64|arm64/ + "arm64" + in /x86_64/ + "x64" + in /arm/ + "arm" + else + "other:#{arch}" + end + end + + # @api private + # + # @return [String] + def os + case (host = RbConfig::CONFIG["host_os"])&.downcase + in nil + "Unknown" + in /linux/ + "Linux" + in /darwin/ + "MacOS" + in /freebsd/ + "FreeBSD" + in /openbsd/ + "OpenBSD" + in /mswin|mingw|cygwin|ucrt/ + "Windows" + else + "Other:#{host}" + end + end + end + + class << self + # @api private + # + # @param input [Object] + # + # @return [Boolean] + def primitive?(input) + case input + in true | false | Numeric | Symbol | String + true + else + false + end + end + + # @api private + # + # @param input [String, Boolean] + # + # @return [Boolean, Object] + def coerce_boolean(input) + case input.is_a?(String) ? 
input.downcase : input
+          in "true"
+            true
+          in "false"
+            false
+          else
+            input
+          end
+        end
+
+        # @api private
+        #
+        # @param input [String, Boolean]
+        #
+        # @raise [ArgumentError]
+        # @return [Boolean, nil]
+        def coerce_boolean!(input)
+          case coerce_boolean(input)
+          in true | false | nil => coerced
+            coerced
+          else
+            raise ArgumentError.new("Unable to coerce #{input.inspect} into boolean value")
+          end
+        end
+
+        # @api private
+        #
+        # @param input [String, Integer]
+        #
+        # @return [Integer, Object]
+        def coerce_integer(input)
+          Integer(input, exception: false) || input
+        end
+
+        # @api private
+        #
+        # @param input [String, Integer, Float]
+        #
+        # @return [Float, Object]
+        def coerce_float(input)
+          Float(input, exception: false) || input
+        end
+
+        # @api private
+        #
+        # @param input [Object]
+        #
+        # @return [Hash{Object=>Object}, Object]
+        def coerce_hash(input)
+          case input
+          in NilClass | Array | Set | Enumerator | StringIO | IO
+            input
+          else
+            input.respond_to?(:to_h) ? input.to_h : input
+          end
+        end
+
+        # @api private
+        #
+        # @param input [Object]
+        #
+        # @raise [ArgumentError]
+        # @return [Hash{Object=>Object}, nil]
+        def coerce_hash!(input)
+          case coerce_hash(input)
+          in Hash | nil => coerced
+            coerced
+          else
+            message = "Expected a #{Hash} or #{Scrapegraphai::Internal::Type::BaseModel}, got #{input.inspect}"
+            raise ArgumentError.new(message)
+          end
+        end
+      end
+
+      class << self
+        # @api private
+        #
+        # @param lhs [Object]
+        # @param rhs [Object]
+        # @param concat [Boolean]
+        #
+        # @return [Object]
+        private def deep_merge_lr(lhs, rhs, concat: false)
+          case [lhs, rhs, concat]
+          in [Hash, Hash, _]
+            lhs.merge(rhs) { deep_merge_lr(_2, _3, concat: concat) }
+          in [Array, Array, true]
+            lhs.concat(rhs)
+          else
+            rhs
+          end
+        end
+
+        # @api private
+        #
+        # Recursively merge one hash with another. If the values at a given key are not
+        # both hashes, just take the new value. 
+ # + # @param values [Array] + # + # @param sentinel [Object, nil] the value to return if no values are provided. + # + # @param concat [Boolean] whether to merge sequences by concatenation. + # + # @return [Object] + def deep_merge(*values, sentinel: nil, concat: false) + case values + in [value, *values] + values.reduce(value) do |acc, val| + deep_merge_lr(acc, val, concat: concat) + end + else + sentinel + end + end + + # @api private + # + # @param data [Hash{Symbol=>Object}, Array, Object] + # @param pick [Symbol, Integer, Array, Proc, nil] + # @param blk [Proc, nil] + # + # @return [Object, nil] + def dig(data, pick, &blk) + case [data, pick] + in [_, nil] + data + in [Hash, Symbol] | [Array, Integer] + data.fetch(pick) { blk&.call } + in [Hash | Array, Array] + pick.reduce(data) do |acc, key| + case acc + in Hash if acc.key?(key) + acc.fetch(key) + in Array if key.is_a?(Integer) && key < acc.length + acc[key] + else + return blk&.call + end + end + in [_, Proc] + pick.call(data) + else + blk&.call + end + end + end + + class << self + # @api private + # + # @param uri [URI::Generic] + # + # @return [String] + def uri_origin(uri) + "#{uri.scheme}://#{uri.host}#{":#{uri.port}" unless uri.port == uri.default_port}" + end + + # @api private + # + # @param path [String, Array] + # + # @return [String] + def interpolate_path(path) + case path + in String + path + in [] + "" + in [String => p, *interpolations] + encoded = interpolations.map { ERB::Util.url_encode(_1) } + format(p, *encoded) + end + end + end + + class << self + # @api private + # + # @param query [String, nil] + # + # @return [Hash{String=>Array}] + def decode_query(query) + CGI.parse(query.to_s) + end + + # @api private + # + # @param query [Hash{String=>Array, String, nil}, nil] + # + # @return [String, nil] + def encode_query(query) + query.to_h.empty? ? 
nil : URI.encode_www_form(query) + end + end + + class << self + # @api private + # + # @param url [URI::Generic, String] + # + # @return [Hash{Symbol=>String, Integer, nil}] + def parse_uri(url) + parsed = URI::Generic.component.zip(URI.split(url)).to_h + {**parsed, query: decode_query(parsed.fetch(:query))} + end + + # @api private + # + # @param parsed [Hash{Symbol=>String, Integer, nil}] . + # + # @option parsed [String, nil] :scheme + # + # @option parsed [String, nil] :host + # + # @option parsed [Integer, nil] :port + # + # @option parsed [String, nil] :path + # + # @option parsed [Hash{String=>Array}] :query + # + # @return [URI::Generic] + def unparse_uri(parsed) + URI::Generic.build(**parsed, query: encode_query(parsed.fetch(:query))) + end + + # @api private + # + # @param lhs [Hash{Symbol=>String, Integer, nil}] . + # + # @option lhs [String, nil] :scheme + # + # @option lhs [String, nil] :host + # + # @option lhs [Integer, nil] :port + # + # @option lhs [String, nil] :path + # + # @option lhs [Hash{String=>Array}] :query + # + # @param rhs [Hash{Symbol=>String, Integer, nil}] . + # + # @option rhs [String, nil] :scheme + # + # @option rhs [String, nil] :host + # + # @option rhs [Integer, nil] :port + # + # @option rhs [String, nil] :path + # + # @option rhs [Hash{String=>Array}] :query + # + # @return [URI::Generic] + def join_parsed_uri(lhs, rhs) + base_path, base_query = lhs.fetch_values(:path, :query) + slashed = base_path.end_with?("/") ? base_path : "#{base_path}/" + + parsed_path, parsed_query = parse_uri(rhs.fetch(:path)).fetch_values(:path, :query) + override = URI::Generic.build(**rhs.slice(:scheme, :host, :port), path: parsed_path) + + joined = URI.join(URI::Generic.build(lhs.except(:path, :query)), slashed, override) + query = deep_merge( + joined.path == base_path ? 
base_query : {}, + parsed_query, + rhs[:query].to_h, + concat: true + ) + + joined.query = encode_query(query) + joined + end + end + + class << self + # @api private + # + # @param headers [Hash{String=>String, Integer, Array, nil}] + # + # @return [Hash{String=>String}] + def normalized_headers(*headers) + {}.merge(*headers.compact).to_h do |key, val| + value = + case val + in Array + val.filter_map { _1&.to_s&.strip }.join(", ") + else + val&.to_s&.strip + end + [key.downcase, value] + end + end + end + + # @api private + # + # An adapter that satisfies the IO interface required by `::IO.copy_stream` + class ReadIOAdapter + # @api private + # + # @return [Boolean, nil] + def close? = @closing + + # @api private + def close + case @stream + in Enumerator + Scrapegraphai::Internal::Util.close_fused!(@stream) + in IO if close? + @stream.close + else + end + end + + # @api private + # + # @param max_len [Integer, nil] + # + # @return [String] + private def read_enum(max_len) + case max_len + in nil + @stream.to_a.join + in Integer + @buf << @stream.next while @buf.length < max_len + @buf.slice!(..max_len) + end + rescue StopIteration + @stream = nil + @buf.slice!(0..) 
+ end + + # @api private + # + # @param max_len [Integer, nil] + # @param out_string [String, nil] + # + # @return [String, nil] + def read(max_len = nil, out_string = nil) + case @stream + in nil + nil + in IO | StringIO + @stream.read(max_len, out_string) + in Enumerator + read = read_enum(max_len) + case out_string + in String + out_string.replace(read) + in nil + read + end + end + .tap(&@blk) + end + + # @api private + # + # @param src [String, Pathname, StringIO, Enumerable] + # @param blk [Proc] + # + # @yieldparam [String] + def initialize(src, &blk) + @stream = + case src + in String + StringIO.new(src) + in Pathname + @closing = true + src.open(binmode: true) + else + src + end + @buf = String.new + @blk = blk + end + end + + class << self + # @param blk [Proc] + # + # @yieldparam [Enumerator::Yielder] + # @return [Enumerable] + def writable_enum(&blk) + Enumerator.new do |y| + buf = String.new + y.define_singleton_method(:write) do + self << buf.replace(_1) + buf.bytesize + end + + blk.call(y) + end + end + end + + # @type [Regexp] + JSON_CONTENT = %r{^application/(?:vnd(?:\.[^.]+)*\+)?json(?!l)} + # @type [Regexp] + JSONL_CONTENT = %r{^application/(:?x-(?:n|l)djson)|(:?(?:x-)?jsonl)} + + class << self + # @api private + # + # @param y [Enumerator::Yielder] + # @param val [Object] + # @param closing [Array] + # @param content_type [String, nil] + private def write_multipart_content(y, val:, closing:, content_type: nil) + content_line = "Content-Type: %s\r\n\r\n" + + case val + in Scrapegraphai::FilePart + return write_multipart_content( + y, + val: val.content, + closing: closing, + content_type: val.content_type + ) + in Pathname + y << format(content_line, content_type || "application/octet-stream") + io = val.open(binmode: true) + closing << io.method(:close) + IO.copy_stream(io, y) + in IO + y << format(content_line, content_type || "application/octet-stream") + IO.copy_stream(val, y) + in StringIO + y << format(content_line, content_type || 
+              "application/octet-stream")
+            y << val.string
+          in -> { primitive?(_1) }
+            y << format(content_line, content_type || "text/plain")
+            y << val.to_s
+          else
+            y << format(content_line, content_type || "application/json")
+            y << JSON.generate(val)
+          end
+          y << "\r\n"
+        end
+
+        # @api private
+        #
+        # @param y [Enumerator::Yielder]
+        # @param boundary [String]
+        # @param key [Symbol, String]
+        # @param val [Object]
+        # @param closing [Array]
+        private def write_multipart_chunk(y, boundary:, key:, val:, closing:)
+          y << "--#{boundary}\r\n"
+          y << "Content-Disposition: form-data"
+
+          unless key.nil?
+            name = ERB::Util.url_encode(key.to_s)
+            y << "; name=\"#{name}\""
+          end
+
+          case val
+          in Scrapegraphai::FilePart unless val.filename.nil?
+            filename = ERB::Util.url_encode(val.filename)
+            y << "; filename=\"#{filename}\""
+          in Pathname | IO
+            filename = ERB::Util.url_encode(::File.basename(val.to_path))
+            y << "; filename=\"#{filename}\""
+          else
+          end
+          y << "\r\n"
+
+          write_multipart_content(y, val: val, closing: closing)
+        end
+
+        # @api private
+        #
+        # https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.1.1.md#special-considerations-for-multipart-content
+        #
+        # @param body [Object]
+        #
+        # @return [Array(String, Enumerable)]
+        private def encode_multipart_streaming(body)
+          # RFC 1521 Section 7.2.1 says we should have 70 char maximum for boundary length
+          boundary = SecureRandom.urlsafe_base64(46)
+
+          closing = []
+          strio = writable_enum do |y|
+            case body
+            in Hash
+              body.each do |key, val|
+                case val
+                in Array if val.all? 
{ primitive?(_1) } + val.each do |v| + write_multipart_chunk(y, boundary: boundary, key: key, val: v, closing: closing) + end + else + write_multipart_chunk(y, boundary: boundary, key: key, val: val, closing: closing) + end + end + else + write_multipart_chunk(y, boundary: boundary, key: nil, val: body, closing: closing) + end + y << "--#{boundary}--\r\n" + end + + fused_io = fused_enum(strio) { closing.each(&:call) } + [boundary, fused_io] + end + + # @api private + # + # @param headers [Hash{String=>String}] + # @param body [Object] + # + # @return [Object] + def encode_content(headers, body) + # rubocop:disable Style/CaseEquality + # rubocop:disable Layout/LineLength + content_type = headers["content-type"] + case [content_type, body] + in [Scrapegraphai::Internal::Util::JSON_CONTENT, Hash | Array | -> { primitive?(_1) }] + [headers, JSON.generate(body)] + in [Scrapegraphai::Internal::Util::JSONL_CONTENT, Enumerable] unless Scrapegraphai::Internal::Type::FileInput === body + [headers, body.lazy.map { JSON.generate(_1) }] + in [%r{^multipart/form-data}, Hash | Scrapegraphai::Internal::Type::FileInput] + boundary, strio = encode_multipart_streaming(body) + headers = {**headers, "content-type" => "#{content_type}; boundary=#{boundary}"} + [headers, strio] + in [_, Symbol | Numeric] + [headers, body.to_s] + in [_, StringIO] + [headers, body.string] + in [_, Scrapegraphai::FilePart] + [headers, body.content] + else + [headers, body] + end + # rubocop:enable Layout/LineLength + # rubocop:enable Style/CaseEquality + end + + # @api private + # + # https://www.iana.org/assignments/character-sets/character-sets.xhtml + # + # @param content_type [String] + # @param text [String] + def force_charset!(content_type, text:) + charset = /charset=([^;\s]+)/.match(content_type)&.captures&.first + + return unless charset + + begin + encoding = Encoding.find(charset) + text.force_encoding(encoding) + rescue ArgumentError + nil + end + end + + # @api private + # + # Assumes each 
chunk in stream has `Encoding::BINARY`. + # + # @param headers [Hash{String=>String}] + # @param stream [Enumerable] + # @param suppress_error [Boolean] + # + # @raise [JSON::ParserError] + # @return [Object] + def decode_content(headers, stream:, suppress_error: false) + case (content_type = headers["content-type"]) + in Scrapegraphai::Internal::Util::JSON_CONTENT + json = stream.to_a.join + begin + JSON.parse(json, symbolize_names: true) + rescue JSON::ParserError => e + raise e unless suppress_error + json + end + in Scrapegraphai::Internal::Util::JSONL_CONTENT + lines = decode_lines(stream) + chain_fused(lines) do |y| + lines.each { y << JSON.parse(_1, symbolize_names: true) } + end + in %r{^text/event-stream} + lines = decode_lines(stream) + decode_sse(lines) + else + text = stream.to_a.join + force_charset!(content_type, text: text) + StringIO.new(text) + end + end + end + + class << self + # @api private + # + # https://doc.rust-lang.org/std/iter/trait.FusedIterator.html + # + # @param enum [Enumerable] + # @param external [Boolean] + # @param close [Proc] + # + # @return [Enumerable] + def fused_enum(enum, external: false, &close) + fused = false + iter = Enumerator.new do |y| + next if fused + + fused = true + if external + loop { y << enum.next } + else + enum.each(&y) + end + ensure + close&.call + close = nil + end + + iter.define_singleton_method(:rewind) do + fused = true + self + end + iter + end + + # @api private + # + # @param enum [Enumerable, nil] + def close_fused!(enum) + return unless enum.is_a?(Enumerator) + + # rubocop:disable Lint/UnreachableLoop + enum.rewind.each { break } + # rubocop:enable Lint/UnreachableLoop + end + + # @api private + # + # @param enum [Enumerable, nil] + # @param blk [Proc] + # + # @yieldparam [Enumerator::Yielder] + # @return [Enumerable] + def chain_fused(enum, &blk) + iter = Enumerator.new { blk.call(_1) } + fused_enum(iter) { close_fused!(enum) } + end + end + + class << self + # @api private + # + # Assumes 
Strings have been forced into having `Encoding::BINARY`. + # + # This decoder is responsible for reassembling lines split across multiple + # fragments. + # + # @param enum [Enumerable] + # + # @return [Enumerable] + def decode_lines(enum) + re = /(\r\n|\r|\n)/ + buffer = String.new + cr_seen = nil + + chain_fused(enum) do |y| + enum.each do |row| + offset = buffer.bytesize + buffer << row + while (match = re.match(buffer, cr_seen&.to_i || offset)) + case [match.captures.first, cr_seen] + in ["\r", nil] + cr_seen = match.end(1) + next + in ["\r" | "\r\n", Integer] + y << buffer.slice!(..(cr_seen.pred)) + else + y << buffer.slice!(..(match.end(1).pred)) + end + offset = 0 + cr_seen = nil + end + end + + y << buffer.slice!(..(cr_seen.pred)) unless cr_seen.nil? + y << buffer unless buffer.empty? + end + end + + # @api private + # + # https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream + # + # Assumes that `lines` has been decoded with `#decode_lines`. + # + # @param lines [Enumerable] + # + # @return [EnumerableObject}>] + def decode_sse(lines) + # rubocop:disable Metrics/BlockLength + chain_fused(lines) do |y| + blank = {event: nil, data: nil, id: nil, retry: nil} + current = {} + + lines.each do |line| + case line.sub(/\R$/, "") + in "" + next if current.empty? + y << {**blank, **current} + current = {} + in /^:/ + next + in /^([^:]+):\s?(.*)$/ + field, value = Regexp.last_match.captures + case field + in "event" + current.merge!(event: value) + in "data" + (current[:data] ||= String.new) << (value << "\n") + in "id" unless value.include?("\0") + current.merge!(id: value) + in "retry" if /^\d+$/ =~ value + current.merge!(retry: Integer(value)) + else + end + else + end + end + # rubocop:enable Metrics/BlockLength + + y << {**blank, **current} unless current.empty? 
+ end + end + end + + # @api private + module SorbetRuntimeSupport + class MissingSorbetRuntimeError < ::RuntimeError + end + + # @api private + # + # @return [Hash{Symbol=>Object}] + private def sorbet_runtime_constants = @sorbet_runtime_constants ||= {} + + # @api private + # + # @param name [Symbol] + def const_missing(name) + super unless sorbet_runtime_constants.key?(name) + + unless Object.const_defined?(:T) + message = "Trying to access a Sorbet constant #{name.inspect} without `sorbet-runtime`." + raise MissingSorbetRuntimeError.new(message) + end + + sorbet_runtime_constants.fetch(name).call + end + + # @api private + # + # @param name [Symbol] + # + # @return [Boolean] + def sorbet_constant_defined?(name) = sorbet_runtime_constants.key?(name) + + # @api private + # + # @param name [Symbol] + # @param blk [Proc] + def define_sorbet_constant!(name, &blk) = sorbet_runtime_constants.store(name, blk) + + # @api private + # + # @return [Object] + def to_sorbet_type = raise NotImplementedError + + class << self + # @api private + # + # @param type [Scrapegraphai::Internal::Util::SorbetRuntimeSupport, Object] + # + # @return [Object] + def to_sorbet_type(type) + case type + in Scrapegraphai::Internal::Util::SorbetRuntimeSupport + type.to_sorbet_type + in Class | Module + type + in true | false + T::Boolean + else + type.class + end + end + end + end + + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + define_sorbet_constant!(:ParsedUri) do + T.type_alias do + { + scheme: T.nilable(String), + host: T.nilable(String), + port: T.nilable(Integer), + path: T.nilable(String), + query: T::Hash[String, T::Array[String]] + } + end + end + + define_sorbet_constant!(:ServerSentEvent) do + T.type_alias do + { + event: T.nilable(String), + data: T.nilable(String), + id: T.nilable(String), + retry: T.nilable(Integer) + } + end + end + end + end +end diff --git a/lib/scrapegraphai/models.rb b/lib/scrapegraphai/models.rb new file mode 100644 index 0000000..2400a4b 
--- /dev/null +++ b/lib/scrapegraphai/models.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +module Scrapegraphai + [ + Scrapegraphai::Internal::Type::BaseModel, + *Scrapegraphai::Internal::Type::BaseModel.subclasses + ].each do |cls| + cls.define_sorbet_constant!(:OrHash) { T.type_alias { T.any(cls, Scrapegraphai::Internal::AnyHash) } } + end + + Scrapegraphai::Internal::Util.walk_namespaces(Scrapegraphai::Models).each do |mod| + case mod + in Scrapegraphai::Internal::Type::Enum | Scrapegraphai::Internal::Type::Union + mod.constants.each do |name| + case mod.const_get(name) + in true | false + mod.define_sorbet_constant!(:TaggedBoolean) { T.type_alias { T::Boolean } } + mod.define_sorbet_constant!(:OrBoolean) { T.type_alias { T::Boolean } } + in Integer + mod.define_sorbet_constant!(:TaggedInteger) { T.type_alias { Integer } } + mod.define_sorbet_constant!(:OrInteger) { T.type_alias { Integer } } + in Float + mod.define_sorbet_constant!(:TaggedFloat) { T.type_alias { Float } } + mod.define_sorbet_constant!(:OrFloat) { T.type_alias { Float } } + in Symbol + mod.define_sorbet_constant!(:TaggedSymbol) { T.type_alias { Symbol } } + mod.define_sorbet_constant!(:OrSymbol) { T.type_alias { T.any(Symbol, String) } } + else + end + end + else + end + end + + Scrapegraphai::Internal::Util.walk_namespaces(Scrapegraphai::Models) + .lazy + .grep(Scrapegraphai::Internal::Type::Union) + .each do |mod| + const = :Variants + next if mod.sorbet_constant_defined?(const) + + mod.define_sorbet_constant!(const) { T.type_alias { mod.to_sorbet_type } } + end + + CompletedMarkdownify = Scrapegraphai::Models::CompletedMarkdownify + + CompletedSearchScraper = Scrapegraphai::Models::CompletedSearchScraper + + CompletedSmartscraper = Scrapegraphai::Models::CompletedSmartscraper + + CrawlRetrieveResultsParams = Scrapegraphai::Models::CrawlRetrieveResultsParams + + CrawlStartParams = Scrapegraphai::Models::CrawlStartParams + + CreditRetrieveParams = 
Scrapegraphai::Models::CreditRetrieveParams + + FailedSmartscraper = Scrapegraphai::Models::FailedSmartscraper + + FeedbackSubmitParams = Scrapegraphai::Models::FeedbackSubmitParams + + GenerateSchemaCreateParams = Scrapegraphai::Models::GenerateSchemaCreateParams + + GenerateSchemaRetrieveParams = Scrapegraphai::Models::GenerateSchemaRetrieveParams + + HealthzCheckParams = Scrapegraphai::Models::HealthzCheckParams + + MarkdownifyConvertParams = Scrapegraphai::Models::MarkdownifyConvertParams + + MarkdownifyRetrieveStatusParams = Scrapegraphai::Models::MarkdownifyRetrieveStatusParams + + SearchscraperCreateParams = Scrapegraphai::Models::SearchscraperCreateParams + + SearchscraperRetrieveStatusParams = Scrapegraphai::Models::SearchscraperRetrieveStatusParams + + SmartscraperCreateParams = Scrapegraphai::Models::SmartscraperCreateParams + + SmartscraperListParams = Scrapegraphai::Models::SmartscraperListParams + + SmartscraperRetrieveParams = Scrapegraphai::Models::SmartscraperRetrieveParams + + ValidateAPIKeyParams = Scrapegraphai::Models::ValidateAPIKeyParams +end diff --git a/lib/scrapegraphai/models/completed_markdownify.rb b/lib/scrapegraphai/models/completed_markdownify.rb new file mode 100644 index 0000000..783205b --- /dev/null +++ b/lib/scrapegraphai/models/completed_markdownify.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Markdownify#convert + class CompletedMarkdownify < Scrapegraphai::Internal::Type::BaseModel + # @!attribute error + # + # @return [String, nil] + optional :error, String + + # @!attribute request_id + # + # @return [String, nil] + optional :request_id, String + + # @!attribute result + # Markdown content + # + # @return [String, nil] + optional :result, String, nil?: true + + # @!attribute status + # + # @return [Symbol, Scrapegraphai::Models::CompletedMarkdownify::Status, nil] + optional :status, enum: -> { Scrapegraphai::CompletedMarkdownify::Status } + + # 
@!attribute website_url + # + # @return [String, nil] + optional :website_url, String + + # @!method initialize(error: nil, request_id: nil, result: nil, status: nil, website_url: nil) + # @param error [String] + # + # @param request_id [String] + # + # @param result [String, nil] Markdown content + # + # @param status [Symbol, Scrapegraphai::Models::CompletedMarkdownify::Status] + # + # @param website_url [String] + + # @see Scrapegraphai::Models::CompletedMarkdownify#status + module Status + extend Scrapegraphai::Internal::Type::Enum + + QUEUED = :queued + PROCESSING = :processing + COMPLETED = :completed + + # @!method self.values + # @return [Array] + end + end + end +end diff --git a/lib/scrapegraphai/models/completed_search_scraper.rb b/lib/scrapegraphai/models/completed_search_scraper.rb new file mode 100644 index 0000000..16f7a8c --- /dev/null +++ b/lib/scrapegraphai/models/completed_search_scraper.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Searchscraper#create + class CompletedSearchScraper < Scrapegraphai::Internal::Type::BaseModel + # @!attribute error + # + # @return [String, nil] + optional :error, String, nil?: true + + # @!attribute num_results + # + # @return [Integer, nil] + optional :num_results, Integer + + # @!attribute reference_urls + # URLs of sources used + # + # @return [Array, nil] + optional :reference_urls, Scrapegraphai::Internal::Type::ArrayOf[String] + + # @!attribute request_id + # + # @return [String, nil] + optional :request_id, String + + # @!attribute result + # Merged results from all scraped websites + # + # @return [Object, nil] + optional :result, Scrapegraphai::Internal::Type::Unknown + + # @!attribute status + # + # @return [Symbol, Scrapegraphai::Models::CompletedSearchScraper::Status, nil] + optional :status, enum: -> { Scrapegraphai::CompletedSearchScraper::Status } + + # @!attribute user_prompt + # + # @return [String, nil] + optional 
:user_prompt, String + + # @!method initialize(error: nil, num_results: nil, reference_urls: nil, request_id: nil, result: nil, status: nil, user_prompt: nil) + # @param error [String, nil] + # + # @param num_results [Integer] + # + # @param reference_urls [Array] URLs of sources used + # + # @param request_id [String] + # + # @param result [Object] Merged results from all scraped websites + # + # @param status [Symbol, Scrapegraphai::Models::CompletedSearchScraper::Status] + # + # @param user_prompt [String] + + # @see Scrapegraphai::Models::CompletedSearchScraper#status + module Status + extend Scrapegraphai::Internal::Type::Enum + + QUEUED = :queued + PROCESSING = :processing + COMPLETED = :completed + + # @!method self.values + # @return [Array] + end + end + end +end diff --git a/lib/scrapegraphai/models/completed_smartscraper.rb b/lib/scrapegraphai/models/completed_smartscraper.rb new file mode 100644 index 0000000..9e93296 --- /dev/null +++ b/lib/scrapegraphai/models/completed_smartscraper.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Smartscraper#create + class CompletedSmartscraper < Scrapegraphai::Internal::Type::BaseModel + # @!attribute error + # Error message (empty on success) + # + # @return [String, nil] + optional :error, String + + # @!attribute request_id + # Unique request identifier + # + # @return [String, nil] + optional :request_id, String + + # @!attribute result + # Extracted data based on prompt/schema + # + # @return [Object, nil] + optional :result, Scrapegraphai::Internal::Type::Unknown, nil?: true + + # @!attribute status + # Processing status + # + # @return [Symbol, Scrapegraphai::Models::CompletedSmartscraper::Status, nil] + optional :status, enum: -> { Scrapegraphai::CompletedSmartscraper::Status } + + # @!attribute user_prompt + # + # @return [String, nil] + optional :user_prompt, String + + # @!attribute website_url + # + # @return [String, nil] + 
optional :website_url, String, nil?: true + + # @!method initialize(error: nil, request_id: nil, result: nil, status: nil, user_prompt: nil, website_url: nil) + # @param error [String] Error message (empty on success) + # + # @param request_id [String] Unique request identifier + # + # @param result [Object, nil] Extracted data based on prompt/schema + # + # @param status [Symbol, Scrapegraphai::Models::CompletedSmartscraper::Status] Processing status + # + # @param user_prompt [String] + # + # @param website_url [String, nil] + + # Processing status + # + # @see Scrapegraphai::Models::CompletedSmartscraper#status + module Status + extend Scrapegraphai::Internal::Type::Enum + + QUEUED = :queued + PROCESSING = :processing + COMPLETED = :completed + + # @!method self.values + # @return [Array] + end + end + end +end diff --git a/lib/scrapegraphai/models/crawl_retrieve_results_params.rb b/lib/scrapegraphai/models/crawl_retrieve_results_params.rb new file mode 100644 index 0000000..23ade4f --- /dev/null +++ b/lib/scrapegraphai/models/crawl_retrieve_results_params.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Crawl#retrieve_results + class CrawlRetrieveResultsParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!method initialize(request_options: {}) + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/crawl_retrieve_results_response.rb b/lib/scrapegraphai/models/crawl_retrieve_results_response.rb new file mode 100644 index 0000000..51994d0 --- /dev/null +++ b/lib/scrapegraphai/models/crawl_retrieve_results_response.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Crawl#retrieve_results + class 
CrawlRetrieveResultsResponse < Scrapegraphai::Internal::Type::BaseModel + # @!attribute result + # Successful crawl results + # + # @return [Object, String, nil] + optional :result, union: -> { Scrapegraphai::Models::CrawlRetrieveResultsResponse::Result } + + # @!attribute status + # + # @return [Symbol, Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status, nil] + optional :status, enum: -> { Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status } + + # @!attribute task_id + # + # @return [String, nil] + optional :task_id, String + + # @!attribute traceback + # Error traceback for failed tasks + # + # @return [String, nil] + optional :traceback, String, nil?: true + + # @!method initialize(result: nil, status: nil, task_id: nil, traceback: nil) + # @param result [Object, String] Successful crawl results + # + # @param status [Symbol, Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status] + # + # @param task_id [String] + # + # @param traceback [String, nil] Error traceback for failed tasks + + # Successful crawl results + # + # @see Scrapegraphai::Models::CrawlRetrieveResultsResponse#result + module Result + extend Scrapegraphai::Internal::Type::Union + + # Successful crawl results + variant Scrapegraphai::Internal::Type::Unknown + + # Error message + variant String + + # @!method self.variants + # @return [Array(Object, String)] + end + + # @see Scrapegraphai::Models::CrawlRetrieveResultsResponse#status + module Status + extend Scrapegraphai::Internal::Type::Enum + + PENDING = :PENDING + STARTED = :STARTED + SUCCESS = :SUCCESS + FAILURE = :FAILURE + RETRY = :RETRY + REVOKED = :REVOKED + + # @!method self.values + # @return [Array] + end + end + end +end diff --git a/lib/scrapegraphai/models/crawl_start_params.rb b/lib/scrapegraphai/models/crawl_start_params.rb new file mode 100644 index 0000000..788bbbd --- /dev/null +++ b/lib/scrapegraphai/models/crawl_start_params.rb @@ -0,0 +1,104 @@ +# frozen_string_literal: true + +module Scrapegraphai + 
module Models + # @see Scrapegraphai::Resources::Crawl#start + class CrawlStartParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!attribute url + # Starting URL for crawling + # + # @return [String] + required :url, String + + # @!attribute depth + # Maximum crawl depth from starting URL + # + # @return [Integer, nil] + optional :depth, Integer + + # @!attribute extraction_mode + # Use AI extraction (true) or markdown conversion (false) + # + # @return [Boolean, nil] + optional :extraction_mode, Scrapegraphai::Internal::Type::Boolean + + # @!attribute max_pages + # Maximum number of pages to crawl + # + # @return [Integer, nil] + optional :max_pages, Integer + + # @!attribute prompt + # Extraction prompt (required if extraction_mode is true) + # + # @return [String, nil] + optional :prompt, String, nil?: true + + # @!attribute render_heavy_js + # Enable heavy JavaScript rendering + # + # @return [Boolean, nil] + optional :render_heavy_js, Scrapegraphai::Internal::Type::Boolean + + # @!attribute rules + # + # @return [Scrapegraphai::Models::CrawlStartParams::Rules, nil] + optional :rules, -> { Scrapegraphai::CrawlStartParams::Rules } + + # @!attribute schema + # Output schema for extraction + # + # @return [Object, nil] + optional :schema, Scrapegraphai::Internal::Type::Unknown, nil?: true + + # @!attribute sitemap + # Use sitemap for crawling + # + # @return [Boolean, nil] + optional :sitemap, Scrapegraphai::Internal::Type::Boolean + + # @!method initialize(url:, depth: nil, extraction_mode: nil, max_pages: nil, prompt: nil, render_heavy_js: nil, rules: nil, schema: nil, sitemap: nil, request_options: {}) + # @param url [String] Starting URL for crawling + # + # @param depth [Integer] Maximum crawl depth from starting URL + # + # @param extraction_mode [Boolean] Use AI extraction (true) or markdown conversion (false) + # + # @param 
max_pages [Integer] Maximum number of pages to crawl + # + # @param prompt [String, nil] Extraction prompt (required if extraction_mode is true) + # + # @param render_heavy_js [Boolean] Enable heavy JavaScript rendering + # + # @param rules [Scrapegraphai::Models::CrawlStartParams::Rules] + # + # @param schema [Object, nil] Output schema for extraction + # + # @param sitemap [Boolean] Use sitemap for crawling + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + + class Rules < Scrapegraphai::Internal::Type::BaseModel + # @!attribute exclude + # URL patterns to exclude from crawling + # + # @return [Array, nil] + optional :exclude, Scrapegraphai::Internal::Type::ArrayOf[String] + + # @!attribute same_domain + # Restrict crawling to same domain + # + # @return [Boolean, nil] + optional :same_domain, Scrapegraphai::Internal::Type::Boolean + + # @!method initialize(exclude: nil, same_domain: nil) + # @param exclude [Array] URL patterns to exclude from crawling + # + # @param same_domain [Boolean] Restrict crawling to same domain + end + end + end +end diff --git a/lib/scrapegraphai/models/crawl_start_response.rb b/lib/scrapegraphai/models/crawl_start_response.rb new file mode 100644 index 0000000..ce614dc --- /dev/null +++ b/lib/scrapegraphai/models/crawl_start_response.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Crawl#start + class CrawlStartResponse < Scrapegraphai::Internal::Type::BaseModel + # @!attribute task_id + # Celery task identifier + # + # @return [String, nil] + optional :task_id, String + + # @!method initialize(task_id: nil) + # @param task_id [String] Celery task identifier + end + end +end diff --git a/lib/scrapegraphai/models/credit_retrieve_params.rb b/lib/scrapegraphai/models/credit_retrieve_params.rb new file mode 100644 index 0000000..7974eb0 --- /dev/null +++ b/lib/scrapegraphai/models/credit_retrieve_params.rb @@ -0,0 +1,14 @@ +# 
frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Credits#retrieve + class CreditRetrieveParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!method initialize(request_options: {}) + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/credit_retrieve_response.rb b/lib/scrapegraphai/models/credit_retrieve_response.rb new file mode 100644 index 0000000..7f3467d --- /dev/null +++ b/lib/scrapegraphai/models/credit_retrieve_response.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Credits#retrieve + class CreditRetrieveResponse < Scrapegraphai::Internal::Type::BaseModel + # @!attribute remaining_credits + # Number of credits remaining + # + # @return [Integer, nil] + optional :remaining_credits, Integer + + # @!attribute total_credits_used + # Total credits consumed + # + # @return [Integer, nil] + optional :total_credits_used, Integer + + # @!method initialize(remaining_credits: nil, total_credits_used: nil) + # @param remaining_credits [Integer] Number of credits remaining + # + # @param total_credits_used [Integer] Total credits consumed + end + end +end diff --git a/lib/scrapegraphai/models/failed_smartscraper.rb b/lib/scrapegraphai/models/failed_smartscraper.rb new file mode 100644 index 0000000..8c26604 --- /dev/null +++ b/lib/scrapegraphai/models/failed_smartscraper.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + class FailedSmartscraper < Scrapegraphai::Internal::Type::BaseModel + # @!attribute error + # Error description + # + # @return [String, nil] + optional :error, String + + # @!attribute request_id + # + # @return [String, nil] + optional :request_id, String + + # 
@!attribute result + # + # @return [Object, nil] + optional :result, Scrapegraphai::Internal::Type::Unknown, nil?: true + + # @!attribute status + # + # @return [Symbol, Scrapegraphai::Models::FailedSmartscraper::Status, nil] + optional :status, enum: -> { Scrapegraphai::FailedSmartscraper::Status } + + # @!attribute user_prompt + # + # @return [String, nil] + optional :user_prompt, String + + # @!attribute website_url + # + # @return [String, nil] + optional :website_url, String, nil?: true + + # @!method initialize(error: nil, request_id: nil, result: nil, status: nil, user_prompt: nil, website_url: nil) + # @param error [String] Error description + # + # @param request_id [String] + # + # @param result [Object, nil] + # + # @param status [Symbol, Scrapegraphai::Models::FailedSmartscraper::Status] + # + # @param user_prompt [String] + # + # @param website_url [String, nil] + + # @see Scrapegraphai::Models::FailedSmartscraper#status + module Status + extend Scrapegraphai::Internal::Type::Enum + + FAILED = :failed + + # @!method self.values + # @return [Array] + end + end + end +end diff --git a/lib/scrapegraphai/models/feedback_submit_params.rb b/lib/scrapegraphai/models/feedback_submit_params.rb new file mode 100644 index 0000000..7bfc274 --- /dev/null +++ b/lib/scrapegraphai/models/feedback_submit_params.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Feedback#submit + class FeedbackSubmitParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!attribute rating + # Rating score + # + # @return [Integer] + required :rating, Integer + + # @!attribute request_id + # Request to provide feedback for + # + # @return [String] + required :request_id, String + + # @!attribute feedback_text + # Optional feedback comments + # + # @return [String, nil] + optional 
:feedback_text, String, nil?: true + + # @!method initialize(rating:, request_id:, feedback_text: nil, request_options: {}) + # @param rating [Integer] Rating score + # + # @param request_id [String] Request to provide feedback for + # + # @param feedback_text [String, nil] Optional feedback comments + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/feedback_submit_response.rb b/lib/scrapegraphai/models/feedback_submit_response.rb new file mode 100644 index 0000000..3b854a0 --- /dev/null +++ b/lib/scrapegraphai/models/feedback_submit_response.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Feedback#submit + class FeedbackSubmitResponse < Scrapegraphai::Internal::Type::BaseModel + # @!attribute feedback_id + # + # @return [String, nil] + optional :feedback_id, String + + # @!attribute feedback_timestamp + # + # @return [Time, nil] + optional :feedback_timestamp, Time + + # @!attribute message + # + # @return [String, nil] + optional :message, String + + # @!attribute request_id + # + # @return [String, nil] + optional :request_id, String + + # @!method initialize(feedback_id: nil, feedback_timestamp: nil, message: nil, request_id: nil) + # @param feedback_id [String] + # @param feedback_timestamp [Time] + # @param message [String] + # @param request_id [String] + end + end +end diff --git a/lib/scrapegraphai/models/generate_schema_create_params.rb b/lib/scrapegraphai/models/generate_schema_create_params.rb new file mode 100644 index 0000000..2165940 --- /dev/null +++ b/lib/scrapegraphai/models/generate_schema_create_params.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::GenerateSchema#create + class GenerateSchemaCreateParams < Scrapegraphai::Internal::Type::BaseModel + extend 
Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!attribute user_prompt + # Natural language description of desired schema + # + # @return [String] + required :user_prompt, String + + # @!attribute existing_schema + # Existing schema to modify or extend + # + # @return [Object, nil] + optional :existing_schema, Scrapegraphai::Internal::Type::Unknown, nil?: true + + # @!method initialize(user_prompt:, existing_schema: nil, request_options: {}) + # @param user_prompt [String] Natural language description of desired schema + # + # @param existing_schema [Object, nil] Existing schema to modify or extend + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/generate_schema_create_response.rb b/lib/scrapegraphai/models/generate_schema_create_response.rb new file mode 100644 index 0000000..cf2ecd6 --- /dev/null +++ b/lib/scrapegraphai/models/generate_schema_create_response.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::GenerateSchema#create + class GenerateSchemaCreateResponse < Scrapegraphai::Internal::Type::BaseModel + # @!attribute error + # + # @return [String, nil] + optional :error, String, nil?: true + + # @!attribute generated_schema + # Generated JSON schema + # + # @return [Object, nil] + optional :generated_schema, Scrapegraphai::Internal::Type::Unknown + + # @!attribute refined_prompt + # Enhanced search prompt generated from user input + # + # @return [String, nil] + optional :refined_prompt, String + + # @!attribute request_id + # + # @return [String, nil] + optional :request_id, String + + # @!attribute status + # + # @return [Symbol, Scrapegraphai::Models::GenerateSchemaCreateResponse::Status, nil] + optional :status, enum: -> { Scrapegraphai::Models::GenerateSchemaCreateResponse::Status } + + # @!attribute user_prompt 
+ # + # @return [String, nil] + optional :user_prompt, String + + # @!method initialize(error: nil, generated_schema: nil, refined_prompt: nil, request_id: nil, status: nil, user_prompt: nil) + # @param error [String, nil] + # + # @param generated_schema [Object] Generated JSON schema + # + # @param refined_prompt [String] Enhanced search prompt generated from user input + # + # @param request_id [String] + # + # @param status [Symbol, Scrapegraphai::Models::GenerateSchemaCreateResponse::Status] + # + # @param user_prompt [String] + + # @see Scrapegraphai::Models::GenerateSchemaCreateResponse#status + module Status + extend Scrapegraphai::Internal::Type::Enum + + COMPLETED = :completed + + # @!method self.values + # @return [Array] + end + end + end +end diff --git a/lib/scrapegraphai/models/generate_schema_retrieve_params.rb b/lib/scrapegraphai/models/generate_schema_retrieve_params.rb new file mode 100644 index 0000000..50d105b --- /dev/null +++ b/lib/scrapegraphai/models/generate_schema_retrieve_params.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::GenerateSchema#retrieve + class GenerateSchemaRetrieveParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!method initialize(request_options: {}) + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/generate_schema_retrieve_response.rb b/lib/scrapegraphai/models/generate_schema_retrieve_response.rb new file mode 100644 index 0000000..d37fda5 --- /dev/null +++ b/lib/scrapegraphai/models/generate_schema_retrieve_response.rb @@ -0,0 +1,119 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::GenerateSchema#retrieve + module GenerateSchemaRetrieveResponse + extend 
Scrapegraphai::Internal::Type::Union + + variant -> { Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse } + + variant -> { Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse } + + class CompletedSchemaGenerationResponse < Scrapegraphai::Internal::Type::BaseModel + # @!attribute error + # + # @return [String, nil] + optional :error, String, nil?: true + + # @!attribute generated_schema + # + # @return [Object, nil] + optional :generated_schema, Scrapegraphai::Internal::Type::Unknown + + # @!attribute refined_prompt + # + # @return [String, nil] + optional :refined_prompt, String + + # @!attribute request_id + # + # @return [String, nil] + optional :request_id, String + + # @!attribute status + # + # @return [Symbol, Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::Status, nil] + optional :status, + enum: -> { Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::Status } + + # @!attribute user_prompt + # + # @return [String, nil] + optional :user_prompt, String + + # @!method initialize(error: nil, generated_schema: nil, refined_prompt: nil, request_id: nil, status: nil, user_prompt: nil) + # @param error [String, nil] + # @param generated_schema [Object] + # @param refined_prompt [String] + # @param request_id [String] + # @param status [Symbol, Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::Status] + # @param user_prompt [String] + + # @see Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse#status + module Status + extend Scrapegraphai::Internal::Type::Enum + + COMPLETED = :completed + + # @!method self.values + # @return [Array] + end + end + + class FailedSchemaGenerationResponse < Scrapegraphai::Internal::Type::BaseModel + # @!attribute error + # + # @return [String, nil] + optional :error, String + + # @!attribute generated_schema + 
# + # @return [Object, nil] + optional :generated_schema, Scrapegraphai::Internal::Type::Unknown, nil?: true + + # @!attribute refined_prompt + # + # @return [String, nil] + optional :refined_prompt, String, nil?: true + + # @!attribute request_id + # + # @return [String, nil] + optional :request_id, String + + # @!attribute status + # + # @return [Symbol, Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::Status, nil] + optional :status, + enum: -> { Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::Status } + + # @!attribute user_prompt + # + # @return [String, nil] + optional :user_prompt, String + + # @!method initialize(error: nil, generated_schema: nil, refined_prompt: nil, request_id: nil, status: nil, user_prompt: nil) + # @param error [String] + # @param generated_schema [Object, nil] + # @param refined_prompt [String, nil] + # @param request_id [String] + # @param status [Symbol, Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::Status] + # @param user_prompt [String] + + # @see Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse#status + module Status + extend Scrapegraphai::Internal::Type::Enum + + FAILED = :failed + + # @!method self.values + # @return [Array] + end + end + + # @!method self.variants + # @return [Array(Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse, Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse)] + end + end +end diff --git a/lib/scrapegraphai/models/healthz_check_params.rb b/lib/scrapegraphai/models/healthz_check_params.rb new file mode 100644 index 0000000..ae11f4c --- /dev/null +++ b/lib/scrapegraphai/models/healthz_check_params.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Healthz#check + class HealthzCheckParams < 
Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!method initialize(request_options: {}) + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/healthz_check_response.rb b/lib/scrapegraphai/models/healthz_check_response.rb new file mode 100644 index 0000000..22c6ad0 --- /dev/null +++ b/lib/scrapegraphai/models/healthz_check_response.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Healthz#check + class HealthzCheckResponse < Scrapegraphai::Internal::Type::BaseModel + # @!attribute services + # + # @return [Hash{Symbol=>String}, nil] + optional :services, Scrapegraphai::Internal::Type::HashOf[String] + + # @!attribute status + # + # @return [String, nil] + optional :status, String + + # @!method initialize(services: nil, status: nil) + # @param services [Hash{Symbol=>String}] + # @param status [String] + end + end +end diff --git a/lib/scrapegraphai/models/markdownify_convert_params.rb b/lib/scrapegraphai/models/markdownify_convert_params.rb new file mode 100644 index 0000000..7da7e9d --- /dev/null +++ b/lib/scrapegraphai/models/markdownify_convert_params.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Markdownify#convert + class MarkdownifyConvertParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!attribute website_url + # URL to convert to markdown + # + # @return [String] + required :website_url, String + + # @!attribute headers + # + # @return [Hash{Symbol=>String}, nil] + optional :headers, Scrapegraphai::Internal::Type::HashOf[String] + + # @!attribute steps + # 
Interaction steps before conversion + # + # @return [Array, nil] + optional :steps, Scrapegraphai::Internal::Type::ArrayOf[String] + + # @!method initialize(website_url:, headers: nil, steps: nil, request_options: {}) + # @param website_url [String] URL to convert to markdown + # + # @param headers [Hash{Symbol=>String}] + # + # @param steps [Array] Interaction steps before conversion + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/markdownify_retrieve_status_params.rb b/lib/scrapegraphai/models/markdownify_retrieve_status_params.rb new file mode 100644 index 0000000..051bc55 --- /dev/null +++ b/lib/scrapegraphai/models/markdownify_retrieve_status_params.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Markdownify#retrieve_status + class MarkdownifyRetrieveStatusParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!method initialize(request_options: {}) + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/markdownify_retrieve_status_response.rb b/lib/scrapegraphai/models/markdownify_retrieve_status_response.rb new file mode 100644 index 0000000..d0a5586 --- /dev/null +++ b/lib/scrapegraphai/models/markdownify_retrieve_status_response.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Markdownify#retrieve_status + module MarkdownifyRetrieveStatusResponse + extend Scrapegraphai::Internal::Type::Union + + variant -> { Scrapegraphai::CompletedMarkdownify } + + variant -> { Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse } + + class FailedMarkdownifyResponse < 
Scrapegraphai::Internal::Type::BaseModel + # @!attribute error + # + # @return [String, nil] + optional :error, String + + # @!attribute request_id + # + # @return [String, nil] + optional :request_id, String + + # @!attribute result + # + # @return [String, nil] + optional :result, String, nil?: true + + # @!attribute status + # + # @return [Symbol, Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::Status, nil] + optional :status, + enum: -> { Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::Status } + + # @!attribute website_url + # + # @return [String, nil] + optional :website_url, String + + # @!method initialize(error: nil, request_id: nil, result: nil, status: nil, website_url: nil) + # @param error [String] + # @param request_id [String] + # @param result [String, nil] + # @param status [Symbol, Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::Status] + # @param website_url [String] + + # @see Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse#status + module Status + extend Scrapegraphai::Internal::Type::Enum + + FAILED = :failed + + # @!method self.values + # @return [Array] + end + end + + # @!method self.variants + # @return [Array(Scrapegraphai::Models::CompletedMarkdownify, Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse)] + end + end +end diff --git a/lib/scrapegraphai/models/searchscraper_create_params.rb b/lib/scrapegraphai/models/searchscraper_create_params.rb new file mode 100644 index 0000000..c0513ae --- /dev/null +++ b/lib/scrapegraphai/models/searchscraper_create_params.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Searchscraper#create + class SearchscraperCreateParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include 
Scrapegraphai::Internal::Type::RequestParameters + + # @!attribute user_prompt + # Search query and extraction instruction + # + # @return [String] + required :user_prompt, String + + # @!attribute headers + # + # @return [Hash{Symbol=>String}, nil] + optional :headers, Scrapegraphai::Internal::Type::HashOf[String] + + # @!attribute num_results + # Number of websites to scrape from search results + # + # @return [Integer, nil] + optional :num_results, Integer + + # @!attribute output_schema + # JSON schema for structured output + # + # @return [Object, nil] + optional :output_schema, Scrapegraphai::Internal::Type::Unknown + + # @!method initialize(user_prompt:, headers: nil, num_results: nil, output_schema: nil, request_options: {}) + # @param user_prompt [String] Search query and extraction instruction + # + # @param headers [Hash{Symbol=>String}] + # + # @param num_results [Integer] Number of websites to scrape from search results + # + # @param output_schema [Object] JSON schema for structured output + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/searchscraper_retrieve_status_params.rb b/lib/scrapegraphai/models/searchscraper_retrieve_status_params.rb new file mode 100644 index 0000000..c8f5e3a --- /dev/null +++ b/lib/scrapegraphai/models/searchscraper_retrieve_status_params.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Searchscraper#retrieve_status + class SearchscraperRetrieveStatusParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!method initialize(request_options: {}) + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/searchscraper_retrieve_status_response.rb 
b/lib/scrapegraphai/models/searchscraper_retrieve_status_response.rb new file mode 100644 index 0000000..c4a32cc --- /dev/null +++ b/lib/scrapegraphai/models/searchscraper_retrieve_status_response.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Searchscraper#retrieve_status + module SearchscraperRetrieveStatusResponse + extend Scrapegraphai::Internal::Type::Union + + variant -> { Scrapegraphai::CompletedSearchScraper } + + variant -> { Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse } + + class FailedSearchScraperResponse < Scrapegraphai::Internal::Type::BaseModel + # @!attribute error + # + # @return [String, nil] + optional :error, String + + # @!attribute num_results + # + # @return [Integer, nil] + optional :num_results, Integer + + # @!attribute reference_urls + # + # @return [Array, nil] + optional :reference_urls, Scrapegraphai::Internal::Type::ArrayOf[String] + + # @!attribute request_id + # + # @return [String, nil] + optional :request_id, String + + # @!attribute result + # + # @return [Object, nil] + optional :result, Scrapegraphai::Internal::Type::Unknown, nil?: true + + # @!attribute status + # + # @return [Symbol, Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::Status, nil] + optional :status, + enum: -> { Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::Status } + + # @!attribute user_prompt + # + # @return [String, nil] + optional :user_prompt, String + + # @!method initialize(error: nil, num_results: nil, reference_urls: nil, request_id: nil, result: nil, status: nil, user_prompt: nil) + # @param error [String] + # @param num_results [Integer] + # @param reference_urls [Array] + # @param request_id [String] + # @param result [Object, nil] + # @param status [Symbol, 
Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::Status] + # @param user_prompt [String] + + # @see Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse#status + module Status + extend Scrapegraphai::Internal::Type::Enum + + FAILED = :failed + + # @!method self.values + # @return [Array] + end + end + + # @!method self.variants + # @return [Array(Scrapegraphai::Models::CompletedSearchScraper, Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse)] + end + end +end diff --git a/lib/scrapegraphai/models/smartscraper_create_params.rb b/lib/scrapegraphai/models/smartscraper_create_params.rb new file mode 100644 index 0000000..0f77340 --- /dev/null +++ b/lib/scrapegraphai/models/smartscraper_create_params.rb @@ -0,0 +1,94 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Smartscraper#create + class SmartscraperCreateParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!attribute user_prompt + # Extraction instruction for the LLM + # + # @return [String] + required :user_prompt, String + + # @!attribute cookies + # Cookies to include in the request + # + # @return [Hash{Symbol=>String}, nil] + optional :cookies, Scrapegraphai::Internal::Type::HashOf[String] + + # @!attribute headers + # HTTP headers to include in the request + # + # @return [Hash{Symbol=>String}, nil] + optional :headers, Scrapegraphai::Internal::Type::HashOf[String] + + # @!attribute number_of_scrolls + # Number of infinite scroll operations to perform + # + # @return [Integer, nil] + optional :number_of_scrolls, Integer + + # @!attribute output_schema + # JSON schema defining the expected output structure + # + # @return [Object, nil] + optional :output_schema, Scrapegraphai::Internal::Type::Unknown + + # 
@!attribute render_heavy_js + # Enable heavy JavaScript rendering + # + # @return [Boolean, nil] + optional :render_heavy_js, Scrapegraphai::Internal::Type::Boolean + + # @!attribute steps + # Website interaction steps (e.g., clicking buttons) + # + # @return [Array, nil] + optional :steps, Scrapegraphai::Internal::Type::ArrayOf[String] + + # @!attribute total_pages + # Number of pages to process for pagination + # + # @return [Integer, nil] + optional :total_pages, Integer + + # @!attribute website_html + # HTML content to process (max 2MB, mutually exclusive with website_url) + # + # @return [String, nil] + optional :website_html, String + + # @!attribute website_url + # URL to scrape (mutually exclusive with website_html) + # + # @return [String, nil] + optional :website_url, String + + # @!method initialize(user_prompt:, cookies: nil, headers: nil, number_of_scrolls: nil, output_schema: nil, render_heavy_js: nil, steps: nil, total_pages: nil, website_html: nil, website_url: nil, request_options: {}) + # @param user_prompt [String] Extraction instruction for the LLM + # + # @param cookies [Hash{Symbol=>String}] Cookies to include in the request + # + # @param headers [Hash{Symbol=>String}] HTTP headers to include in the request + # + # @param number_of_scrolls [Integer] Number of infinite scroll operations to perform + # + # @param output_schema [Object] JSON schema defining the expected output structure + # + # @param render_heavy_js [Boolean] Enable heavy JavaScript rendering + # + # @param steps [Array] Website interaction steps (e.g., clicking buttons) + # + # @param total_pages [Integer] Number of pages to process for pagination + # + # @param website_html [String] HTML content to process (max 2MB, mutually exclusive with website_url) + # + # @param website_url [String] URL to scrape (mutually exclusive with website_html) + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git 
a/lib/scrapegraphai/models/smartscraper_list_params.rb b/lib/scrapegraphai/models/smartscraper_list_params.rb new file mode 100644 index 0000000..7507d5a --- /dev/null +++ b/lib/scrapegraphai/models/smartscraper_list_params.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Smartscraper#list + class SmartscraperListParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!method initialize(request_options: {}) + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/smartscraper_list_response.rb b/lib/scrapegraphai/models/smartscraper_list_response.rb new file mode 100644 index 0000000..63743b4 --- /dev/null +++ b/lib/scrapegraphai/models/smartscraper_list_response.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Smartscraper#list + module SmartscraperListResponse + extend Scrapegraphai::Internal::Type::Union + + variant -> { Scrapegraphai::CompletedSmartscraper } + + variant -> { Scrapegraphai::FailedSmartscraper } + + # @!method self.variants + # @return [Array(Scrapegraphai::Models::CompletedSmartscraper, Scrapegraphai::Models::FailedSmartscraper)] + end + end +end diff --git a/lib/scrapegraphai/models/smartscraper_retrieve_params.rb b/lib/scrapegraphai/models/smartscraper_retrieve_params.rb new file mode 100644 index 0000000..4aabe7e --- /dev/null +++ b/lib/scrapegraphai/models/smartscraper_retrieve_params.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Smartscraper#retrieve + class SmartscraperRetrieveParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include 
Scrapegraphai::Internal::Type::RequestParameters + + # @!method initialize(request_options: {}) + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/smartscraper_retrieve_response.rb b/lib/scrapegraphai/models/smartscraper_retrieve_response.rb new file mode 100644 index 0000000..698cf7f --- /dev/null +++ b/lib/scrapegraphai/models/smartscraper_retrieve_response.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Smartscraper#retrieve + module SmartscraperRetrieveResponse + extend Scrapegraphai::Internal::Type::Union + + variant -> { Scrapegraphai::CompletedSmartscraper } + + variant -> { Scrapegraphai::FailedSmartscraper } + + # @!method self.variants + # @return [Array(Scrapegraphai::Models::CompletedSmartscraper, Scrapegraphai::Models::FailedSmartscraper)] + end + end +end diff --git a/lib/scrapegraphai/models/validate_api_key_params.rb b/lib/scrapegraphai/models/validate_api_key_params.rb new file mode 100644 index 0000000..7f270be --- /dev/null +++ b/lib/scrapegraphai/models/validate_api_key_params.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # @see Scrapegraphai::Resources::Validate#api_key + class ValidateAPIKeyParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + # @!method initialize(request_options: {}) + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + end + end +end diff --git a/lib/scrapegraphai/models/validate_api_key_response.rb b/lib/scrapegraphai/models/validate_api_key_response.rb new file mode 100644 index 0000000..cb3e033 --- /dev/null +++ b/lib/scrapegraphai/models/validate_api_key_response.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Models + # 
@see Scrapegraphai::Resources::Validate#api_key + class ValidateAPIKeyResponse < Scrapegraphai::Internal::Type::BaseModel + # @!attribute email + # + # @return [String, nil] + optional :email, String + + # @!method initialize(email: nil) + # @param email [String] + end + end +end diff --git a/lib/scrapegraphai/request_options.rb b/lib/scrapegraphai/request_options.rb new file mode 100644 index 0000000..50e8cb7 --- /dev/null +++ b/lib/scrapegraphai/request_options.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +module Scrapegraphai + # Specify HTTP behaviour to use for a specific request. These options supplement + # or override those provided at the client level. + # + # When making a request, you can pass an actual {RequestOptions} instance, or + # simply pass a Hash with symbol keys matching the attributes on this class. + class RequestOptions < Scrapegraphai::Internal::Type::BaseModel + # @api private + # + # @param opts [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}] + # + # @raise [ArgumentError] + def self.validate!(opts) + case opts + in Scrapegraphai::RequestOptions | Hash + opts.to_h.each_key do |k| + unless fields.include?(k) + raise ArgumentError.new("Request `opts` keys must be one of #{fields.keys}, got #{k.inspect}") + end + end + else + raise ArgumentError.new("Request `opts` must be a Hash or RequestOptions, got #{opts.inspect}") + end + end + + # @!attribute idempotency_key + # Idempotency key to send with request and all associated retries. Will only be + # sent for write requests. + # + # @return [String, nil] + optional :idempotency_key, String + + # @!attribute extra_query + # Extra query params to send with the request. These are `.merge`’d into any + # `query` given at the client level. + # + # @return [Hash{String=>Array, String, nil}, nil] + optional :extra_query, + Scrapegraphai::Internal::Type::HashOf[Scrapegraphai::Internal::Type::ArrayOf[String]] + + # @!attribute extra_headers + # Extra headers to send with the request. 
These are `.merged`’d into any + # `extra_headers` given at the client level. + # + # @return [Hash{String=>String, nil}, nil] + optional :extra_headers, Scrapegraphai::Internal::Type::HashOf[String, nil?: true] + + # @!attribute extra_body + # Extra data to send with the request. These are deep merged into any data + # generated as part of the normal request. + # + # @return [Object, nil] + optional :extra_body, Scrapegraphai::Internal::Type::HashOf[Scrapegraphai::Internal::Type::Unknown] + + # @!attribute max_retries + # Maximum number of retries to attempt after a failed initial request. + # + # @return [Integer, nil] + optional :max_retries, Integer + + # @!attribute timeout + # Request timeout in seconds. + # + # @return [Float, nil] + optional :timeout, Float + + # @!method initialize(values = {}) + # Returns a new instance of RequestOptions. + # + # @param values [Hash{Symbol=>Object}] + + define_sorbet_constant!(:OrHash) do + T.type_alias { T.any(Scrapegraphai::RequestOptions, Scrapegraphai::Internal::AnyHash) } + end + end +end diff --git a/lib/scrapegraphai/resources/crawl.rb b/lib/scrapegraphai/resources/crawl.rb new file mode 100644 index 0000000..fa78c6d --- /dev/null +++ b/lib/scrapegraphai/resources/crawl.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Resources + class Crawl + # Retrieve the status and results of a crawling job + # + # @overload retrieve_results(task_id, request_options: {}) + # + # @param task_id [String] Celery task identifier + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::CrawlRetrieveResultsResponse] + # + # @see Scrapegraphai::Models::CrawlRetrieveResultsParams + def retrieve_results(task_id, params = {}) + @client.request( + method: :get, + path: ["crawl/%1$s", task_id], + model: Scrapegraphai::Models::CrawlRetrieveResultsResponse, + options: params[:request_options] + ) + end + + # Initiate comprehensive 
website crawling with sitemap support. Supports both AI + # extraction mode and markdown conversion mode. Returns a task ID for async + # processing. + # + # @overload start(url:, depth: nil, extraction_mode: nil, max_pages: nil, prompt: nil, render_heavy_js: nil, rules: nil, schema: nil, sitemap: nil, request_options: {}) + # + # @param url [String] Starting URL for crawling + # + # @param depth [Integer] Maximum crawl depth from starting URL + # + # @param extraction_mode [Boolean] Use AI extraction (true) or markdown conversion (false) + # + # @param max_pages [Integer] Maximum number of pages to crawl + # + # @param prompt [String, nil] Extraction prompt (required if extraction_mode is true) + # + # @param render_heavy_js [Boolean] Enable heavy JavaScript rendering + # + # @param rules [Scrapegraphai::Models::CrawlStartParams::Rules] + # + # @param schema [Object, nil] Output schema for extraction + # + # @param sitemap [Boolean] Use sitemap for crawling + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::CrawlStartResponse] + # + # @see Scrapegraphai::Models::CrawlStartParams + def start(params) + parsed, options = Scrapegraphai::CrawlStartParams.dump_request(params) + @client.request( + method: :post, + path: "crawl", + body: parsed, + model: Scrapegraphai::Models::CrawlStartResponse, + options: options + ) + end + + # @api private + # + # @param client [Scrapegraphai::Client] + def initialize(client:) + @client = client + end + end + end +end diff --git a/lib/scrapegraphai/resources/credits.rb b/lib/scrapegraphai/resources/credits.rb new file mode 100644 index 0000000..4131626 --- /dev/null +++ b/lib/scrapegraphai/resources/credits.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Resources + class Credits + # Retrieve the current credit balance and usage for the authenticated user + # + # @overload retrieve(request_options: {}) + # + # @param 
request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::CreditRetrieveResponse] + # + # @see Scrapegraphai::Models::CreditRetrieveParams + def retrieve(params = {}) + @client.request( + method: :get, + path: "credits", + model: Scrapegraphai::Models::CreditRetrieveResponse, + options: params[:request_options] + ) + end + + # @api private + # + # @param client [Scrapegraphai::Client] + def initialize(client:) + @client = client + end + end + end +end diff --git a/lib/scrapegraphai/resources/feedback.rb b/lib/scrapegraphai/resources/feedback.rb new file mode 100644 index 0000000..2317306 --- /dev/null +++ b/lib/scrapegraphai/resources/feedback.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Resources + class Feedback + # Submit feedback for a specific request + # + # @overload submit(rating:, request_id:, feedback_text: nil, request_options: {}) + # + # @param rating [Integer] Rating score + # + # @param request_id [String] Request to provide feedback for + # + # @param feedback_text [String, nil] Optional feedback comments + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::FeedbackSubmitResponse] + # + # @see Scrapegraphai::Models::FeedbackSubmitParams + def submit(params) + parsed, options = Scrapegraphai::FeedbackSubmitParams.dump_request(params) + @client.request( + method: :post, + path: "feedback", + body: parsed, + model: Scrapegraphai::Models::FeedbackSubmitResponse, + options: options + ) + end + + # @api private + # + # @param client [Scrapegraphai::Client] + def initialize(client:) + @client = client + end + end + end +end diff --git a/lib/scrapegraphai/resources/generate_schema.rb b/lib/scrapegraphai/resources/generate_schema.rb new file mode 100644 index 0000000..ba5eadc --- /dev/null +++ b/lib/scrapegraphai/resources/generate_schema.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true 
+ +module Scrapegraphai + module Resources + class GenerateSchema + # Generate or modify JSON schemas based on natural language descriptions. Can + # create new schemas or extend existing ones. + # + # @overload create(user_prompt:, existing_schema: nil, request_options: {}) + # + # @param user_prompt [String] Natural language description of desired schema + # + # @param existing_schema [Object, nil] Existing schema to modify or extend + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::GenerateSchemaCreateResponse] + # + # @see Scrapegraphai::Models::GenerateSchemaCreateParams + def create(params) + parsed, options = Scrapegraphai::GenerateSchemaCreateParams.dump_request(params) + @client.request( + method: :post, + path: "generate_schema", + body: parsed, + model: Scrapegraphai::Models::GenerateSchemaCreateResponse, + options: options + ) + end + + # Retrieve the status and results of a schema generation request + # + # @overload retrieve(request_id, request_options: {}) + # + # @param request_id [String] Unique request identifier + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse, Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse] + # + # @see Scrapegraphai::Models::GenerateSchemaRetrieveParams + def retrieve(request_id, params = {}) + @client.request( + method: :get, + path: ["generate_schema/%1$s", request_id], + model: Scrapegraphai::Models::GenerateSchemaRetrieveResponse, + options: params[:request_options] + ) + end + + # @api private + # + # @param client [Scrapegraphai::Client] + def initialize(client:) + @client = client + end + end + end +end diff --git a/lib/scrapegraphai/resources/healthz.rb b/lib/scrapegraphai/resources/healthz.rb new file mode 100644 index 0000000..fbe3fd8 --- /dev/null +++ 
b/lib/scrapegraphai/resources/healthz.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Resources + class Healthz + # Check the health status of the service + # + # @overload check(request_options: {}) + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::HealthzCheckResponse] + # + # @see Scrapegraphai::Models::HealthzCheckParams + def check(params = {}) + @client.request( + method: :get, + path: "healthz", + model: Scrapegraphai::Models::HealthzCheckResponse, + options: params[:request_options] + ) + end + + # @api private + # + # @param client [Scrapegraphai::Client] + def initialize(client:) + @client = client + end + end + end +end diff --git a/lib/scrapegraphai/resources/markdownify.rb b/lib/scrapegraphai/resources/markdownify.rb new file mode 100644 index 0000000..919dde0 --- /dev/null +++ b/lib/scrapegraphai/resources/markdownify.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Resources + class Markdownify + # Convert web page content to clean Markdown format + # + # @overload convert(website_url:, headers: nil, steps: nil, request_options: {}) + # + # @param website_url [String] URL to convert to markdown + # + # @param headers [Hash{Symbol=>String}] + # + # @param steps [Array] Interaction steps before conversion + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::CompletedMarkdownify] + # + # @see Scrapegraphai::Models::MarkdownifyConvertParams + def convert(params) + parsed, options = Scrapegraphai::MarkdownifyConvertParams.dump_request(params) + @client.request( + method: :post, + path: "markdownify", + body: parsed, + model: Scrapegraphai::CompletedMarkdownify, + options: options + ) + end + + # Retrieve the status and results of a markdown conversion + # + # @overload retrieve_status(request_id, request_options: {}) + # + # @param 
request_id [String] + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::CompletedMarkdownify, Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse] + # + # @see Scrapegraphai::Models::MarkdownifyRetrieveStatusParams + def retrieve_status(request_id, params = {}) + @client.request( + method: :get, + path: ["markdownify/%1$s", request_id], + model: Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse, + options: params[:request_options] + ) + end + + # @api private + # + # @param client [Scrapegraphai::Client] + def initialize(client:) + @client = client + end + end + end +end diff --git a/lib/scrapegraphai/resources/searchscraper.rb b/lib/scrapegraphai/resources/searchscraper.rb new file mode 100644 index 0000000..978ba59 --- /dev/null +++ b/lib/scrapegraphai/resources/searchscraper.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Resources + class Searchscraper + # Performs web search, selects relevant URLs, and extracts structured data from + # multiple websites. Uses LLM to refine search queries and merge results from + # different sources. 
+ # + # @overload create(user_prompt:, headers: nil, num_results: nil, output_schema: nil, request_options: {}) + # + # @param user_prompt [String] Search query and extraction instruction + # + # @param headers [Hash{Symbol=>String}] + # + # @param num_results [Integer] Number of websites to scrape from search results + # + # @param output_schema [Object] JSON schema for structured output + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::CompletedSearchScraper] + # + # @see Scrapegraphai::Models::SearchscraperCreateParams + def create(params) + parsed, options = Scrapegraphai::SearchscraperCreateParams.dump_request(params) + @client.request( + method: :post, + path: "searchscraper", + body: parsed, + model: Scrapegraphai::CompletedSearchScraper, + options: options + ) + end + + # Retrieve the status and results of a search scraping operation + # + # @overload retrieve_status(request_id, request_options: {}) + # + # @param request_id [String] Unique request identifier + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::CompletedSearchScraper, Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse] + # + # @see Scrapegraphai::Models::SearchscraperRetrieveStatusParams + def retrieve_status(request_id, params = {}) + @client.request( + method: :get, + path: ["searchscraper/%1$s", request_id], + model: Scrapegraphai::Models::SearchscraperRetrieveStatusResponse, + options: params[:request_options] + ) + end + + # @api private + # + # @param client [Scrapegraphai::Client] + def initialize(client:) + @client = client + end + end + end +end diff --git a/lib/scrapegraphai/resources/smartscraper.rb b/lib/scrapegraphai/resources/smartscraper.rb new file mode 100644 index 0000000..c56e9f6 --- /dev/null +++ b/lib/scrapegraphai/resources/smartscraper.rb @@ -0,0 +1,93 @@ +# 
frozen_string_literal: true + +module Scrapegraphai + module Resources + class Smartscraper + # Main scraping endpoint with LLM-powered content analysis. Supports various + # fetching providers, infinite scrolling, pagination, and custom output schemas. + # + # @overload create(user_prompt:, cookies: nil, headers: nil, number_of_scrolls: nil, output_schema: nil, render_heavy_js: nil, steps: nil, total_pages: nil, website_html: nil, website_url: nil, request_options: {}) + # + # @param user_prompt [String] Extraction instruction for the LLM + # + # @param cookies [Hash{Symbol=>String}] Cookies to include in the request + # + # @param headers [Hash{Symbol=>String}] HTTP headers to include in the request + # + # @param number_of_scrolls [Integer] Number of infinite scroll operations to perform + # + # @param output_schema [Object] JSON schema defining the expected output structure + # + # @param render_heavy_js [Boolean] Enable heavy JavaScript rendering + # + # @param steps [Array] Website interaction steps (e.g., clicking buttons) + # + # @param total_pages [Integer] Number of pages to process for pagination + # + # @param website_html [String] HTML content to process (max 2MB, mutually exclusive with website_url) + # + # @param website_url [String] URL to scrape (mutually exclusive with website_html) + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::CompletedSmartscraper] + # + # @see Scrapegraphai::Models::SmartscraperCreateParams + def create(params) + parsed, options = Scrapegraphai::SmartscraperCreateParams.dump_request(params) + @client.request( + method: :post, + path: "smartscraper", + body: parsed, + model: Scrapegraphai::CompletedSmartscraper, + options: options + ) + end + + # Retrieve the status and results of a scraping operation + # + # @overload retrieve(request_id, request_options: {}) + # + # @param request_id [String] Unique request identifier + # + # @param 
request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::CompletedSmartscraper, Scrapegraphai::Models::FailedSmartscraper] + # + # @see Scrapegraphai::Models::SmartscraperRetrieveParams + def retrieve(request_id, params = {}) + @client.request( + method: :get, + path: ["smartscraper/%1$s", request_id], + model: Scrapegraphai::Models::SmartscraperRetrieveResponse, + options: params[:request_options] + ) + end + + # Retrieve the status and results of a scraping operation + # + # @overload list(request_options: {}) + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::CompletedSmartscraper, Scrapegraphai::Models::FailedSmartscraper] + # + # @see Scrapegraphai::Models::SmartscraperListParams + def list(params = {}) + @client.request( + method: :get, + path: "smartscraper", + model: Scrapegraphai::Models::SmartscraperListResponse, + options: params[:request_options] + ) + end + + # @api private + # + # @param client [Scrapegraphai::Client] + def initialize(client:) + @client = client + end + end + end +end diff --git a/lib/scrapegraphai/resources/validate.rb b/lib/scrapegraphai/resources/validate.rb new file mode 100644 index 0000000..96f2ca8 --- /dev/null +++ b/lib/scrapegraphai/resources/validate.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Resources + class Validate + # Validate the API key and retrieve associated user email + # + # @overload api_key(request_options: {}) + # + # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] + # + # @return [Scrapegraphai::Models::ValidateAPIKeyResponse] + # + # @see Scrapegraphai::Models::ValidateAPIKeyParams + def api_key(params = {}) + @client.request( + method: :get, + path: "validate", + model: Scrapegraphai::Models::ValidateAPIKeyResponse, + options: params[:request_options] + ) + end + + # @api private + # + # @param 
client [Scrapegraphai::Client] + def initialize(client:) + @client = client + end + end + end +end diff --git a/lib/scrapegraphai/version.rb b/lib/scrapegraphai/version.rb new file mode 100644 index 0000000..2ef4d62 --- /dev/null +++ b/lib/scrapegraphai/version.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +module Scrapegraphai + VERSION = "0.0.1" +end diff --git a/manifest.yaml b/manifest.yaml new file mode 100644 index 0000000..556686f --- /dev/null +++ b/manifest.yaml @@ -0,0 +1,15 @@ +dependencies: + - English + - cgi + - date + - erb + - etc + - json + - net/http + - pathname + - rbconfig + - securerandom + - set + - stringio + - time + - uri diff --git a/rbi/scrapegraphai/client.rbi b/rbi/scrapegraphai/client.rbi new file mode 100644 index 0000000..f83bd3e --- /dev/null +++ b/rbi/scrapegraphai/client.rbi @@ -0,0 +1,91 @@ +# typed: strong + +module Scrapegraphai + class Client < Scrapegraphai::Internal::Transport::BaseClient + DEFAULT_MAX_RETRIES = 2 + + DEFAULT_TIMEOUT_IN_SECONDS = T.let(60.0, Float) + + DEFAULT_INITIAL_RETRY_DELAY = T.let(0.5, Float) + + DEFAULT_MAX_RETRY_DELAY = T.let(8.0, Float) + + ENVIRONMENTS = + T.let( + { + production: "https://api.scrapegraphai.com/v1", + environment_1: "http://localhost:8001/v1" + }, + T::Hash[Symbol, String] + ) + + # API key for authentication + sig { returns(String) } + attr_reader :api_key + + sig { returns(Scrapegraphai::Resources::Smartscraper) } + attr_reader :smartscraper + + sig { returns(Scrapegraphai::Resources::Markdownify) } + attr_reader :markdownify + + sig { returns(Scrapegraphai::Resources::Searchscraper) } + attr_reader :searchscraper + + sig { returns(Scrapegraphai::Resources::GenerateSchema) } + attr_reader :generate_schema + + sig { returns(Scrapegraphai::Resources::Crawl) } + attr_reader :crawl + + sig { returns(Scrapegraphai::Resources::Credits) } + attr_reader :credits + + sig { returns(Scrapegraphai::Resources::Validate) } + attr_reader :validate + + sig { 
returns(Scrapegraphai::Resources::Feedback) } + attr_reader :feedback + + sig { returns(Scrapegraphai::Resources::Healthz) } + attr_reader :healthz + + # @api private + sig { override.returns(T::Hash[String, String]) } + private def auth_headers + end + + # Creates and returns a new client for interacting with the API. + sig do + params( + api_key: T.nilable(String), + environment: T.nilable(T.any(Symbol, String)), + base_url: T.nilable(String), + max_retries: Integer, + timeout: Float, + initial_retry_delay: Float, + max_retry_delay: Float + ).returns(T.attached_class) + end + def self.new( + # API key for authentication Defaults to `ENV["SCRAPEGRAPHAI_API_KEY"]` + api_key: ENV["SCRAPEGRAPHAI_API_KEY"], + # Specifies the environment to use for the API. + # + # Each environment maps to a different base URL: + # + # - `production` corresponds to `https://api.scrapegraphai.com/v1` + # - `environment_1` corresponds to `http://localhost:8001/v1` + environment: nil, + # Override the default base URL for the API, e.g., + # `"https://api.example.com/v2/"`. Defaults to `ENV["SCRAPEGRAPHAI_BASE_URL"]` + base_url: ENV["SCRAPEGRAPHAI_BASE_URL"], + # Max number of retries to attempt after a failed retryable request. 
+ max_retries: Scrapegraphai::Client::DEFAULT_MAX_RETRIES, + timeout: Scrapegraphai::Client::DEFAULT_TIMEOUT_IN_SECONDS, + initial_retry_delay: Scrapegraphai::Client::DEFAULT_INITIAL_RETRY_DELAY, + max_retry_delay: Scrapegraphai::Client::DEFAULT_MAX_RETRY_DELAY + ) + end + end +end diff --git a/rbi/scrapegraphai/errors.rbi b/rbi/scrapegraphai/errors.rbi new file mode 100644 index 0000000..55df99d --- /dev/null +++ b/rbi/scrapegraphai/errors.rbi @@ -0,0 +1,205 @@ +# typed: strong + +module Scrapegraphai + module Errors + class Error < StandardError + sig { returns(T.nilable(StandardError)) } + attr_accessor :cause + end + + class ConversionError < Scrapegraphai::Errors::Error + sig { returns(T.nilable(StandardError)) } + def cause + end + + # @api private + sig do + params( + on: T::Class[StandardError], + method: Symbol, + target: T.anything, + value: T.anything, + cause: T.nilable(StandardError) + ).returns(T.attached_class) + end + def self.new(on:, method:, target:, value:, cause: nil) + end + end + + class APIError < Scrapegraphai::Errors::Error + sig { returns(URI::Generic) } + attr_accessor :url + + sig { returns(T.nilable(Integer)) } + attr_accessor :status + + sig { returns(T.nilable(T::Hash[String, String])) } + attr_accessor :headers + + sig { returns(T.nilable(T.anything)) } + attr_accessor :body + + # @api private + sig do + params( + url: URI::Generic, + status: T.nilable(Integer), + headers: T.nilable(T::Hash[String, String]), + body: T.nilable(Object), + request: NilClass, + response: NilClass, + message: T.nilable(String) + ).returns(T.attached_class) + end + def self.new( + url:, + status: nil, + headers: nil, + body: nil, + request: nil, + response: nil, + message: nil + ) + end + end + + class APIConnectionError < Scrapegraphai::Errors::APIError + sig { returns(NilClass) } + attr_accessor :status + + sig { returns(NilClass) } + attr_accessor :body + + # @api private + sig do + params( + url: URI::Generic, + status: NilClass, + headers: 
T.nilable(T::Hash[String, String]), + body: NilClass, + request: NilClass, + response: NilClass, + message: T.nilable(String) + ).returns(T.attached_class) + end + def self.new( + url:, + status: nil, + headers: nil, + body: nil, + request: nil, + response: nil, + message: "Connection error." + ) + end + end + + class APITimeoutError < Scrapegraphai::Errors::APIConnectionError + # @api private + sig do + params( + url: URI::Generic, + status: NilClass, + headers: T.nilable(T::Hash[String, String]), + body: NilClass, + request: NilClass, + response: NilClass, + message: T.nilable(String) + ).returns(T.attached_class) + end + def self.new( + url:, + status: nil, + headers: nil, + body: nil, + request: nil, + response: nil, + message: "Request timed out." + ) + end + end + + class APIStatusError < Scrapegraphai::Errors::APIError + # @api private + sig do + params( + url: URI::Generic, + status: Integer, + headers: T.nilable(T::Hash[String, String]), + body: T.nilable(Object), + request: NilClass, + response: NilClass, + message: T.nilable(String) + ).returns(T.attached_class) + end + def self.for( + url:, + status:, + headers:, + body:, + request:, + response:, + message: nil + ) + end + + sig { returns(Integer) } + attr_accessor :status + + # @api private + sig do + params( + url: URI::Generic, + status: Integer, + headers: T.nilable(T::Hash[String, String]), + body: T.nilable(Object), + request: NilClass, + response: NilClass, + message: T.nilable(String) + ).returns(T.attached_class) + end + def self.new( + url:, + status:, + headers:, + body:, + request:, + response:, + message: nil + ) + end + end + + class BadRequestError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 400 + end + + class AuthenticationError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 401 + end + + class PermissionDeniedError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 403 + end + + class NotFoundError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 404 + 
end + + class ConflictError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 409 + end + + class UnprocessableEntityError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 422 + end + + class RateLimitError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = 429 + end + + class InternalServerError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS = T.let((500..), T::Range[Integer]) + end + end +end diff --git a/rbi/scrapegraphai/file_part.rbi b/rbi/scrapegraphai/file_part.rbi new file mode 100644 index 0000000..e8b3a4f --- /dev/null +++ b/rbi/scrapegraphai/file_part.rbi @@ -0,0 +1,37 @@ +# typed: strong + +module Scrapegraphai + class FilePart + sig { returns(T.any(Pathname, StringIO, IO, String)) } + attr_reader :content + + sig { returns(T.nilable(String)) } + attr_reader :content_type + + sig { returns(T.nilable(String)) } + attr_reader :filename + + # @api private + sig { returns(String) } + private def read + end + + sig { params(a: T.anything).returns(String) } + def to_json(*a) + end + + sig { params(a: T.anything).returns(String) } + def to_yaml(*a) + end + + sig do + params( + content: T.any(Pathname, StringIO, IO, String), + filename: T.nilable(T.any(Pathname, String)), + content_type: T.nilable(String) + ).returns(T.attached_class) + end + def self.new(content, filename: nil, content_type: nil) + end + end +end diff --git a/rbi/scrapegraphai/internal.rbi b/rbi/scrapegraphai/internal.rbi new file mode 100644 index 0000000..46561ca --- /dev/null +++ b/rbi/scrapegraphai/internal.rbi @@ -0,0 +1,18 @@ +# typed: strong + +module Scrapegraphai + module Internal + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + # Due to the current WIP status of Shapes support in Sorbet, types referencing + # this alias might be refined in the future. 
+ AnyHash = T.type_alias { T::Hash[Symbol, T.anything] } + + FileInput = + T.type_alias do + T.any(Pathname, StringIO, IO, String, Scrapegraphai::FilePart) + end + + OMIT = T.let(Object.new.freeze, T.anything) + end +end diff --git a/rbi/scrapegraphai/internal/transport/base_client.rbi b/rbi/scrapegraphai/internal/transport/base_client.rbi new file mode 100644 index 0000000..f3505fa --- /dev/null +++ b/rbi/scrapegraphai/internal/transport/base_client.rbi @@ -0,0 +1,300 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Transport + # @api private + class BaseClient + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + abstract! + + RequestComponents = + T.type_alias do + { + method: Symbol, + path: T.any(String, T::Array[String]), + query: + T.nilable( + T::Hash[String, T.nilable(T.any(T::Array[String], String))] + ), + headers: + T.nilable( + T::Hash[ + String, + T.nilable( + T.any( + String, + Integer, + T::Array[T.nilable(T.any(String, Integer))] + ) + ) + ] + ), + body: T.nilable(T.anything), + unwrap: + T.nilable( + T.any( + Symbol, + Integer, + T::Array[T.any(Symbol, Integer)], + T.proc.params(arg0: T.anything).returns(T.anything) + ) + ), + page: + T.nilable( + T::Class[ + Scrapegraphai::Internal::Type::BasePage[ + Scrapegraphai::Internal::Type::BaseModel + ] + ] + ), + stream: T.nilable(T::Class[T.anything]), + model: T.nilable(Scrapegraphai::Internal::Type::Converter::Input), + options: T.nilable(Scrapegraphai::RequestOptions::OrHash) + } + end + + RequestInput = + T.type_alias do + { + method: Symbol, + url: URI::Generic, + headers: T::Hash[String, String], + body: T.anything, + max_retries: Integer, + timeout: Float + } + end + + # from whatwg fetch spec + MAX_REDIRECTS = 20 + + PLATFORM_HEADERS = T::Hash[String, String] + + class << self + # @api private + sig do + params( + req: + Scrapegraphai::Internal::Transport::BaseClient::RequestComponents + ).void + end + def validate!(req) + end + + # @api private + sig do + 
params(status: Integer, headers: T::Hash[String, String]).returns( + T::Boolean + ) + end + def should_retry?(status, headers:) + end + + # @api private + sig do + params( + request: + Scrapegraphai::Internal::Transport::BaseClient::RequestInput, + status: Integer, + response_headers: T::Hash[String, String] + ).returns( + Scrapegraphai::Internal::Transport::BaseClient::RequestInput + ) + end + def follow_redirect(request, status:, response_headers:) + end + + # @api private + sig do + params( + status: T.any(Integer, Scrapegraphai::Errors::APIConnectionError), + stream: T.nilable(T::Enumerable[String]) + ).void + end + def reap_connection!(status, stream:) + end + end + + sig { returns(URI::Generic) } + attr_reader :base_url + + sig { returns(Float) } + attr_reader :timeout + + sig { returns(Integer) } + attr_reader :max_retries + + sig { returns(Float) } + attr_reader :initial_retry_delay + + sig { returns(Float) } + attr_reader :max_retry_delay + + sig { returns(T::Hash[String, String]) } + attr_reader :headers + + sig { returns(T.nilable(String)) } + attr_reader :idempotency_header + + # @api private + sig { returns(Scrapegraphai::Internal::Transport::PooledNetRequester) } + attr_reader :requester + + # @api private + sig do + params( + base_url: String, + timeout: Float, + max_retries: Integer, + initial_retry_delay: Float, + max_retry_delay: Float, + headers: + T::Hash[ + String, + T.nilable( + T.any( + String, + Integer, + T::Array[T.nilable(T.any(String, Integer))] + ) + ) + ], + idempotency_header: T.nilable(String) + ).returns(T.attached_class) + end + def self.new( + base_url:, + timeout: 0.0, + max_retries: 0, + initial_retry_delay: 0.0, + max_retry_delay: 0.0, + headers: {}, + idempotency_header: nil + ) + end + + # @api private + sig { overridable.returns(T::Hash[String, String]) } + private def auth_headers + end + + # @api private + sig { returns(String) } + private def generate_idempotency_key + end + + # @api private + sig do + overridable + 
.params( + req: + Scrapegraphai::Internal::Transport::BaseClient::RequestComponents, + opts: Scrapegraphai::Internal::AnyHash + ) + .returns( + Scrapegraphai::Internal::Transport::BaseClient::RequestInput + ) + end + private def build_request(req, opts) + end + + # @api private + sig do + params( + headers: T::Hash[String, String], + retry_count: Integer + ).returns(Float) + end + private def retry_delay(headers, retry_count:) + end + + # @api private + sig do + params( + request: + Scrapegraphai::Internal::Transport::BaseClient::RequestInput, + redirect_count: Integer, + retry_count: Integer, + send_retry_header: T::Boolean + ).returns([Integer, Net::HTTPResponse, T::Enumerable[String]]) + end + def send_request( + request, + redirect_count:, + retry_count:, + send_retry_header: + ) + end + + # Execute the request specified by `req`. This is the method that all resource + # methods call into. + # + # @overload request(method, path, query: {}, headers: {}, body: nil, unwrap: nil, page: nil, stream: nil, model: Scrapegraphai::Internal::Type::Unknown, options: {}) + sig do + params( + method: Symbol, + path: T.any(String, T::Array[String]), + query: + T.nilable( + T::Hash[String, T.nilable(T.any(T::Array[String], String))] + ), + headers: + T.nilable( + T::Hash[ + String, + T.nilable( + T.any( + String, + Integer, + T::Array[T.nilable(T.any(String, Integer))] + ) + ) + ] + ), + body: T.nilable(T.anything), + unwrap: + T.nilable( + T.any( + Symbol, + Integer, + T::Array[T.any(Symbol, Integer)], + T.proc.params(arg0: T.anything).returns(T.anything) + ) + ), + page: + T.nilable( + T::Class[ + Scrapegraphai::Internal::Type::BasePage[ + Scrapegraphai::Internal::Type::BaseModel + ] + ] + ), + stream: T.nilable(T::Class[T.anything]), + model: T.nilable(Scrapegraphai::Internal::Type::Converter::Input), + options: T.nilable(Scrapegraphai::RequestOptions::OrHash) + ).returns(T.anything) + end + def request( + method, + path, + query: {}, + headers: {}, + body: nil, + unwrap: 
nil, + page: nil, + stream: nil, + model: Scrapegraphai::Internal::Type::Unknown, + options: {} + ) + end + + # @api private + sig { returns(String) } + def inspect + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/transport/pooled_net_requester.rbi b/rbi/scrapegraphai/internal/transport/pooled_net_requester.rbi new file mode 100644 index 0000000..05c4de1 --- /dev/null +++ b/rbi/scrapegraphai/internal/transport/pooled_net_requester.rbi @@ -0,0 +1,80 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Transport + # @api private + class PooledNetRequester + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + Request = + T.type_alias do + { + method: Symbol, + url: URI::Generic, + headers: T::Hash[String, String], + body: T.anything, + deadline: Float + } + end + + # from the golang stdlib + # https://github.com/golang/go/blob/c8eced8580028328fde7c03cbfcb720ce15b2358/src/net/http/transport.go#L49 + KEEP_ALIVE_TIMEOUT = 30 + + DEFAULT_MAX_CONNECTIONS = T.let(T.unsafe(nil), Integer) + + class << self + # @api private + sig { params(url: URI::Generic).returns(Net::HTTP) } + def connect(url) + end + + # @api private + sig { params(conn: Net::HTTP, deadline: Float).void } + def calibrate_socket_timeout(conn, deadline) + end + + # @api private + sig do + params( + request: + Scrapegraphai::Internal::Transport::PooledNetRequester::Request, + blk: T.proc.params(arg0: String).void + ).returns([Net::HTTPGenericRequest, T.proc.void]) + end + def build_request(request, &blk) + end + end + + # @api private + sig do + params( + url: URI::Generic, + deadline: Float, + blk: T.proc.params(arg0: Net::HTTP).void + ).void + end + private def with_pool(url, deadline:, &blk) + end + + # @api private + sig do + params( + request: + Scrapegraphai::Internal::Transport::PooledNetRequester::Request + ).returns([Integer, Net::HTTPResponse, T::Enumerable[String]]) + end + def execute(request) + end + + # @api private + sig { params(size: 
Integer).returns(T.attached_class) } + def self.new( + size: Scrapegraphai::Internal::Transport::PooledNetRequester::DEFAULT_MAX_CONNECTIONS + ) + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/type/array_of.rbi b/rbi/scrapegraphai/internal/type/array_of.rbi new file mode 100644 index 0000000..3cac488 --- /dev/null +++ b/rbi/scrapegraphai/internal/type/array_of.rbi @@ -0,0 +1,104 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Type + # @api private + # + # Array of items of a given type. + class ArrayOf + include Scrapegraphai::Internal::Type::Converter + include Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + abstract! + + Elem = type_member(:out) + + sig do + params( + type_info: + T.any( + Scrapegraphai::Internal::AnyHash, + T.proc.returns(Scrapegraphai::Internal::Type::Converter::Input), + Scrapegraphai::Internal::Type::Converter::Input + ), + spec: Scrapegraphai::Internal::AnyHash + ).returns(T.attached_class) + end + def self.[](type_info, spec = {}) + end + + sig { params(other: T.anything).returns(T::Boolean) } + def ===(other) + end + + sig { params(other: T.anything).returns(T::Boolean) } + def ==(other) + end + + sig { returns(Integer) } + def hash + end + + # @api private + sig do + override + .params( + value: T.any(T::Array[T.anything], T.anything), + state: Scrapegraphai::Internal::Type::Converter::CoerceState + ) + .returns(T.any(T::Array[T.anything], T.anything)) + end + def coerce(value, state:) + end + + # @api private + sig do + override + .params( + value: T.any(T::Array[T.anything], T.anything), + state: Scrapegraphai::Internal::Type::Converter::DumpState + ) + .returns(T.any(T::Array[T.anything], T.anything)) + end + def dump(value, state:) + end + + # @api private + sig { returns(T.anything) } + def to_sorbet_type + end + + # @api private + sig { returns(Elem) } + protected def item_type + end + + # @api private + sig { returns(T::Boolean) } + protected def nilable? 
+ end + + # @api private + sig do + params( + type_info: + T.any( + Scrapegraphai::Internal::AnyHash, + T.proc.returns(Scrapegraphai::Internal::Type::Converter::Input), + Scrapegraphai::Internal::Type::Converter::Input + ), + spec: Scrapegraphai::Internal::AnyHash + ).void + end + def initialize(type_info, spec = {}) + end + + # @api private + sig { params(depth: Integer).returns(String) } + def inspect(depth: 0) + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/type/base_model.rbi b/rbi/scrapegraphai/internal/type/base_model.rbi new file mode 100644 index 0000000..204dd02 --- /dev/null +++ b/rbi/scrapegraphai/internal/type/base_model.rbi @@ -0,0 +1,310 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Type + class BaseModel + extend Scrapegraphai::Internal::Type::Converter + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + abstract! + + KnownField = + T.type_alias do + { + mode: T.nilable(Symbol), + required: T::Boolean, + nilable: T::Boolean + } + end + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Internal::Type::BaseModel, + Scrapegraphai::Internal::AnyHash + ) + end + + class << self + # @api private + # + # Assumes superclass fields are totally defined before fields are accessed / + # defined on subclasses. 
+ sig { params(child: T.self_type).void } + def inherited(child) + end + + # @api private + sig do + returns( + T::Hash[ + Symbol, + T.all( + Scrapegraphai::Internal::Type::BaseModel::KnownField, + { + type_fn: + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ) + } + ) + ] + ) + end + def known_fields + end + + # @api private + sig do + returns( + T::Hash[ + Symbol, + T.all( + Scrapegraphai::Internal::Type::BaseModel::KnownField, + { type: Scrapegraphai::Internal::Type::Converter::Input } + ) + ] + ) + end + def fields + end + + # @api private + sig do + params( + name_sym: Symbol, + required: T::Boolean, + type_info: + T.any( + { + const: + T.nilable( + T.any(NilClass, T::Boolean, Integer, Float, Symbol) + ), + enum: + T.nilable( + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ) + ), + union: + T.nilable( + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ) + ), + api_name: Symbol, + nil?: T::Boolean + }, + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ), + Scrapegraphai::Internal::Type::Converter::Input + ), + spec: Scrapegraphai::Internal::AnyHash + ).void + end + private def add_field(name_sym, required:, type_info:, spec:) + end + + # @api private + sig do + params( + name_sym: Symbol, + type_info: + T.any( + Scrapegraphai::Internal::AnyHash, + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ), + Scrapegraphai::Internal::Type::Converter::Input + ), + spec: Scrapegraphai::Internal::AnyHash + ).void + end + def required(name_sym, type_info, spec = {}) + end + + # @api private + sig do + params( + name_sym: Symbol, + type_info: + T.any( + Scrapegraphai::Internal::AnyHash, + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ), + Scrapegraphai::Internal::Type::Converter::Input + ), + spec: Scrapegraphai::Internal::AnyHash + ).void + end + def optional(name_sym, type_info, spec = {}) + end + + # @api private + # + # `request_only` attributes not 
excluded from `.#coerce` when receiving responses + # even if well behaved servers should not send them + sig { params(blk: T.proc.void).void } + private def request_only(&blk) + end + + # @api private + # + # `response_only` attributes are omitted from `.#dump` when making requests + sig { params(blk: T.proc.void).void } + private def response_only(&blk) + end + + sig { params(other: T.anything).returns(T::Boolean) } + def ==(other) + end + + sig { returns(Integer) } + def hash + end + end + + sig { params(other: T.anything).returns(T::Boolean) } + def ==(other) + end + + sig { returns(Integer) } + def hash + end + + class << self + # @api private + sig do + override + .params( + value: + T.any( + Scrapegraphai::Internal::Type::BaseModel, + T::Hash[T.anything, T.anything], + T.anything + ), + state: Scrapegraphai::Internal::Type::Converter::CoerceState + ) + .returns(T.any(T.attached_class, T.anything)) + end + def coerce(value, state:) + end + + # @api private + sig do + override + .params( + value: T.any(T.attached_class, T.anything), + state: Scrapegraphai::Internal::Type::Converter::DumpState + ) + .returns(T.any(T::Hash[T.anything, T.anything], T.anything)) + end + def dump(value, state:) + end + + # @api private + sig { returns(T.anything) } + def to_sorbet_type + end + end + + class << self + # @api private + sig do + params( + model: Scrapegraphai::Internal::Type::BaseModel, + convert: T::Boolean + ).returns(Scrapegraphai::Internal::AnyHash) + end + def recursively_to_h(model, convert:) + end + end + + # Returns the raw value associated with the given key, if found. Otherwise, nil is + # returned. + # + # It is valid to lookup keys that are not in the API spec, for example to access + # undocumented features. This method does not parse response data into + # higher-level types. Lookup by anything other than a Symbol is an ArgumentError. 
+ sig { params(key: Symbol).returns(T.nilable(T.anything)) } + def [](key) + end + + # Returns a Hash of the data underlying this object. O(1) + # + # Keys are Symbols and values are the raw values from the response. The return + # value indicates which values were ever set on the object. i.e. there will be a + # key in this hash if they ever were, even if the set value was nil. + # + # This method is not recursive. The returned value is shared by the object, so it + # should not be mutated. + sig { overridable.returns(Scrapegraphai::Internal::AnyHash) } + def to_h + end + + # Returns a Hash of the data underlying this object. O(1) + # + # Keys are Symbols and values are the raw values from the response. The return + # value indicates which values were ever set on the object. i.e. there will be a + # key in this hash if they ever were, even if the set value was nil. + # + # This method is not recursive. The returned value is shared by the object, so it + # should not be mutated. + sig { overridable.returns(Scrapegraphai::Internal::AnyHash) } + def to_hash + end + + # In addition to the behaviour of `#to_h`, this method will recursively call + # `#to_h` on nested models. + sig { overridable.returns(Scrapegraphai::Internal::AnyHash) } + def deep_to_h + end + + sig do + params(keys: T.nilable(T::Array[Symbol])).returns( + Scrapegraphai::Internal::AnyHash + ) + end + def deconstruct_keys(keys) + end + + sig { params(a: T.anything).returns(String) } + def to_json(*a) + end + + sig { params(a: T.anything).returns(String) } + def to_yaml(*a) + end + + # Create a new instance of a model. 
+ sig do + params(data: T.any(T::Hash[Symbol, T.anything], T.self_type)).returns( + T.attached_class + ) + end + def self.new(data = {}) + end + + class << self + # @api private + sig { params(depth: Integer).returns(String) } + def inspect(depth: 0) + end + end + + sig { returns(String) } + def to_s + end + + # @api private + sig { returns(String) } + def inspect + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/type/base_page.rbi b/rbi/scrapegraphai/internal/type/base_page.rbi new file mode 100644 index 0000000..d31316d --- /dev/null +++ b/rbi/scrapegraphai/internal/type/base_page.rbi @@ -0,0 +1,43 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Type + # @api private + # + # This module provides a base implementation for paginated responses in the SDK. + module BasePage + Elem = type_member(:out) + + sig { overridable.returns(T::Boolean) } + def next_page? + end + + sig { overridable.returns(T.self_type) } + def next_page + end + + sig { overridable.params(blk: T.proc.params(arg0: Elem).void).void } + def auto_paging_each(&blk) + end + + sig { returns(T::Enumerable[Elem]) } + def to_enum + end + + # @api private + sig do + params( + client: Scrapegraphai::Internal::Transport::BaseClient, + req: + Scrapegraphai::Internal::Transport::BaseClient::RequestComponents, + headers: T::Hash[String, String], + page_data: T.anything + ).void + end + def initialize(client:, req:, headers:, page_data:) + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/type/boolean.rbi b/rbi/scrapegraphai/internal/type/boolean.rbi new file mode 100644 index 0000000..5f99c13 --- /dev/null +++ b/rbi/scrapegraphai/internal/type/boolean.rbi @@ -0,0 +1,58 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Type + # @api private + # + # Ruby has no Boolean class; this is something for models to refer to. 
+ class Boolean + extend Scrapegraphai::Internal::Type::Converter + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + abstract! + + sig { params(other: T.anything).returns(T::Boolean) } + def self.===(other) + end + + sig { params(other: T.anything).returns(T::Boolean) } + def self.==(other) + end + + class << self + # @api private + # + # Coerce value to Boolean if possible, otherwise return the original value. + sig do + override + .params( + value: T.any(T::Boolean, T.anything), + state: Scrapegraphai::Internal::Type::Converter::CoerceState + ) + .returns(T.any(T::Boolean, T.anything)) + end + def coerce(value, state:) + end + + # @api private + sig do + override + .params( + value: T.any(T::Boolean, T.anything), + state: Scrapegraphai::Internal::Type::Converter::DumpState + ) + .returns(T.any(T::Boolean, T.anything)) + end + def dump(value, state:) + end + + # @api private + sig { returns(T.anything) } + def to_sorbet_type + end + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/type/converter.rbi b/rbi/scrapegraphai/internal/type/converter.rbi new file mode 100644 index 0000000..af9babb --- /dev/null +++ b/rbi/scrapegraphai/internal/type/converter.rbi @@ -0,0 +1,225 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Type + # @api private + module Converter + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + Input = + T.type_alias do + T.any( + Scrapegraphai::Internal::Type::Converter, + T::Class[T.anything] + ) + end + + CoerceState = + T.type_alias do + { + translate_names: T::Boolean, + strictness: T::Boolean, + exactness: { + yes: Integer, + no: Integer, + maybe: Integer + }, + error: T::Class[StandardError], + branched: Integer + } + end + + DumpState = T.type_alias { { can_retry: T::Boolean } } + + # @api private + sig do + overridable + .params( + value: T.anything, + state: Scrapegraphai::Internal::Type::Converter::CoerceState + ) + .returns(T.anything) + end + def coerce(value, state:) + 
end + + # @api private + sig do + overridable + .params( + value: T.anything, + state: Scrapegraphai::Internal::Type::Converter::DumpState + ) + .returns(T.anything) + end + def dump(value, state:) + end + + # @api private + sig { params(depth: Integer).returns(String) } + def inspect(depth: 0) + end + + class << self + # @api private + sig do + params( + spec: + T.any( + { + const: + T.nilable( + T.any(NilClass, T::Boolean, Integer, Float, Symbol) + ), + enum: + T.nilable( + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ) + ), + union: + T.nilable( + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ) + ) + }, + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ), + Scrapegraphai::Internal::Type::Converter::Input + ) + ).returns(T.proc.returns(T.anything)) + end + def self.type_info(spec) + end + + # @api private + sig do + params( + type_info: + T.any( + { + const: + T.nilable( + T.any(NilClass, T::Boolean, Integer, Float, Symbol) + ), + enum: + T.nilable( + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ) + ), + union: + T.nilable( + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ) + ) + }, + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ), + Scrapegraphai::Internal::Type::Converter::Input + ), + spec: + T.any( + { + const: + T.nilable( + T.any(NilClass, T::Boolean, Integer, Float, Symbol) + ), + enum: + T.nilable( + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ) + ), + union: + T.nilable( + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ) + ) + }, + T.proc.returns( + Scrapegraphai::Internal::Type::Converter::Input + ), + Scrapegraphai::Internal::Type::Converter::Input + ) + ).returns(Scrapegraphai::Internal::AnyHash) + end + def self.meta_info(type_info, spec) + end + + # @api private + sig do + params(translate_names: T::Boolean).returns( + Scrapegraphai::Internal::Type::Converter::CoerceState + ) 
+ end + def self.new_coerce_state(translate_names: true) + end + + # @api private + # + # Based on `target`, transform `value` into `target`, to the extent possible: + # + # 1. if the given `value` conforms to `target` already, return the given `value` + # 2. if it's possible and safe to convert the given `value` to `target`, then the + # converted value + # 3. otherwise, the given `value` unaltered + # + # The coercion process is subject to improvement between minor release versions. + # See https://docs.pydantic.dev/latest/concepts/unions/#smart-mode + sig do + params( + target: Scrapegraphai::Internal::Type::Converter::Input, + value: T.anything, + state: Scrapegraphai::Internal::Type::Converter::CoerceState + ).returns(T.anything) + end + def self.coerce( + target, + value, + # The `strictness` is one of `true`, `false`. This informs the coercion strategy + # when we have to decide between multiple possible conversion targets: + # + # - `true`: the conversion must be exact, with minimum coercion. + # - `false`: the conversion can be approximate, with some coercion. + # + # The `exactness` is `Hash` with keys being one of `yes`, `no`, or `maybe`. For + # any given conversion attempt, the exactness will be updated based on how closely + # the value recursively matches the target type: + # + # - `yes`: the value can be converted to the target type with minimum coercion. + # - `maybe`: the value can be converted to the target type with some reasonable + # coercion. + # - `no`: the value cannot be converted to the target type. + # + # See implementation below for more details. 
+ state: Scrapegraphai::Internal::Type::Converter.new_coerce_state + ) + end + + # @api private + sig do + params( + target: Scrapegraphai::Internal::Type::Converter::Input, + value: T.anything, + state: Scrapegraphai::Internal::Type::Converter::DumpState + ).returns(T.anything) + end + def self.dump(target, value, state: { can_retry: true }) + end + + # @api private + sig { params(target: T.anything, depth: Integer).returns(String) } + def self.inspect(target, depth:) + end + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/type/enum.rbi b/rbi/scrapegraphai/internal/type/enum.rbi new file mode 100644 index 0000000..fc2a725 --- /dev/null +++ b/rbi/scrapegraphai/internal/type/enum.rbi @@ -0,0 +1,82 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Type + # @api private + # + # A value from among a specified list of options. OpenAPI enum values map to Ruby + # values in the SDK as follows: + # + # 1. boolean => true | false + # 2. integer => Integer + # 3. float => Float + # 4. string => Symbol + # + # We can therefore convert string values to Symbols, but can't convert other + # values safely. + module Enum + include Scrapegraphai::Internal::Type::Converter + include Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + # All of the valid Symbol values for this enum. + sig do + overridable.returns( + T::Array[T.any(NilClass, T::Boolean, Integer, Float, Symbol)] + ) + end + def values + end + + sig { params(other: T.anything).returns(T::Boolean) } + def ===(other) + end + + sig { params(other: T.anything).returns(T::Boolean) } + def ==(other) + end + + sig { returns(Integer) } + def hash + end + + # @api private + # + # Unlike with primitives, `Enum` additionally validates that the value is a member + # of the enum. 
+ sig do + override + .params( + value: T.any(String, Symbol, T.anything), + state: Scrapegraphai::Internal::Type::Converter::CoerceState + ) + .returns(T.any(Symbol, T.anything)) + end + def coerce(value, state:) + end + + # @api private + sig do + override + .params( + value: T.any(Symbol, T.anything), + state: Scrapegraphai::Internal::Type::Converter::DumpState + ) + .returns(T.any(Symbol, T.anything)) + end + def dump(value, state:) + end + + # @api private + sig { returns(T.anything) } + def to_sorbet_type + end + + # @api private + sig { params(depth: Integer).returns(String) } + def inspect(depth: 0) + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/type/file_input.rbi b/rbi/scrapegraphai/internal/type/file_input.rbi new file mode 100644 index 0000000..1d9e688 --- /dev/null +++ b/rbi/scrapegraphai/internal/type/file_input.rbi @@ -0,0 +1,59 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Type + # @api private + # + # Either `Pathname` or `StringIO`, or `IO`, or + # `Scrapegraphai::Internal::Type::FileInput`. + # + # Note: when `IO` is used, all retries are disabled, since many IO` streams are + # not rewindable. + class FileInput + extend Scrapegraphai::Internal::Type::Converter + + abstract! 
+ + sig { params(other: T.anything).returns(T::Boolean) } + def self.===(other) + end + + sig { params(other: T.anything).returns(T::Boolean) } + def self.==(other) + end + + class << self + # @api private + sig do + override + .params( + value: T.any(StringIO, String, T.anything), + state: Scrapegraphai::Internal::Type::Converter::CoerceState + ) + .returns(T.any(StringIO, T.anything)) + end + def coerce(value, state:) + end + + # @api private + sig do + override + .params( + value: T.any(Pathname, StringIO, IO, String, T.anything), + state: Scrapegraphai::Internal::Type::Converter::DumpState + ) + .returns(T.any(Pathname, StringIO, IO, String, T.anything)) + end + def dump(value, state:) + end + + # @api private + sig { returns(T.anything) } + def to_sorbet_type + end + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/type/hash_of.rbi b/rbi/scrapegraphai/internal/type/hash_of.rbi new file mode 100644 index 0000000..f9ea8ec --- /dev/null +++ b/rbi/scrapegraphai/internal/type/hash_of.rbi @@ -0,0 +1,104 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Type + # @api private + # + # Hash of items of a given type. + class HashOf + include Scrapegraphai::Internal::Type::Converter + include Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + abstract! 
+ + Elem = type_member(:out) + + sig do + params( + type_info: + T.any( + Scrapegraphai::Internal::AnyHash, + T.proc.returns(Scrapegraphai::Internal::Type::Converter::Input), + Scrapegraphai::Internal::Type::Converter::Input + ), + spec: Scrapegraphai::Internal::AnyHash + ).returns(T.attached_class) + end + def self.[](type_info, spec = {}) + end + + sig { params(other: T.anything).returns(T::Boolean) } + def ===(other) + end + + sig { params(other: T.anything).returns(T::Boolean) } + def ==(other) + end + + sig { returns(Integer) } + def hash + end + + # @api private + sig do + override + .params( + value: T.any(T::Hash[T.anything, T.anything], T.anything), + state: Scrapegraphai::Internal::Type::Converter::CoerceState + ) + .returns(T.any(Scrapegraphai::Internal::AnyHash, T.anything)) + end + def coerce(value, state:) + end + + # @api private + sig do + override + .params( + value: T.any(T::Hash[T.anything, T.anything], T.anything), + state: Scrapegraphai::Internal::Type::Converter::DumpState + ) + .returns(T.any(Scrapegraphai::Internal::AnyHash, T.anything)) + end + def dump(value, state:) + end + + # @api private + sig { returns(T.anything) } + def to_sorbet_type + end + + # @api private + sig { returns(Elem) } + protected def item_type + end + + # @api private + sig { returns(T::Boolean) } + protected def nilable? 
+ end + + # @api private + sig do + params( + type_info: + T.any( + Scrapegraphai::Internal::AnyHash, + T.proc.returns(Scrapegraphai::Internal::Type::Converter::Input), + Scrapegraphai::Internal::Type::Converter::Input + ), + spec: Scrapegraphai::Internal::AnyHash + ).void + end + def initialize(type_info, spec = {}) + end + + # @api private + sig { params(depth: Integer).returns(String) } + def inspect(depth: 0) + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/type/request_parameters.rbi b/rbi/scrapegraphai/internal/type/request_parameters.rbi new file mode 100644 index 0000000..41d678f --- /dev/null +++ b/rbi/scrapegraphai/internal/type/request_parameters.rbi @@ -0,0 +1,31 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Type + # @api private + module RequestParameters + # Options to specify HTTP behaviour for this request. + sig { returns(Scrapegraphai::RequestOptions) } + attr_reader :request_options + + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).void + end + attr_writer :request_options + + # @api private + module Converter + # @api private + sig do + params(params: T.anything).returns( + [T.anything, Scrapegraphai::Internal::AnyHash] + ) + end + def dump_request(params) + end + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/type/union.rbi b/rbi/scrapegraphai/internal/type/union.rbi new file mode 100644 index 0000000..e2b28f2 --- /dev/null +++ b/rbi/scrapegraphai/internal/type/union.rbi @@ -0,0 +1,128 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Type + # @api private + module Union + include Scrapegraphai::Internal::Type::Converter + include Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + # @api private + # + # All of the specified variant info for this union. 
+ sig do + returns( + T::Array[ + [ + T.nilable(Symbol), + T.proc.returns(Scrapegraphai::Internal::Type::Converter::Input), + Scrapegraphai::Internal::AnyHash + ] + ] + ) + end + private def known_variants + end + + # @api private + sig do + returns( + T::Array[ + [T.nilable(Symbol), T.anything, Scrapegraphai::Internal::AnyHash] + ] + ) + end + protected def derefed_variants + end + + # All of the specified variants for this union. + sig { overridable.returns(T::Array[T.anything]) } + def variants + end + + # @api private + sig { params(property: Symbol).void } + private def discriminator(property) + end + + # @api private + sig do + params( + key: + T.any( + Symbol, + Scrapegraphai::Internal::AnyHash, + T.proc.returns(T.anything), + T.anything + ), + spec: + T.any( + Scrapegraphai::Internal::AnyHash, + T.proc.returns(T.anything), + T.anything + ) + ).void + end + private def variant(key, spec = nil) + end + + # @api private + sig { params(value: T.anything).returns(T.nilable(T.anything)) } + private def resolve_variant(value) + end + + sig { params(other: T.anything).returns(T::Boolean) } + def ===(other) + end + + sig { params(other: T.anything).returns(T::Boolean) } + def ==(other) + end + + sig { returns(Integer) } + def hash + end + + # @api private + # + # Tries to efficiently coerce the given value to one of the known variants. + # + # If the value cannot match any of the known variants, the coercion is considered + # non-viable and returns the original value. 
+ sig do + override + .params( + value: T.anything, + state: Scrapegraphai::Internal::Type::Converter::CoerceState + ) + .returns(T.anything) + end + def coerce(value, state:) + end + + # @api private + sig do + override + .params( + value: T.anything, + state: Scrapegraphai::Internal::Type::Converter::DumpState + ) + .returns(T.anything) + end + def dump(value, state:) + end + + # @api private + sig { returns(T.anything) } + def to_sorbet_type + end + + # @api private + sig { params(depth: Integer).returns(String) } + def inspect(depth: 0) + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/type/unknown.rbi b/rbi/scrapegraphai/internal/type/unknown.rbi new file mode 100644 index 0000000..b0e0f7d --- /dev/null +++ b/rbi/scrapegraphai/internal/type/unknown.rbi @@ -0,0 +1,58 @@ +# typed: strong + +module Scrapegraphai + module Internal + module Type + # @api private + # + # When we don't know what to expect for the value. + class Unknown + extend Scrapegraphai::Internal::Type::Converter + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + abstract! + + sig { params(other: T.anything).returns(T::Boolean) } + def self.===(other) + end + + sig { params(other: T.anything).returns(T::Boolean) } + def self.==(other) + end + + class << self + # @api private + # + # No coercion needed for Unknown type. 
+ sig do + override + .params( + value: T.anything, + state: Scrapegraphai::Internal::Type::Converter::CoerceState + ) + .returns(T.anything) + end + def coerce(value, state:) + end + + # @api private + sig do + override + .params( + value: T.anything, + state: Scrapegraphai::Internal::Type::Converter::DumpState + ) + .returns(T.anything) + end + def dump(value, state:) + end + + # @api private + sig { returns(T.anything) } + def to_sorbet_type + end + end + end + end + end +end diff --git a/rbi/scrapegraphai/internal/util.rbi b/rbi/scrapegraphai/internal/util.rbi new file mode 100644 index 0000000..a023c10 --- /dev/null +++ b/rbi/scrapegraphai/internal/util.rbi @@ -0,0 +1,487 @@ +# typed: strong + +module Scrapegraphai + module Internal + # @api private + module Util + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + # @api private + sig { returns(Float) } + def self.monotonic_secs + end + + # @api private + sig do + params(ns: T.any(Module, T::Class[T.anything])).returns( + T::Enumerable[T.any(Module, T::Class[T.anything])] + ) + end + def self.walk_namespaces(ns) + end + + class << self + # @api private + sig { returns(String) } + def arch + end + + # @api private + sig { returns(String) } + def os + end + end + + class << self + # @api private + sig { params(input: T.anything).returns(T::Boolean) } + def primitive?(input) + end + + # @api private + sig do + params(input: T.any(String, T::Boolean)).returns( + T.any(T::Boolean, T.anything) + ) + end + def coerce_boolean(input) + end + + # @api private + sig do + params(input: T.any(String, T::Boolean)).returns( + T.nilable(T::Boolean) + ) + end + def coerce_boolean!(input) + end + + # @api private + sig do + params(input: T.any(String, Integer)).returns( + T.any(Integer, T.anything) + ) + end + def coerce_integer(input) + end + + # @api private + sig do + params(input: T.any(String, Integer, Float)).returns( + T.any(Float, T.anything) + ) + end + def coerce_float(input) + end + + # @api private + 
sig do + params(input: T.anything).returns( + T.any(T::Hash[T.anything, T.anything], T.anything) + ) + end + def coerce_hash(input) + end + + # @api private + sig do + params(input: T.anything).returns( + T.nilable(T::Hash[T.anything, T.anything]) + ) + end + def coerce_hash!(input) + end + end + + class << self + # @api private + sig do + params(lhs: T.anything, rhs: T.anything, concat: T::Boolean).returns( + T.anything + ) + end + private def deep_merge_lr(lhs, rhs, concat: false) + end + + # @api private + # + # Recursively merge one hash with another. If the values at a given key are not + # both hashes, just take the new value. + sig do + params( + values: T::Array[T.anything], + sentinel: T.nilable(T.anything), + concat: T::Boolean + ).returns(T.anything) + end + def deep_merge( + *values, + # the value to return if no values are provided. + sentinel: nil, + # whether to merge sequences by concatenation. + concat: false + ) + end + + # @api private + sig do + params( + data: + T.any( + Scrapegraphai::Internal::AnyHash, + T::Array[T.anything], + T.anything + ), + pick: + T.nilable( + T.any( + Symbol, + Integer, + T::Array[T.any(Symbol, Integer)], + T.proc.params(arg0: T.anything).returns(T.anything) + ) + ), + blk: T.nilable(T.proc.returns(T.anything)) + ).returns(T.nilable(T.anything)) + end + def dig(data, pick, &blk) + end + end + + class << self + # @api private + sig { params(uri: URI::Generic).returns(String) } + def uri_origin(uri) + end + + # @api private + sig { params(path: T.any(String, T::Array[String])).returns(String) } + def interpolate_path(path) + end + end + + class << self + # @api private + sig do + params(query: T.nilable(String)).returns( + T::Hash[String, T::Array[String]] + ) + end + def decode_query(query) + end + + # @api private + sig do + params( + query: + T.nilable( + T::Hash[String, T.nilable(T.any(T::Array[String], String))] + ) + ).returns(T.nilable(String)) + end + def encode_query(query) + end + end + + ParsedUri = + 
T.type_alias do + { + scheme: T.nilable(String), + host: T.nilable(String), + port: T.nilable(Integer), + path: T.nilable(String), + query: T::Hash[String, T::Array[String]] + } + end + + class << self + # @api private + sig do + params(url: T.any(URI::Generic, String)).returns( + Scrapegraphai::Internal::Util::ParsedUri + ) + end + def parse_uri(url) + end + + # @api private + sig do + params(parsed: Scrapegraphai::Internal::Util::ParsedUri).returns( + URI::Generic + ) + end + def unparse_uri(parsed) + end + + # @api private + sig do + params( + lhs: Scrapegraphai::Internal::Util::ParsedUri, + rhs: Scrapegraphai::Internal::Util::ParsedUri + ).returns(URI::Generic) + end + def join_parsed_uri(lhs, rhs) + end + end + + class << self + # @api private + sig do + params( + headers: + T::Hash[ + String, + T.nilable( + T.any( + String, + Integer, + T::Array[T.nilable(T.any(String, Integer))] + ) + ) + ] + ).returns(T::Hash[String, String]) + end + def normalized_headers(*headers) + end + end + + # @api private + # + # An adapter that satisfies the IO interface required by `::IO.copy_stream` + class ReadIOAdapter + # @api private + sig { returns(T.nilable(T::Boolean)) } + def close? 
+ end + + # @api private + sig { void } + def close + end + + # @api private + sig { params(max_len: T.nilable(Integer)).returns(String) } + private def read_enum(max_len) + end + + # @api private + sig do + params( + max_len: T.nilable(Integer), + out_string: T.nilable(String) + ).returns(T.nilable(String)) + end + def read(max_len = nil, out_string = nil) + end + + # @api private + sig do + params( + src: T.any(String, Pathname, StringIO, T::Enumerable[String]), + blk: T.proc.params(arg0: String).void + ).returns(T.attached_class) + end + def self.new(src, &blk) + end + end + + class << self + sig do + params(blk: T.proc.params(y: Enumerator::Yielder).void).returns( + T::Enumerable[String] + ) + end + def writable_enum(&blk) + end + end + + JSON_CONTENT = + T.let(%r{^application/(?:vnd(?:\.[^.]+)*\+)?json(?!l)}, Regexp) + JSONL_CONTENT = + T.let(%r{^application/(:?x-(?:n|l)djson)|(:?(?:x-)?jsonl)}, Regexp) + + class << self + # @api private + sig do + params( + y: Enumerator::Yielder, + val: T.anything, + closing: T::Array[T.proc.void], + content_type: T.nilable(String) + ).void + end + private def write_multipart_content( + y, + val:, + closing:, + content_type: nil + ) + end + + # @api private + sig do + params( + y: Enumerator::Yielder, + boundary: String, + key: T.any(Symbol, String), + val: T.anything, + closing: T::Array[T.proc.void] + ).void + end + private def write_multipart_chunk(y, boundary:, key:, val:, closing:) + end + + # @api private + # + # https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.1.1.md#special-considerations-for-multipart-content + sig do + params(body: T.anything).returns([String, T::Enumerable[String]]) + end + private def encode_multipart_streaming(body) + end + + # @api private + sig do + params(headers: T::Hash[String, String], body: T.anything).returns( + T.anything + ) + end + def encode_content(headers, body) + end + + # @api private + # + # https://www.iana.org/assignments/character-sets/character-sets.xhtml + 
sig { params(content_type: String, text: String).void } + def force_charset!(content_type, text:) + end + + # @api private + # + # Assumes each chunk in stream has `Encoding::BINARY`. + sig do + params( + headers: T::Hash[String, String], + stream: T::Enumerable[String], + suppress_error: T::Boolean + ).returns(T.anything) + end + def decode_content(headers, stream:, suppress_error: false) + end + end + + class << self + # @api private + # + # https://doc.rust-lang.org/std/iter/trait.FusedIterator.html + sig do + params( + enum: T::Enumerable[T.anything], + external: T::Boolean, + close: T.proc.void + ).returns(T::Enumerable[T.anything]) + end + def fused_enum(enum, external: false, &close) + end + + # @api private + sig { params(enum: T.nilable(T::Enumerable[T.anything])).void } + def close_fused!(enum) + end + + # @api private + sig do + params( + enum: T.nilable(T::Enumerable[T.anything]), + blk: T.proc.params(arg0: Enumerator::Yielder).void + ).returns(T::Enumerable[T.anything]) + end + def chain_fused(enum, &blk) + end + end + + ServerSentEvent = + T.type_alias do + { + event: T.nilable(String), + data: T.nilable(String), + id: T.nilable(String), + retry: T.nilable(Integer) + } + end + + class << self + # @api private + # + # Assumes Strings have been forced into having `Encoding::BINARY`. + # + # This decoder is responsible for reassembling lines split across multiple + # fragments. + sig do + params(enum: T::Enumerable[String]).returns(T::Enumerable[String]) + end + def decode_lines(enum) + end + + # @api private + # + # https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream + # + # Assumes that `lines` has been decoded with `#decode_lines`. 
+ sig do + params(lines: T::Enumerable[String]).returns( + T::Enumerable[Scrapegraphai::Internal::Util::ServerSentEvent] + ) + end + def decode_sse(lines) + end + end + + # @api private + module SorbetRuntimeSupport + class MissingSorbetRuntimeError < ::RuntimeError + end + + # @api private + sig { returns(T::Hash[Symbol, T.anything]) } + private def sorbet_runtime_constants + end + + # @api private + sig { params(name: Symbol).void } + def const_missing(name) + end + + # @api private + sig { params(name: Symbol).returns(T::Boolean) } + def sorbet_constant_defined?(name) + end + + # @api private + sig { params(name: Symbol, blk: T.proc.returns(T.anything)).void } + def define_sorbet_constant!(name, &blk) + end + + # @api private + sig { returns(T.anything) } + def to_sorbet_type + end + + class << self + # @api private + sig do + params( + type: + T.any( + Scrapegraphai::Internal::Util::SorbetRuntimeSupport, + T.anything + ) + ).returns(T.anything) + end + def to_sorbet_type(type) + end + end + end + end + end +end diff --git a/rbi/scrapegraphai/models.rbi b/rbi/scrapegraphai/models.rbi new file mode 100644 index 0000000..13aa3b9 --- /dev/null +++ b/rbi/scrapegraphai/models.rbi @@ -0,0 +1,44 @@ +# typed: strong + +module Scrapegraphai + CompletedMarkdownify = Scrapegraphai::Models::CompletedMarkdownify + + CompletedSearchScraper = Scrapegraphai::Models::CompletedSearchScraper + + CompletedSmartscraper = Scrapegraphai::Models::CompletedSmartscraper + + CrawlRetrieveResultsParams = Scrapegraphai::Models::CrawlRetrieveResultsParams + + CrawlStartParams = Scrapegraphai::Models::CrawlStartParams + + CreditRetrieveParams = Scrapegraphai::Models::CreditRetrieveParams + + FailedSmartscraper = Scrapegraphai::Models::FailedSmartscraper + + FeedbackSubmitParams = Scrapegraphai::Models::FeedbackSubmitParams + + GenerateSchemaCreateParams = Scrapegraphai::Models::GenerateSchemaCreateParams + + GenerateSchemaRetrieveParams = + Scrapegraphai::Models::GenerateSchemaRetrieveParams 
+ + HealthzCheckParams = Scrapegraphai::Models::HealthzCheckParams + + MarkdownifyConvertParams = Scrapegraphai::Models::MarkdownifyConvertParams + + MarkdownifyRetrieveStatusParams = + Scrapegraphai::Models::MarkdownifyRetrieveStatusParams + + SearchscraperCreateParams = Scrapegraphai::Models::SearchscraperCreateParams + + SearchscraperRetrieveStatusParams = + Scrapegraphai::Models::SearchscraperRetrieveStatusParams + + SmartscraperCreateParams = Scrapegraphai::Models::SmartscraperCreateParams + + SmartscraperListParams = Scrapegraphai::Models::SmartscraperListParams + + SmartscraperRetrieveParams = Scrapegraphai::Models::SmartscraperRetrieveParams + + ValidateAPIKeyParams = Scrapegraphai::Models::ValidateAPIKeyParams +end diff --git a/rbi/scrapegraphai/models/completed_markdownify.rbi b/rbi/scrapegraphai/models/completed_markdownify.rbi new file mode 100644 index 0000000..1ca3cfd --- /dev/null +++ b/rbi/scrapegraphai/models/completed_markdownify.rbi @@ -0,0 +1,118 @@ +# typed: strong + +module Scrapegraphai + module Models + class CompletedMarkdownify < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::CompletedMarkdownify, + Scrapegraphai::Internal::AnyHash + ) + end + + sig { returns(T.nilable(String)) } + attr_reader :error + + sig { params(error: String).void } + attr_writer :error + + sig { returns(T.nilable(String)) } + attr_reader :request_id + + sig { params(request_id: String).void } + attr_writer :request_id + + # Markdown content + sig { returns(T.nilable(String)) } + attr_accessor :result + + sig do + returns( + T.nilable(Scrapegraphai::CompletedMarkdownify::Status::TaggedSymbol) + ) + end + attr_reader :status + + sig do + params( + status: Scrapegraphai::CompletedMarkdownify::Status::OrSymbol + ).void + end + attr_writer :status + + sig { returns(T.nilable(String)) } + attr_reader :website_url + + sig { params(website_url: String).void } + attr_writer :website_url + + sig do + params( + error: String, + 
request_id: String, + result: T.nilable(String), + status: Scrapegraphai::CompletedMarkdownify::Status::OrSymbol, + website_url: String + ).returns(T.attached_class) + end + def self.new( + error: nil, + request_id: nil, + # Markdown content + result: nil, + status: nil, + website_url: nil + ) + end + + sig do + override.returns( + { + error: String, + request_id: String, + result: T.nilable(String), + status: Scrapegraphai::CompletedMarkdownify::Status::TaggedSymbol, + website_url: String + } + ) + end + def to_hash + end + + module Status + extend Scrapegraphai::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all(Symbol, Scrapegraphai::CompletedMarkdownify::Status) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + QUEUED = + T.let( + :queued, + Scrapegraphai::CompletedMarkdownify::Status::TaggedSymbol + ) + PROCESSING = + T.let( + :processing, + Scrapegraphai::CompletedMarkdownify::Status::TaggedSymbol + ) + COMPLETED = + T.let( + :completed, + Scrapegraphai::CompletedMarkdownify::Status::TaggedSymbol + ) + + sig do + override.returns( + T::Array[Scrapegraphai::CompletedMarkdownify::Status::TaggedSymbol] + ) + end + def self.values + end + end + end + end +end diff --git a/rbi/scrapegraphai/models/completed_search_scraper.rbi b/rbi/scrapegraphai/models/completed_search_scraper.rbi new file mode 100644 index 0000000..16258a3 --- /dev/null +++ b/rbi/scrapegraphai/models/completed_search_scraper.rbi @@ -0,0 +1,140 @@ +# typed: strong + +module Scrapegraphai + module Models + class CompletedSearchScraper < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::CompletedSearchScraper, + Scrapegraphai::Internal::AnyHash + ) + end + + sig { returns(T.nilable(String)) } + attr_accessor :error + + sig { returns(T.nilable(Integer)) } + attr_reader :num_results + + sig { params(num_results: Integer).void } + attr_writer :num_results + + # URLs of sources used + sig { returns(T.nilable(T::Array[String])) } + 
attr_reader :reference_urls + + sig { params(reference_urls: T::Array[String]).void } + attr_writer :reference_urls + + sig { returns(T.nilable(String)) } + attr_reader :request_id + + sig { params(request_id: String).void } + attr_writer :request_id + + # Merged results from all scraped websites + sig { returns(T.nilable(T.anything)) } + attr_reader :result + + sig { params(result: T.anything).void } + attr_writer :result + + sig do + returns( + T.nilable(Scrapegraphai::CompletedSearchScraper::Status::TaggedSymbol) + ) + end + attr_reader :status + + sig do + params( + status: Scrapegraphai::CompletedSearchScraper::Status::OrSymbol + ).void + end + attr_writer :status + + sig { returns(T.nilable(String)) } + attr_reader :user_prompt + + sig { params(user_prompt: String).void } + attr_writer :user_prompt + + sig do + params( + error: T.nilable(String), + num_results: Integer, + reference_urls: T::Array[String], + request_id: String, + result: T.anything, + status: Scrapegraphai::CompletedSearchScraper::Status::OrSymbol, + user_prompt: String + ).returns(T.attached_class) + end + def self.new( + error: nil, + num_results: nil, + # URLs of sources used + reference_urls: nil, + request_id: nil, + # Merged results from all scraped websites + result: nil, + status: nil, + user_prompt: nil + ) + end + + sig do + override.returns( + { + error: T.nilable(String), + num_results: Integer, + reference_urls: T::Array[String], + request_id: String, + result: T.anything, + status: Scrapegraphai::CompletedSearchScraper::Status::TaggedSymbol, + user_prompt: String + } + ) + end + def to_hash + end + + module Status + extend Scrapegraphai::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all(Symbol, Scrapegraphai::CompletedSearchScraper::Status) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + QUEUED = + T.let( + :queued, + Scrapegraphai::CompletedSearchScraper::Status::TaggedSymbol + ) + PROCESSING = + T.let( + :processing, + 
Scrapegraphai::CompletedSearchScraper::Status::TaggedSymbol + ) + COMPLETED = + T.let( + :completed, + Scrapegraphai::CompletedSearchScraper::Status::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + Scrapegraphai::CompletedSearchScraper::Status::TaggedSymbol + ] + ) + end + def self.values + end + end + end + end +end diff --git a/rbi/scrapegraphai/models/completed_smartscraper.rbi b/rbi/scrapegraphai/models/completed_smartscraper.rbi new file mode 100644 index 0000000..ce0bb44 --- /dev/null +++ b/rbi/scrapegraphai/models/completed_smartscraper.rbi @@ -0,0 +1,131 @@ +# typed: strong + +module Scrapegraphai + module Models + class CompletedSmartscraper < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::CompletedSmartscraper, + Scrapegraphai::Internal::AnyHash + ) + end + + # Error message (empty on success) + sig { returns(T.nilable(String)) } + attr_reader :error + + sig { params(error: String).void } + attr_writer :error + + # Unique request identifier + sig { returns(T.nilable(String)) } + attr_reader :request_id + + sig { params(request_id: String).void } + attr_writer :request_id + + # Extracted data based on prompt/schema + sig { returns(T.nilable(T.anything)) } + attr_accessor :result + + # Processing status + sig do + returns( + T.nilable(Scrapegraphai::CompletedSmartscraper::Status::TaggedSymbol) + ) + end + attr_reader :status + + sig do + params( + status: Scrapegraphai::CompletedSmartscraper::Status::OrSymbol + ).void + end + attr_writer :status + + sig { returns(T.nilable(String)) } + attr_reader :user_prompt + + sig { params(user_prompt: String).void } + attr_writer :user_prompt + + sig { returns(T.nilable(String)) } + attr_accessor :website_url + + sig do + params( + error: String, + request_id: String, + result: T.nilable(T.anything), + status: Scrapegraphai::CompletedSmartscraper::Status::OrSymbol, + user_prompt: String, + website_url: T.nilable(String) + ).returns(T.attached_class) + end + 
def self.new( + # Error message (empty on success) + error: nil, + # Unique request identifier + request_id: nil, + # Extracted data based on prompt/schema + result: nil, + # Processing status + status: nil, + user_prompt: nil, + website_url: nil + ) + end + + sig do + override.returns( + { + error: String, + request_id: String, + result: T.nilable(T.anything), + status: Scrapegraphai::CompletedSmartscraper::Status::TaggedSymbol, + user_prompt: String, + website_url: T.nilable(String) + } + ) + end + def to_hash + end + + # Processing status + module Status + extend Scrapegraphai::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all(Symbol, Scrapegraphai::CompletedSmartscraper::Status) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + QUEUED = + T.let( + :queued, + Scrapegraphai::CompletedSmartscraper::Status::TaggedSymbol + ) + PROCESSING = + T.let( + :processing, + Scrapegraphai::CompletedSmartscraper::Status::TaggedSymbol + ) + COMPLETED = + T.let( + :completed, + Scrapegraphai::CompletedSmartscraper::Status::TaggedSymbol + ) + + sig do + override.returns( + T::Array[Scrapegraphai::CompletedSmartscraper::Status::TaggedSymbol] + ) + end + def self.values + end + end + end + end +end diff --git a/rbi/scrapegraphai/models/crawl_retrieve_results_params.rbi b/rbi/scrapegraphai/models/crawl_retrieve_results_params.rbi new file mode 100644 index 0000000..07a5980 --- /dev/null +++ b/rbi/scrapegraphai/models/crawl_retrieve_results_params.rbi @@ -0,0 +1,32 @@ +# typed: strong + +module Scrapegraphai + module Models + class CrawlRetrieveResultsParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::CrawlRetrieveResultsParams, + Scrapegraphai::Internal::AnyHash + ) + end + + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + T.attached_class 
+ ) + end + def self.new(request_options: {}) + end + + sig do + override.returns({ request_options: Scrapegraphai::RequestOptions }) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/crawl_retrieve_results_response.rbi b/rbi/scrapegraphai/models/crawl_retrieve_results_response.rbi new file mode 100644 index 0000000..0254953 --- /dev/null +++ b/rbi/scrapegraphai/models/crawl_retrieve_results_response.rbi @@ -0,0 +1,166 @@ +# typed: strong + +module Scrapegraphai + module Models + class CrawlRetrieveResultsResponse < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Models::CrawlRetrieveResultsResponse, + Scrapegraphai::Internal::AnyHash + ) + end + + # Successful crawl results + sig do + returns( + T.nilable( + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Result::Variants + ) + ) + end + attr_reader :result + + sig do + params( + result: + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Result::Variants + ).void + end + attr_writer :result + + sig do + returns( + T.nilable( + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status::TaggedSymbol + ) + ) + end + attr_reader :status + + sig do + params( + status: + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status::OrSymbol + ).void + end + attr_writer :status + + sig { returns(T.nilable(String)) } + attr_reader :task_id + + sig { params(task_id: String).void } + attr_writer :task_id + + # Error traceback for failed tasks + sig { returns(T.nilable(String)) } + attr_accessor :traceback + + sig do + params( + result: + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Result::Variants, + status: + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status::OrSymbol, + task_id: String, + traceback: T.nilable(String) + ).returns(T.attached_class) + end + def self.new( + # Successful crawl results + result: nil, + status: nil, + task_id: nil, + # Error traceback for failed tasks + traceback: nil + ) + end + + sig do 
+ override.returns( + { + result: + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Result::Variants, + status: + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status::TaggedSymbol, + task_id: String, + traceback: T.nilable(String) + } + ) + end + def to_hash + end + + # Successful crawl results + module Result + extend Scrapegraphai::Internal::Type::Union + + Variants = T.type_alias { T.any(T.anything, String) } + + sig do + override.returns( + T::Array[ + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Result::Variants + ] + ) + end + def self.variants + end + end + + module Status + extend Scrapegraphai::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + PENDING = + T.let( + :PENDING, + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status::TaggedSymbol + ) + STARTED = + T.let( + :STARTED, + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status::TaggedSymbol + ) + SUCCESS = + T.let( + :SUCCESS, + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status::TaggedSymbol + ) + FAILURE = + T.let( + :FAILURE, + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status::TaggedSymbol + ) + RETRY = + T.let( + :RETRY, + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status::TaggedSymbol + ) + REVOKED = + T.let( + :REVOKED, + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status::TaggedSymbol + ] + ) + end + def self.values + end + end + end + end +end diff --git a/rbi/scrapegraphai/models/crawl_start_params.rbi b/rbi/scrapegraphai/models/crawl_start_params.rbi new file mode 100644 index 0000000..2beb927 --- /dev/null +++ b/rbi/scrapegraphai/models/crawl_start_params.rbi @@ -0,0 +1,171 @@ +# typed: strong + +module Scrapegraphai + module Models + class 
CrawlStartParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::CrawlStartParams, + Scrapegraphai::Internal::AnyHash + ) + end + + # Starting URL for crawling + sig { returns(String) } + attr_accessor :url + + # Maximum crawl depth from starting URL + sig { returns(T.nilable(Integer)) } + attr_reader :depth + + sig { params(depth: Integer).void } + attr_writer :depth + + # Use AI extraction (true) or markdown conversion (false) + sig { returns(T.nilable(T::Boolean)) } + attr_reader :extraction_mode + + sig { params(extraction_mode: T::Boolean).void } + attr_writer :extraction_mode + + # Maximum number of pages to crawl + sig { returns(T.nilable(Integer)) } + attr_reader :max_pages + + sig { params(max_pages: Integer).void } + attr_writer :max_pages + + # Extraction prompt (required if extraction_mode is true) + sig { returns(T.nilable(String)) } + attr_accessor :prompt + + # Enable heavy JavaScript rendering + sig { returns(T.nilable(T::Boolean)) } + attr_reader :render_heavy_js + + sig { params(render_heavy_js: T::Boolean).void } + attr_writer :render_heavy_js + + sig { returns(T.nilable(Scrapegraphai::CrawlStartParams::Rules)) } + attr_reader :rules + + sig { params(rules: Scrapegraphai::CrawlStartParams::Rules::OrHash).void } + attr_writer :rules + + # Output schema for extraction + sig { returns(T.nilable(T.anything)) } + attr_accessor :schema + + # Use sitemap for crawling + sig { returns(T.nilable(T::Boolean)) } + attr_reader :sitemap + + sig { params(sitemap: T::Boolean).void } + attr_writer :sitemap + + sig do + params( + url: String, + depth: Integer, + extraction_mode: T::Boolean, + max_pages: Integer, + prompt: T.nilable(String), + render_heavy_js: T::Boolean, + rules: Scrapegraphai::CrawlStartParams::Rules::OrHash, + schema: T.nilable(T.anything), + sitemap: T::Boolean, + 
request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(T.attached_class) + end + def self.new( + # Starting URL for crawling + url:, + # Maximum crawl depth from starting URL + depth: nil, + # Use AI extraction (true) or markdown conversion (false) + extraction_mode: nil, + # Maximum number of pages to crawl + max_pages: nil, + # Extraction prompt (required if extraction_mode is true) + prompt: nil, + # Enable heavy JavaScript rendering + render_heavy_js: nil, + rules: nil, + # Output schema for extraction + schema: nil, + # Use sitemap for crawling + sitemap: nil, + request_options: {} + ) + end + + sig do + override.returns( + { + url: String, + depth: Integer, + extraction_mode: T::Boolean, + max_pages: Integer, + prompt: T.nilable(String), + render_heavy_js: T::Boolean, + rules: Scrapegraphai::CrawlStartParams::Rules, + schema: T.nilable(T.anything), + sitemap: T::Boolean, + request_options: Scrapegraphai::RequestOptions + } + ) + end + def to_hash + end + + class Rules < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::CrawlStartParams::Rules, + Scrapegraphai::Internal::AnyHash + ) + end + + # URL patterns to exclude from crawling + sig { returns(T.nilable(T::Array[String])) } + attr_reader :exclude + + sig { params(exclude: T::Array[String]).void } + attr_writer :exclude + + # Restrict crawling to same domain + sig { returns(T.nilable(T::Boolean)) } + attr_reader :same_domain + + sig { params(same_domain: T::Boolean).void } + attr_writer :same_domain + + sig do + params(exclude: T::Array[String], same_domain: T::Boolean).returns( + T.attached_class + ) + end + def self.new( + # URL patterns to exclude from crawling + exclude: nil, + # Restrict crawling to same domain + same_domain: nil + ) + end + + sig do + override.returns( + { exclude: T::Array[String], same_domain: T::Boolean } + ) + end + def to_hash + end + end + end + end +end diff --git a/rbi/scrapegraphai/models/crawl_start_response.rbi 
b/rbi/scrapegraphai/models/crawl_start_response.rbi new file mode 100644 index 0000000..405c4e9 --- /dev/null +++ b/rbi/scrapegraphai/models/crawl_start_response.rbi @@ -0,0 +1,33 @@ +# typed: strong + +module Scrapegraphai + module Models + class CrawlStartResponse < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Models::CrawlStartResponse, + Scrapegraphai::Internal::AnyHash + ) + end + + # Celery task identifier + sig { returns(T.nilable(String)) } + attr_reader :task_id + + sig { params(task_id: String).void } + attr_writer :task_id + + sig { params(task_id: String).returns(T.attached_class) } + def self.new( + # Celery task identifier + task_id: nil + ) + end + + sig { override.returns({ task_id: String }) } + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/credit_retrieve_params.rbi b/rbi/scrapegraphai/models/credit_retrieve_params.rbi new file mode 100644 index 0000000..6a0ce34 --- /dev/null +++ b/rbi/scrapegraphai/models/credit_retrieve_params.rbi @@ -0,0 +1,32 @@ +# typed: strong + +module Scrapegraphai + module Models + class CreditRetrieveParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::CreditRetrieveParams, + Scrapegraphai::Internal::AnyHash + ) + end + + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + T.attached_class + ) + end + def self.new(request_options: {}) + end + + sig do + override.returns({ request_options: Scrapegraphai::RequestOptions }) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/credit_retrieve_response.rbi b/rbi/scrapegraphai/models/credit_retrieve_response.rbi new file mode 100644 index 0000000..f6a38f3 --- /dev/null +++ b/rbi/scrapegraphai/models/credit_retrieve_response.rbi @@ -0,0 +1,50 @@ +# typed: strong + 
+module Scrapegraphai + module Models + class CreditRetrieveResponse < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Models::CreditRetrieveResponse, + Scrapegraphai::Internal::AnyHash + ) + end + + # Number of credits remaining + sig { returns(T.nilable(Integer)) } + attr_reader :remaining_credits + + sig { params(remaining_credits: Integer).void } + attr_writer :remaining_credits + + # Total credits consumed + sig { returns(T.nilable(Integer)) } + attr_reader :total_credits_used + + sig { params(total_credits_used: Integer).void } + attr_writer :total_credits_used + + sig do + params(remaining_credits: Integer, total_credits_used: Integer).returns( + T.attached_class + ) + end + def self.new( + # Number of credits remaining + remaining_credits: nil, + # Total credits consumed + total_credits_used: nil + ) + end + + sig do + override.returns( + { remaining_credits: Integer, total_credits_used: Integer } + ) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/failed_smartscraper.rbi b/rbi/scrapegraphai/models/failed_smartscraper.rbi new file mode 100644 index 0000000..3b0ea50 --- /dev/null +++ b/rbi/scrapegraphai/models/failed_smartscraper.rbi @@ -0,0 +1,112 @@ +# typed: strong + +module Scrapegraphai + module Models + class FailedSmartscraper < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::FailedSmartscraper, + Scrapegraphai::Internal::AnyHash + ) + end + + # Error description + sig { returns(T.nilable(String)) } + attr_reader :error + + sig { params(error: String).void } + attr_writer :error + + sig { returns(T.nilable(String)) } + attr_reader :request_id + + sig { params(request_id: String).void } + attr_writer :request_id + + sig { returns(T.nilable(T.anything)) } + attr_accessor :result + + sig do + returns( + T.nilable(Scrapegraphai::FailedSmartscraper::Status::TaggedSymbol) + ) + end + attr_reader :status + + sig do + params(status: 
Scrapegraphai::FailedSmartscraper::Status::OrSymbol).void + end + attr_writer :status + + sig { returns(T.nilable(String)) } + attr_reader :user_prompt + + sig { params(user_prompt: String).void } + attr_writer :user_prompt + + sig { returns(T.nilable(String)) } + attr_accessor :website_url + + sig do + params( + error: String, + request_id: String, + result: T.nilable(T.anything), + status: Scrapegraphai::FailedSmartscraper::Status::OrSymbol, + user_prompt: String, + website_url: T.nilable(String) + ).returns(T.attached_class) + end + def self.new( + # Error description + error: nil, + request_id: nil, + result: nil, + status: nil, + user_prompt: nil, + website_url: nil + ) + end + + sig do + override.returns( + { + error: String, + request_id: String, + result: T.nilable(T.anything), + status: Scrapegraphai::FailedSmartscraper::Status::TaggedSymbol, + user_prompt: String, + website_url: T.nilable(String) + } + ) + end + def to_hash + end + + module Status + extend Scrapegraphai::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all(Symbol, Scrapegraphai::FailedSmartscraper::Status) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + FAILED = + T.let( + :failed, + Scrapegraphai::FailedSmartscraper::Status::TaggedSymbol + ) + + sig do + override.returns( + T::Array[Scrapegraphai::FailedSmartscraper::Status::TaggedSymbol] + ) + end + def self.values + end + end + end + end +end diff --git a/rbi/scrapegraphai/models/feedback_submit_params.rbi b/rbi/scrapegraphai/models/feedback_submit_params.rbi new file mode 100644 index 0000000..ec74826 --- /dev/null +++ b/rbi/scrapegraphai/models/feedback_submit_params.rbi @@ -0,0 +1,62 @@ +# typed: strong + +module Scrapegraphai + module Models + class FeedbackSubmitParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + 
Scrapegraphai::FeedbackSubmitParams, + Scrapegraphai::Internal::AnyHash + ) + end + + # Rating score + sig { returns(Integer) } + attr_accessor :rating + + # Request to provide feedback for + sig { returns(String) } + attr_accessor :request_id + + # Optional feedback comments + sig { returns(T.nilable(String)) } + attr_accessor :feedback_text + + sig do + params( + rating: Integer, + request_id: String, + feedback_text: T.nilable(String), + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(T.attached_class) + end + def self.new( + # Rating score + rating:, + # Request to provide feedback for + request_id:, + # Optional feedback comments + feedback_text: nil, + request_options: {} + ) + end + + sig do + override.returns( + { + rating: Integer, + request_id: String, + feedback_text: T.nilable(String), + request_options: Scrapegraphai::RequestOptions + } + ) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/feedback_submit_response.rbi b/rbi/scrapegraphai/models/feedback_submit_response.rbi new file mode 100644 index 0000000..0792327 --- /dev/null +++ b/rbi/scrapegraphai/models/feedback_submit_response.rbi @@ -0,0 +1,68 @@ +# typed: strong + +module Scrapegraphai + module Models + class FeedbackSubmitResponse < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Models::FeedbackSubmitResponse, + Scrapegraphai::Internal::AnyHash + ) + end + + sig { returns(T.nilable(String)) } + attr_reader :feedback_id + + sig { params(feedback_id: String).void } + attr_writer :feedback_id + + sig { returns(T.nilable(Time)) } + attr_reader :feedback_timestamp + + sig { params(feedback_timestamp: Time).void } + attr_writer :feedback_timestamp + + sig { returns(T.nilable(String)) } + attr_reader :message + + sig { params(message: String).void } + attr_writer :message + + sig { returns(T.nilable(String)) } + attr_reader :request_id + + sig { params(request_id: String).void } + attr_writer 
:request_id + + sig do + params( + feedback_id: String, + feedback_timestamp: Time, + message: String, + request_id: String + ).returns(T.attached_class) + end + def self.new( + feedback_id: nil, + feedback_timestamp: nil, + message: nil, + request_id: nil + ) + end + + sig do + override.returns( + { + feedback_id: String, + feedback_timestamp: Time, + message: String, + request_id: String + } + ) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/generate_schema_create_params.rbi b/rbi/scrapegraphai/models/generate_schema_create_params.rbi new file mode 100644 index 0000000..b5f00c5 --- /dev/null +++ b/rbi/scrapegraphai/models/generate_schema_create_params.rbi @@ -0,0 +1,54 @@ +# typed: strong + +module Scrapegraphai + module Models + class GenerateSchemaCreateParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::GenerateSchemaCreateParams, + Scrapegraphai::Internal::AnyHash + ) + end + + # Natural language description of desired schema + sig { returns(String) } + attr_accessor :user_prompt + + # Existing schema to modify or extend + sig { returns(T.nilable(T.anything)) } + attr_accessor :existing_schema + + sig do + params( + user_prompt: String, + existing_schema: T.nilable(T.anything), + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(T.attached_class) + end + def self.new( + # Natural language description of desired schema + user_prompt:, + # Existing schema to modify or extend + existing_schema: nil, + request_options: {} + ) + end + + sig do + override.returns( + { + user_prompt: String, + existing_schema: T.nilable(T.anything), + request_options: Scrapegraphai::RequestOptions + } + ) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/generate_schema_create_response.rbi 
b/rbi/scrapegraphai/models/generate_schema_create_response.rbi new file mode 100644 index 0000000..41deacc --- /dev/null +++ b/rbi/scrapegraphai/models/generate_schema_create_response.rbi @@ -0,0 +1,129 @@ +# typed: strong + +module Scrapegraphai + module Models + class GenerateSchemaCreateResponse < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Models::GenerateSchemaCreateResponse, + Scrapegraphai::Internal::AnyHash + ) + end + + sig { returns(T.nilable(String)) } + attr_accessor :error + + # Generated JSON schema + sig { returns(T.nilable(T.anything)) } + attr_reader :generated_schema + + sig { params(generated_schema: T.anything).void } + attr_writer :generated_schema + + # Enhanced search prompt generated from user input + sig { returns(T.nilable(String)) } + attr_reader :refined_prompt + + sig { params(refined_prompt: String).void } + attr_writer :refined_prompt + + sig { returns(T.nilable(String)) } + attr_reader :request_id + + sig { params(request_id: String).void } + attr_writer :request_id + + sig do + returns( + T.nilable( + Scrapegraphai::Models::GenerateSchemaCreateResponse::Status::TaggedSymbol + ) + ) + end + attr_reader :status + + sig do + params( + status: + Scrapegraphai::Models::GenerateSchemaCreateResponse::Status::OrSymbol + ).void + end + attr_writer :status + + sig { returns(T.nilable(String)) } + attr_reader :user_prompt + + sig { params(user_prompt: String).void } + attr_writer :user_prompt + + sig do + params( + error: T.nilable(String), + generated_schema: T.anything, + refined_prompt: String, + request_id: String, + status: + Scrapegraphai::Models::GenerateSchemaCreateResponse::Status::OrSymbol, + user_prompt: String + ).returns(T.attached_class) + end + def self.new( + error: nil, + # Generated JSON schema + generated_schema: nil, + # Enhanced search prompt generated from user input + refined_prompt: nil, + request_id: nil, + status: nil, + user_prompt: nil + ) + end + + sig do + 
override.returns( + { + error: T.nilable(String), + generated_schema: T.anything, + refined_prompt: String, + request_id: String, + status: + Scrapegraphai::Models::GenerateSchemaCreateResponse::Status::TaggedSymbol, + user_prompt: String + } + ) + end + def to_hash + end + + module Status + extend Scrapegraphai::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + Scrapegraphai::Models::GenerateSchemaCreateResponse::Status + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + COMPLETED = + T.let( + :completed, + Scrapegraphai::Models::GenerateSchemaCreateResponse::Status::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + Scrapegraphai::Models::GenerateSchemaCreateResponse::Status::TaggedSymbol + ] + ) + end + def self.values + end + end + end + end +end diff --git a/rbi/scrapegraphai/models/generate_schema_retrieve_params.rbi b/rbi/scrapegraphai/models/generate_schema_retrieve_params.rbi new file mode 100644 index 0000000..ea5d076 --- /dev/null +++ b/rbi/scrapegraphai/models/generate_schema_retrieve_params.rbi @@ -0,0 +1,32 @@ +# typed: strong + +module Scrapegraphai + module Models + class GenerateSchemaRetrieveParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::GenerateSchemaRetrieveParams, + Scrapegraphai::Internal::AnyHash + ) + end + + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + T.attached_class + ) + end + def self.new(request_options: {}) + end + + sig do + override.returns({ request_options: Scrapegraphai::RequestOptions }) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/generate_schema_retrieve_response.rbi b/rbi/scrapegraphai/models/generate_schema_retrieve_response.rbi new file mode 100644 index 0000000..0ccad0a --- /dev/null +++ 
b/rbi/scrapegraphai/models/generate_schema_retrieve_response.rbi @@ -0,0 +1,264 @@ +# typed: strong + +module Scrapegraphai + module Models + module GenerateSchemaRetrieveResponse + extend Scrapegraphai::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse, + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse + ) + end + + class CompletedSchemaGenerationResponse < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse, + Scrapegraphai::Internal::AnyHash + ) + end + + sig { returns(T.nilable(String)) } + attr_accessor :error + + sig { returns(T.nilable(T.anything)) } + attr_reader :generated_schema + + sig { params(generated_schema: T.anything).void } + attr_writer :generated_schema + + sig { returns(T.nilable(String)) } + attr_reader :refined_prompt + + sig { params(refined_prompt: String).void } + attr_writer :refined_prompt + + sig { returns(T.nilable(String)) } + attr_reader :request_id + + sig { params(request_id: String).void } + attr_writer :request_id + + sig do + returns( + T.nilable( + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::Status::TaggedSymbol + ) + ) + end + attr_reader :status + + sig do + params( + status: + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::Status::OrSymbol + ).void + end + attr_writer :status + + sig { returns(T.nilable(String)) } + attr_reader :user_prompt + + sig { params(user_prompt: String).void } + attr_writer :user_prompt + + sig do + params( + error: T.nilable(String), + generated_schema: T.anything, + refined_prompt: String, + request_id: String, + status: + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::Status::OrSymbol, + user_prompt: String + 
).returns(T.attached_class) + end + def self.new( + error: nil, + generated_schema: nil, + refined_prompt: nil, + request_id: nil, + status: nil, + user_prompt: nil + ) + end + + sig do + override.returns( + { + error: T.nilable(String), + generated_schema: T.anything, + refined_prompt: String, + request_id: String, + status: + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::Status::TaggedSymbol, + user_prompt: String + } + ) + end + def to_hash + end + + module Status + extend Scrapegraphai::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::Status + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + COMPLETED = + T.let( + :completed, + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::Status::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::Status::TaggedSymbol + ] + ) + end + def self.values + end + end + end + + class FailedSchemaGenerationResponse < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse, + Scrapegraphai::Internal::AnyHash + ) + end + + sig { returns(T.nilable(String)) } + attr_reader :error + + sig { params(error: String).void } + attr_writer :error + + sig { returns(T.nilable(T.anything)) } + attr_accessor :generated_schema + + sig { returns(T.nilable(String)) } + attr_accessor :refined_prompt + + sig { returns(T.nilable(String)) } + attr_reader :request_id + + sig { params(request_id: String).void } + attr_writer :request_id + + sig do + returns( + T.nilable( + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::Status::TaggedSymbol + ) + ) + end + attr_reader :status + + sig do + 
params( + status: + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::Status::OrSymbol + ).void + end + attr_writer :status + + sig { returns(T.nilable(String)) } + attr_reader :user_prompt + + sig { params(user_prompt: String).void } + attr_writer :user_prompt + + sig do + params( + error: String, + generated_schema: T.nilable(T.anything), + refined_prompt: T.nilable(String), + request_id: String, + status: + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::Status::OrSymbol, + user_prompt: String + ).returns(T.attached_class) + end + def self.new( + error: nil, + generated_schema: nil, + refined_prompt: nil, + request_id: nil, + status: nil, + user_prompt: nil + ) + end + + sig do + override.returns( + { + error: String, + generated_schema: T.nilable(T.anything), + refined_prompt: T.nilable(String), + request_id: String, + status: + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::Status::TaggedSymbol, + user_prompt: String + } + ) + end + def to_hash + end + + module Status + extend Scrapegraphai::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::Status + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + FAILED = + T.let( + :failed, + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::Status::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::Status::TaggedSymbol + ] + ) + end + def self.values + end + end + end + + sig do + override.returns( + T::Array[ + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::Variants + ] + ) + end + def self.variants + end + end + end +end diff --git a/rbi/scrapegraphai/models/healthz_check_params.rbi b/rbi/scrapegraphai/models/healthz_check_params.rbi new 
file mode 100644 index 0000000..e5599ed --- /dev/null +++ b/rbi/scrapegraphai/models/healthz_check_params.rbi @@ -0,0 +1,32 @@ +# typed: strong + +module Scrapegraphai + module Models + class HealthzCheckParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::HealthzCheckParams, + Scrapegraphai::Internal::AnyHash + ) + end + + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + T.attached_class + ) + end + def self.new(request_options: {}) + end + + sig do + override.returns({ request_options: Scrapegraphai::RequestOptions }) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/healthz_check_response.rbi b/rbi/scrapegraphai/models/healthz_check_response.rbi new file mode 100644 index 0000000..8708b0f --- /dev/null +++ b/rbi/scrapegraphai/models/healthz_check_response.rbi @@ -0,0 +1,41 @@ +# typed: strong + +module Scrapegraphai + module Models + class HealthzCheckResponse < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Models::HealthzCheckResponse, + Scrapegraphai::Internal::AnyHash + ) + end + + sig { returns(T.nilable(T::Hash[Symbol, String])) } + attr_reader :services + + sig { params(services: T::Hash[Symbol, String]).void } + attr_writer :services + + sig { returns(T.nilable(String)) } + attr_reader :status + + sig { params(status: String).void } + attr_writer :status + + sig do + params(services: T::Hash[Symbol, String], status: String).returns( + T.attached_class + ) + end + def self.new(services: nil, status: nil) + end + + sig do + override.returns({ services: T::Hash[Symbol, String], status: String }) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/markdownify_convert_params.rbi b/rbi/scrapegraphai/models/markdownify_convert_params.rbi new 
file mode 100644 index 0000000..27e21f7 --- /dev/null +++ b/rbi/scrapegraphai/models/markdownify_convert_params.rbi @@ -0,0 +1,66 @@ +# typed: strong + +module Scrapegraphai + module Models + class MarkdownifyConvertParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::MarkdownifyConvertParams, + Scrapegraphai::Internal::AnyHash + ) + end + + # URL to convert to markdown + sig { returns(String) } + attr_accessor :website_url + + sig { returns(T.nilable(T::Hash[Symbol, String])) } + attr_reader :headers + + sig { params(headers: T::Hash[Symbol, String]).void } + attr_writer :headers + + # Interaction steps before conversion + sig { returns(T.nilable(T::Array[String])) } + attr_reader :steps + + sig { params(steps: T::Array[String]).void } + attr_writer :steps + + sig do + params( + website_url: String, + headers: T::Hash[Symbol, String], + steps: T::Array[String], + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(T.attached_class) + end + def self.new( + # URL to convert to markdown + website_url:, + headers: nil, + # Interaction steps before conversion + steps: nil, + request_options: {} + ) + end + + sig do + override.returns( + { + website_url: String, + headers: T::Hash[Symbol, String], + steps: T::Array[String], + request_options: Scrapegraphai::RequestOptions + } + ) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/markdownify_retrieve_status_params.rbi b/rbi/scrapegraphai/models/markdownify_retrieve_status_params.rbi new file mode 100644 index 0000000..98ed031 --- /dev/null +++ b/rbi/scrapegraphai/models/markdownify_retrieve_status_params.rbi @@ -0,0 +1,32 @@ +# typed: strong + +module Scrapegraphai + module Models + class MarkdownifyRetrieveStatusParams < Scrapegraphai::Internal::Type::BaseModel + extend 
Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::MarkdownifyRetrieveStatusParams, + Scrapegraphai::Internal::AnyHash + ) + end + + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + T.attached_class + ) + end + def self.new(request_options: {}) + end + + sig do + override.returns({ request_options: Scrapegraphai::RequestOptions }) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/markdownify_retrieve_status_response.rbi b/rbi/scrapegraphai/models/markdownify_retrieve_status_response.rbi new file mode 100644 index 0000000..174ebfa --- /dev/null +++ b/rbi/scrapegraphai/models/markdownify_retrieve_status_response.rbi @@ -0,0 +1,138 @@ +# typed: strong + +module Scrapegraphai + module Models + module MarkdownifyRetrieveStatusResponse + extend Scrapegraphai::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + Scrapegraphai::CompletedMarkdownify, + Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse + ) + end + + class FailedMarkdownifyResponse < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse, + Scrapegraphai::Internal::AnyHash + ) + end + + sig { returns(T.nilable(String)) } + attr_reader :error + + sig { params(error: String).void } + attr_writer :error + + sig { returns(T.nilable(String)) } + attr_reader :request_id + + sig { params(request_id: String).void } + attr_writer :request_id + + sig { returns(T.nilable(String)) } + attr_accessor :result + + sig do + returns( + T.nilable( + Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::Status::TaggedSymbol + ) + ) + end + attr_reader :status + + sig do + params( + status: + 
Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::Status::OrSymbol + ).void + end + attr_writer :status + + sig { returns(T.nilable(String)) } + attr_reader :website_url + + sig { params(website_url: String).void } + attr_writer :website_url + + sig do + params( + error: String, + request_id: String, + result: T.nilable(String), + status: + Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::Status::OrSymbol, + website_url: String + ).returns(T.attached_class) + end + def self.new( + error: nil, + request_id: nil, + result: nil, + status: nil, + website_url: nil + ) + end + + sig do + override.returns( + { + error: String, + request_id: String, + result: T.nilable(String), + status: + Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::Status::TaggedSymbol, + website_url: String + } + ) + end + def to_hash + end + + module Status + extend Scrapegraphai::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::Status + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + FAILED = + T.let( + :failed, + Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::Status::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::Status::TaggedSymbol + ] + ) + end + def self.values + end + end + end + + sig do + override.returns( + T::Array[ + Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::Variants + ] + ) + end + def self.variants + end + end + end +end diff --git a/rbi/scrapegraphai/models/searchscraper_create_params.rbi b/rbi/scrapegraphai/models/searchscraper_create_params.rbi new file mode 100644 index 0000000..848b49f --- /dev/null +++ b/rbi/scrapegraphai/models/searchscraper_create_params.rbi @@ -0,0 +1,77 @@ +# typed: strong 
+ +module Scrapegraphai + module Models + class SearchscraperCreateParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::SearchscraperCreateParams, + Scrapegraphai::Internal::AnyHash + ) + end + + # Search query and extraction instruction + sig { returns(String) } + attr_accessor :user_prompt + + sig { returns(T.nilable(T::Hash[Symbol, String])) } + attr_reader :headers + + sig { params(headers: T::Hash[Symbol, String]).void } + attr_writer :headers + + # Number of websites to scrape from search results + sig { returns(T.nilable(Integer)) } + attr_reader :num_results + + sig { params(num_results: Integer).void } + attr_writer :num_results + + # JSON schema for structured output + sig { returns(T.nilable(T.anything)) } + attr_reader :output_schema + + sig { params(output_schema: T.anything).void } + attr_writer :output_schema + + sig do + params( + user_prompt: String, + headers: T::Hash[Symbol, String], + num_results: Integer, + output_schema: T.anything, + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(T.attached_class) + end + def self.new( + # Search query and extraction instruction + user_prompt:, + headers: nil, + # Number of websites to scrape from search results + num_results: nil, + # JSON schema for structured output + output_schema: nil, + request_options: {} + ) + end + + sig do + override.returns( + { + user_prompt: String, + headers: T::Hash[Symbol, String], + num_results: Integer, + output_schema: T.anything, + request_options: Scrapegraphai::RequestOptions + } + ) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/searchscraper_retrieve_status_params.rbi b/rbi/scrapegraphai/models/searchscraper_retrieve_status_params.rbi new file mode 100644 index 0000000..3af841b --- /dev/null +++ 
b/rbi/scrapegraphai/models/searchscraper_retrieve_status_params.rbi @@ -0,0 +1,32 @@ +# typed: strong + +module Scrapegraphai + module Models + class SearchscraperRetrieveStatusParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::SearchscraperRetrieveStatusParams, + Scrapegraphai::Internal::AnyHash + ) + end + + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + T.attached_class + ) + end + def self.new(request_options: {}) + end + + sig do + override.returns({ request_options: Scrapegraphai::RequestOptions }) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/searchscraper_retrieve_status_response.rbi b/rbi/scrapegraphai/models/searchscraper_retrieve_status_response.rbi new file mode 100644 index 0000000..e9ee0d5 --- /dev/null +++ b/rbi/scrapegraphai/models/searchscraper_retrieve_status_response.rbi @@ -0,0 +1,156 @@ +# typed: strong + +module Scrapegraphai + module Models + module SearchscraperRetrieveStatusResponse + extend Scrapegraphai::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + Scrapegraphai::CompletedSearchScraper, + Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse + ) + end + + class FailedSearchScraperResponse < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse, + Scrapegraphai::Internal::AnyHash + ) + end + + sig { returns(T.nilable(String)) } + attr_reader :error + + sig { params(error: String).void } + attr_writer :error + + sig { returns(T.nilable(Integer)) } + attr_reader :num_results + + sig { params(num_results: Integer).void } + attr_writer :num_results + + sig { returns(T.nilable(T::Array[String])) } + attr_reader :reference_urls 
+ + sig { params(reference_urls: T::Array[String]).void } + attr_writer :reference_urls + + sig { returns(T.nilable(String)) } + attr_reader :request_id + + sig { params(request_id: String).void } + attr_writer :request_id + + sig { returns(T.nilable(T.anything)) } + attr_accessor :result + + sig do + returns( + T.nilable( + Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::Status::TaggedSymbol + ) + ) + end + attr_reader :status + + sig do + params( + status: + Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::Status::OrSymbol + ).void + end + attr_writer :status + + sig { returns(T.nilable(String)) } + attr_reader :user_prompt + + sig { params(user_prompt: String).void } + attr_writer :user_prompt + + sig do + params( + error: String, + num_results: Integer, + reference_urls: T::Array[String], + request_id: String, + result: T.nilable(T.anything), + status: + Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::Status::OrSymbol, + user_prompt: String + ).returns(T.attached_class) + end + def self.new( + error: nil, + num_results: nil, + reference_urls: nil, + request_id: nil, + result: nil, + status: nil, + user_prompt: nil + ) + end + + sig do + override.returns( + { + error: String, + num_results: Integer, + reference_urls: T::Array[String], + request_id: String, + result: T.nilable(T.anything), + status: + Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::Status::TaggedSymbol, + user_prompt: String + } + ) + end + def to_hash + end + + module Status + extend Scrapegraphai::Internal::Type::Enum + + TaggedSymbol = + T.type_alias do + T.all( + Symbol, + Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::Status + ) + end + OrSymbol = T.type_alias { T.any(Symbol, String) } + + FAILED = + T.let( + :failed, + 
Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::Status::TaggedSymbol + ) + + sig do + override.returns( + T::Array[ + Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::Status::TaggedSymbol + ] + ) + end + def self.values + end + end + end + + sig do + override.returns( + T::Array[ + Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::Variants + ] + ) + end + def self.variants + end + end + end +end diff --git a/rbi/scrapegraphai/models/smartscraper_create_params.rbi b/rbi/scrapegraphai/models/smartscraper_create_params.rbi new file mode 100644 index 0000000..f683426 --- /dev/null +++ b/rbi/scrapegraphai/models/smartscraper_create_params.rbi @@ -0,0 +1,145 @@ +# typed: strong + +module Scrapegraphai + module Models + class SmartscraperCreateParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::SmartscraperCreateParams, + Scrapegraphai::Internal::AnyHash + ) + end + + # Extraction instruction for the LLM + sig { returns(String) } + attr_accessor :user_prompt + + # Cookies to include in the request + sig { returns(T.nilable(T::Hash[Symbol, String])) } + attr_reader :cookies + + sig { params(cookies: T::Hash[Symbol, String]).void } + attr_writer :cookies + + # HTTP headers to include in the request + sig { returns(T.nilable(T::Hash[Symbol, String])) } + attr_reader :headers + + sig { params(headers: T::Hash[Symbol, String]).void } + attr_writer :headers + + # Number of infinite scroll operations to perform + sig { returns(T.nilable(Integer)) } + attr_reader :number_of_scrolls + + sig { params(number_of_scrolls: Integer).void } + attr_writer :number_of_scrolls + + # JSON schema defining the expected output structure + sig { returns(T.nilable(T.anything)) } + attr_reader :output_schema + + sig { 
params(output_schema: T.anything).void } + attr_writer :output_schema + + # Enable heavy JavaScript rendering + sig { returns(T.nilable(T::Boolean)) } + attr_reader :render_heavy_js + + sig { params(render_heavy_js: T::Boolean).void } + attr_writer :render_heavy_js + + # Website interaction steps (e.g., clicking buttons) + sig { returns(T.nilable(T::Array[String])) } + attr_reader :steps + + sig { params(steps: T::Array[String]).void } + attr_writer :steps + + # Number of pages to process for pagination + sig { returns(T.nilable(Integer)) } + attr_reader :total_pages + + sig { params(total_pages: Integer).void } + attr_writer :total_pages + + # HTML content to process (max 2MB, mutually exclusive with website_url) + sig { returns(T.nilable(String)) } + attr_reader :website_html + + sig { params(website_html: String).void } + attr_writer :website_html + + # URL to scrape (mutually exclusive with website_html) + sig { returns(T.nilable(String)) } + attr_reader :website_url + + sig { params(website_url: String).void } + attr_writer :website_url + + sig do + params( + user_prompt: String, + cookies: T::Hash[Symbol, String], + headers: T::Hash[Symbol, String], + number_of_scrolls: Integer, + output_schema: T.anything, + render_heavy_js: T::Boolean, + steps: T::Array[String], + total_pages: Integer, + website_html: String, + website_url: String, + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(T.attached_class) + end + def self.new( + # Extraction instruction for the LLM + user_prompt:, + # Cookies to include in the request + cookies: nil, + # HTTP headers to include in the request + headers: nil, + # Number of infinite scroll operations to perform + number_of_scrolls: nil, + # JSON schema defining the expected output structure + output_schema: nil, + # Enable heavy JavaScript rendering + render_heavy_js: nil, + # Website interaction steps (e.g., clicking buttons) + steps: nil, + # Number of pages to process for pagination + total_pages: nil, + # HTML 
content to process (max 2MB, mutually exclusive with website_url) + website_html: nil, + # URL to scrape (mutually exclusive with website_html) + website_url: nil, + request_options: {} + ) + end + + sig do + override.returns( + { + user_prompt: String, + cookies: T::Hash[Symbol, String], + headers: T::Hash[Symbol, String], + number_of_scrolls: Integer, + output_schema: T.anything, + render_heavy_js: T::Boolean, + steps: T::Array[String], + total_pages: Integer, + website_html: String, + website_url: String, + request_options: Scrapegraphai::RequestOptions + } + ) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/smartscraper_list_params.rbi b/rbi/scrapegraphai/models/smartscraper_list_params.rbi new file mode 100644 index 0000000..f0fc6c2 --- /dev/null +++ b/rbi/scrapegraphai/models/smartscraper_list_params.rbi @@ -0,0 +1,32 @@ +# typed: strong + +module Scrapegraphai + module Models + class SmartscraperListParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::SmartscraperListParams, + Scrapegraphai::Internal::AnyHash + ) + end + + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + T.attached_class + ) + end + def self.new(request_options: {}) + end + + sig do + override.returns({ request_options: Scrapegraphai::RequestOptions }) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/smartscraper_list_response.rbi b/rbi/scrapegraphai/models/smartscraper_list_response.rbi new file mode 100644 index 0000000..b7356a8 --- /dev/null +++ b/rbi/scrapegraphai/models/smartscraper_list_response.rbi @@ -0,0 +1,25 @@ +# typed: strong + +module Scrapegraphai + module Models + module SmartscraperListResponse + extend Scrapegraphai::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + 
Scrapegraphai::CompletedSmartscraper, + Scrapegraphai::FailedSmartscraper + ) + end + + sig do + override.returns( + T::Array[Scrapegraphai::Models::SmartscraperListResponse::Variants] + ) + end + def self.variants + end + end + end +end diff --git a/rbi/scrapegraphai/models/smartscraper_retrieve_params.rbi b/rbi/scrapegraphai/models/smartscraper_retrieve_params.rbi new file mode 100644 index 0000000..4d9a20c --- /dev/null +++ b/rbi/scrapegraphai/models/smartscraper_retrieve_params.rbi @@ -0,0 +1,32 @@ +# typed: strong + +module Scrapegraphai + module Models + class SmartscraperRetrieveParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::SmartscraperRetrieveParams, + Scrapegraphai::Internal::AnyHash + ) + end + + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + T.attached_class + ) + end + def self.new(request_options: {}) + end + + sig do + override.returns({ request_options: Scrapegraphai::RequestOptions }) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/smartscraper_retrieve_response.rbi b/rbi/scrapegraphai/models/smartscraper_retrieve_response.rbi new file mode 100644 index 0000000..aa7b4ee --- /dev/null +++ b/rbi/scrapegraphai/models/smartscraper_retrieve_response.rbi @@ -0,0 +1,27 @@ +# typed: strong + +module Scrapegraphai + module Models + module SmartscraperRetrieveResponse + extend Scrapegraphai::Internal::Type::Union + + Variants = + T.type_alias do + T.any( + Scrapegraphai::CompletedSmartscraper, + Scrapegraphai::FailedSmartscraper + ) + end + + sig do + override.returns( + T::Array[ + Scrapegraphai::Models::SmartscraperRetrieveResponse::Variants + ] + ) + end + def self.variants + end + end + end +end diff --git a/rbi/scrapegraphai/models/validate_api_key_params.rbi 
b/rbi/scrapegraphai/models/validate_api_key_params.rbi new file mode 100644 index 0000000..ec93983 --- /dev/null +++ b/rbi/scrapegraphai/models/validate_api_key_params.rbi @@ -0,0 +1,32 @@ +# typed: strong + +module Scrapegraphai + module Models + class ValidateAPIKeyParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + OrHash = + T.type_alias do + T.any( + Scrapegraphai::ValidateAPIKeyParams, + Scrapegraphai::Internal::AnyHash + ) + end + + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + T.attached_class + ) + end + def self.new(request_options: {}) + end + + sig do + override.returns({ request_options: Scrapegraphai::RequestOptions }) + end + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/models/validate_api_key_response.rbi b/rbi/scrapegraphai/models/validate_api_key_response.rbi new file mode 100644 index 0000000..1cfc6d1 --- /dev/null +++ b/rbi/scrapegraphai/models/validate_api_key_response.rbi @@ -0,0 +1,29 @@ +# typed: strong + +module Scrapegraphai + module Models + class ValidateAPIKeyResponse < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any( + Scrapegraphai::Models::ValidateAPIKeyResponse, + Scrapegraphai::Internal::AnyHash + ) + end + + sig { returns(T.nilable(String)) } + attr_reader :email + + sig { params(email: String).void } + attr_writer :email + + sig { params(email: String).returns(T.attached_class) } + def self.new(email: nil) + end + + sig { override.returns({ email: String }) } + def to_hash + end + end + end +end diff --git a/rbi/scrapegraphai/request_options.rbi b/rbi/scrapegraphai/request_options.rbi new file mode 100644 index 0000000..16efe09 --- /dev/null +++ b/rbi/scrapegraphai/request_options.rbi @@ -0,0 +1,59 @@ +# typed: strong + +module Scrapegraphai + # Specify HTTP behaviour to use for a specific request. 
These options supplement + # or override those provided at the client level. + # + # When making a request, you can pass an actual {RequestOptions} instance, or + # simply pass a Hash with symbol keys matching the attributes on this class. + class RequestOptions < Scrapegraphai::Internal::Type::BaseModel + OrHash = + T.type_alias do + T.any(Scrapegraphai::RequestOptions, Scrapegraphai::Internal::AnyHash) + end + + # @api private + sig { params(opts: Scrapegraphai::RequestOptions::OrHash).void } + def self.validate!(opts) + end + + # Idempotency key to send with request and all associated retries. Will only be + # sent for write requests. + sig { returns(T.nilable(String)) } + attr_accessor :idempotency_key + + # Extra query params to send with the request. These are `.merge`’d into any + # `query` given at the client level. + sig do + returns( + T.nilable(T::Hash[String, T.nilable(T.any(T::Array[String], String))]) + ) + end + attr_accessor :extra_query + + # Extra headers to send with the request. These are `.merged`’d into any + # `extra_headers` given at the client level. + sig { returns(T.nilable(T::Hash[String, T.nilable(String)])) } + attr_accessor :extra_headers + + # Extra data to send with the request. These are deep merged into any data + # generated as part of the normal request. + sig { returns(T.nilable(T.anything)) } + attr_accessor :extra_body + + # Maximum number of retries to attempt after a failed initial request. + sig { returns(T.nilable(Integer)) } + attr_accessor :max_retries + + # Request timeout in seconds. + sig { returns(T.nilable(Float)) } + attr_accessor :timeout + + # Returns a new instance of RequestOptions. 
+ sig do + params(values: Scrapegraphai::Internal::AnyHash).returns(T.attached_class) + end + def self.new(values = {}) + end + end +end diff --git a/rbi/scrapegraphai/resources/crawl.rbi b/rbi/scrapegraphai/resources/crawl.rbi new file mode 100644 index 0000000..c9d97a2 --- /dev/null +++ b/rbi/scrapegraphai/resources/crawl.rbi @@ -0,0 +1,65 @@ +# typed: strong + +module Scrapegraphai + module Resources + class Crawl + # Retrieve the status and results of a crawling job + sig do + params( + task_id: String, + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(Scrapegraphai::Models::CrawlRetrieveResultsResponse) + end + def retrieve_results( + # Celery task identifier + task_id, + request_options: {} + ) + end + + # Initiate comprehensive website crawling with sitemap support. Supports both AI + # extraction mode and markdown conversion mode. Returns a task ID for async + # processing. + sig do + params( + url: String, + depth: Integer, + extraction_mode: T::Boolean, + max_pages: Integer, + prompt: T.nilable(String), + render_heavy_js: T::Boolean, + rules: Scrapegraphai::CrawlStartParams::Rules::OrHash, + schema: T.nilable(T.anything), + sitemap: T::Boolean, + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(Scrapegraphai::Models::CrawlStartResponse) + end + def start( + # Starting URL for crawling + url:, + # Maximum crawl depth from starting URL + depth: nil, + # Use AI extraction (true) or markdown conversion (false) + extraction_mode: nil, + # Maximum number of pages to crawl + max_pages: nil, + # Extraction prompt (required if extraction_mode is true) + prompt: nil, + # Enable heavy JavaScript rendering + render_heavy_js: nil, + rules: nil, + # Output schema for extraction + schema: nil, + # Use sitemap for crawling + sitemap: nil, + request_options: {} + ) + end + + # @api private + sig { params(client: Scrapegraphai::Client).returns(T.attached_class) } + def self.new(client:) + end + end + end +end diff --git 
a/rbi/scrapegraphai/resources/credits.rbi b/rbi/scrapegraphai/resources/credits.rbi new file mode 100644 index 0000000..c2cc8b6 --- /dev/null +++ b/rbi/scrapegraphai/resources/credits.rbi @@ -0,0 +1,21 @@ +# typed: strong + +module Scrapegraphai + module Resources + class Credits + # Retrieve the current credit balance and usage for the authenticated user + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + Scrapegraphai::Models::CreditRetrieveResponse + ) + end + def retrieve(request_options: {}) + end + + # @api private + sig { params(client: Scrapegraphai::Client).returns(T.attached_class) } + def self.new(client:) + end + end + end +end diff --git a/rbi/scrapegraphai/resources/feedback.rbi b/rbi/scrapegraphai/resources/feedback.rbi new file mode 100644 index 0000000..c6e0680 --- /dev/null +++ b/rbi/scrapegraphai/resources/feedback.rbi @@ -0,0 +1,32 @@ +# typed: strong + +module Scrapegraphai + module Resources + class Feedback + # Submit feedback for a specific request + sig do + params( + rating: Integer, + request_id: String, + feedback_text: T.nilable(String), + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(Scrapegraphai::Models::FeedbackSubmitResponse) + end + def submit( + # Rating score + rating:, + # Request to provide feedback for + request_id:, + # Optional feedback comments + feedback_text: nil, + request_options: {} + ) + end + + # @api private + sig { params(client: Scrapegraphai::Client).returns(T.attached_class) } + def self.new(client:) + end + end + end +end diff --git a/rbi/scrapegraphai/resources/generate_schema.rbi b/rbi/scrapegraphai/resources/generate_schema.rbi new file mode 100644 index 0000000..96833ea --- /dev/null +++ b/rbi/scrapegraphai/resources/generate_schema.rbi @@ -0,0 +1,46 @@ +# typed: strong + +module Scrapegraphai + module Resources + class GenerateSchema + # Generate or modify JSON schemas based on natural language descriptions. 
Can + # create new schemas or extend existing ones. + sig do + params( + user_prompt: String, + existing_schema: T.nilable(T.anything), + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(Scrapegraphai::Models::GenerateSchemaCreateResponse) + end + def create( + # Natural language description of desired schema + user_prompt:, + # Existing schema to modify or extend + existing_schema: nil, + request_options: {} + ) + end + + # Retrieve the status and results of a schema generation request + sig do + params( + request_id: String, + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns( + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::Variants + ) + end + def retrieve( + # Unique request identifier + request_id, + request_options: {} + ) + end + + # @api private + sig { params(client: Scrapegraphai::Client).returns(T.attached_class) } + def self.new(client:) + end + end + end +end diff --git a/rbi/scrapegraphai/resources/healthz.rbi b/rbi/scrapegraphai/resources/healthz.rbi new file mode 100644 index 0000000..37f0ccd --- /dev/null +++ b/rbi/scrapegraphai/resources/healthz.rbi @@ -0,0 +1,21 @@ +# typed: strong + +module Scrapegraphai + module Resources + class Healthz + # Check the health status of the service + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + Scrapegraphai::Models::HealthzCheckResponse + ) + end + def check(request_options: {}) + end + + # @api private + sig { params(client: Scrapegraphai::Client).returns(T.attached_class) } + def self.new(client:) + end + end + end +end diff --git a/rbi/scrapegraphai/resources/markdownify.rbi b/rbi/scrapegraphai/resources/markdownify.rbi new file mode 100644 index 0000000..0a6fc01 --- /dev/null +++ b/rbi/scrapegraphai/resources/markdownify.rbi @@ -0,0 +1,43 @@ +# typed: strong + +module Scrapegraphai + module Resources + class Markdownify + # Convert web page content to clean Markdown format + sig do + params( + website_url: String, + headers: 
T::Hash[Symbol, String], + steps: T::Array[String], + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(Scrapegraphai::CompletedMarkdownify) + end + def convert( + # URL to convert to markdown + website_url:, + headers: nil, + # Interaction steps before conversion + steps: nil, + request_options: {} + ) + end + + # Retrieve the status and results of a markdown conversion + sig do + params( + request_id: String, + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns( + Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::Variants + ) + end + def retrieve_status(request_id, request_options: {}) + end + + # @api private + sig { params(client: Scrapegraphai::Client).returns(T.attached_class) } + def self.new(client:) + end + end + end +end diff --git a/rbi/scrapegraphai/resources/searchscraper.rbi b/rbi/scrapegraphai/resources/searchscraper.rbi new file mode 100644 index 0000000..05c53ff --- /dev/null +++ b/rbi/scrapegraphai/resources/searchscraper.rbi @@ -0,0 +1,52 @@ +# typed: strong + +module Scrapegraphai + module Resources + class Searchscraper + # Performs web search, selects relevant URLs, and extracts structured data from + # multiple websites. Uses LLM to refine search queries and merge results from + # different sources. 
+ sig do + params( + user_prompt: String, + headers: T::Hash[Symbol, String], + num_results: Integer, + output_schema: T.anything, + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(Scrapegraphai::CompletedSearchScraper) + end + def create( + # Search query and extraction instruction + user_prompt:, + headers: nil, + # Number of websites to scrape from search results + num_results: nil, + # JSON schema for structured output + output_schema: nil, + request_options: {} + ) + end + + # Retrieve the status and results of a search scraping operation + sig do + params( + request_id: String, + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns( + Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::Variants + ) + end + def retrieve_status( + # Unique request identifier + request_id, + request_options: {} + ) + end + + # @api private + sig { params(client: Scrapegraphai::Client).returns(T.attached_class) } + def self.new(client:) + end + end + end +end diff --git a/rbi/scrapegraphai/resources/smartscraper.rbi b/rbi/scrapegraphai/resources/smartscraper.rbi new file mode 100644 index 0000000..50e098a --- /dev/null +++ b/rbi/scrapegraphai/resources/smartscraper.rbi @@ -0,0 +1,77 @@ +# typed: strong + +module Scrapegraphai + module Resources + class Smartscraper + # Main scraping endpoint with LLM-powered content analysis. Supports various + # fetching providers, infinite scrolling, pagination, and custom output schemas. 
+ sig do + params( + user_prompt: String, + cookies: T::Hash[Symbol, String], + headers: T::Hash[Symbol, String], + number_of_scrolls: Integer, + output_schema: T.anything, + render_heavy_js: T::Boolean, + steps: T::Array[String], + total_pages: Integer, + website_html: String, + website_url: String, + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(Scrapegraphai::CompletedSmartscraper) + end + def create( + # Extraction instruction for the LLM + user_prompt:, + # Cookies to include in the request + cookies: nil, + # HTTP headers to include in the request + headers: nil, + # Number of infinite scroll operations to perform + number_of_scrolls: nil, + # JSON schema defining the expected output structure + output_schema: nil, + # Enable heavy JavaScript rendering + render_heavy_js: nil, + # Website interaction steps (e.g., clicking buttons) + steps: nil, + # Number of pages to process for pagination + total_pages: nil, + # HTML content to process (max 2MB, mutually exclusive with website_url) + website_html: nil, + # URL to scrape (mutually exclusive with website_html) + website_url: nil, + request_options: {} + ) + end + + # Retrieve the status and results of a scraping operation + sig do + params( + request_id: String, + request_options: Scrapegraphai::RequestOptions::OrHash + ).returns(Scrapegraphai::Models::SmartscraperRetrieveResponse::Variants) + end + def retrieve( + # Unique request identifier + request_id, + request_options: {} + ) + end + + # Retrieve the status and results of a scraping operation + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + Scrapegraphai::Models::SmartscraperListResponse::Variants + ) + end + def list(request_options: {}) + end + + # @api private + sig { params(client: Scrapegraphai::Client).returns(T.attached_class) } + def self.new(client:) + end + end + end +end diff --git a/rbi/scrapegraphai/resources/validate.rbi b/rbi/scrapegraphai/resources/validate.rbi new file mode 100644 
index 0000000..86f0a8d --- /dev/null +++ b/rbi/scrapegraphai/resources/validate.rbi @@ -0,0 +1,21 @@ +# typed: strong + +module Scrapegraphai + module Resources + class Validate + # Validate the API key and retrieve associated user email + sig do + params(request_options: Scrapegraphai::RequestOptions::OrHash).returns( + Scrapegraphai::Models::ValidateAPIKeyResponse + ) + end + def api_key(request_options: {}) + end + + # @api private + sig { params(client: Scrapegraphai::Client).returns(T.attached_class) } + def self.new(client:) + end + end + end +end diff --git a/rbi/scrapegraphai/version.rbi b/rbi/scrapegraphai/version.rbi new file mode 100644 index 0000000..51ce453 --- /dev/null +++ b/rbi/scrapegraphai/version.rbi @@ -0,0 +1,5 @@ +# typed: strong + +module Scrapegraphai + VERSION = T.let(T.unsafe(nil), String) +end diff --git a/release-please-config.json b/release-please-config.json new file mode 100644 index 0000000..3f5720a --- /dev/null +++ b/release-please-config.json @@ -0,0 +1,70 @@ +{ + "packages": { + ".": {} + }, + "$schema": "https://raw.githubusercontent.com/stainless-api/release-please/main/schemas/config.json", + "include-v-in-tag": true, + "include-component-in-tag": false, + "versioning": "prerelease", + "prerelease": true, + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": false, + "pull-request-header": "Automated Release PR", + "pull-request-title-pattern": "release: ${version}", + "changelog-sections": [ + { + "type": "feat", + "section": "Features" + }, + { + "type": "fix", + "section": "Bug Fixes" + }, + { + "type": "perf", + "section": "Performance Improvements" + }, + { + "type": "revert", + "section": "Reverts" + }, + { + "type": "chore", + "section": "Chores" + }, + { + "type": "docs", + "section": "Documentation" + }, + { + "type": "style", + "section": "Styles" + }, + { + "type": "refactor", + "section": "Refactors" + }, + { + "type": "test", + "section": "Tests", + "hidden": true + }, + { + "type": "build", + 
"section": "Build System" + }, + { + "type": "ci", + "section": "Continuous Integration", + "hidden": true + } + ], + "release-type": "ruby", + "version-file": "lib/scrapegraphai/version.rb", + "extra-files": [ + { + "type": "ruby-readme", + "path": "README.md" + } + ] +} \ No newline at end of file diff --git a/scrapegraphai.gemspec b/scrapegraphai.gemspec new file mode 100644 index 0000000..e15938e --- /dev/null +++ b/scrapegraphai.gemspec @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +require_relative "lib/scrapegraphai/version" + +Gem::Specification.new do |s| + s.name = "scrapegraphai" + s.version = Scrapegraphai::VERSION + s.summary = "Ruby library to access the Scrapegraphai API" + s.authors = ["Scrapegraphai"] + s.email = "" + s.homepage = "https://gemdocs.org/gems/scrapegraphai" + s.metadata["homepage_uri"] = s.homepage + s.metadata["source_code_uri"] = "https://github.com/ScrapeGraphAI/scrapegraphai-ruby" + s.metadata["rubygems_mfa_required"] = false.to_s + s.required_ruby_version = ">= 3.2.0" + + s.files = Dir[ + "lib/**/*.rb", + "rbi/**/*.rbi", + "sig/**/*.rbs", + "manifest.yaml", + "SECURITY.md", + "CHANGELOG.md", + ".ignore" + ] + s.extra_rdoc_files = ["README.md"] + s.add_dependency "connection_pool" +end diff --git a/scripts/bootstrap b/scripts/bootstrap new file mode 100755 index 0000000..3487864 --- /dev/null +++ b/scripts/bootstrap @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -e + +cd -- "$(dirname -- "$0")/.." + +if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ] && [ "$SKIP_BREW" != "1" ] && [ -t 0 ]; then + brew bundle check >/dev/null 2>&1 || { + echo -n "==> Install Homebrew dependencies? 
(y/N): " + read -r response + case "$response" in + [yY][eE][sS]|[yY]) + brew bundle + ;; + *) + ;; + esac + echo + } +fi + +echo "==> Installing Ruby dependencies…" + +exec -- bundle install "$@" diff --git a/scripts/fast-format b/scripts/fast-format new file mode 100755 index 0000000..6d5973f --- /dev/null +++ b/scripts/fast-format @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -euo pipefail + +echo "Script started with $# arguments" +echo "Arguments: $*" +echo "Script location: $(dirname "$0")" + +cd -- "$(dirname "$0")/.." +echo "Changed to directory: $PWD" + +if [ $# -eq 0 ]; then + echo "Usage: $0 [additional-formatter-args...]" + echo "The file should contain one file path per line" + exit 1 +fi + +exec -- bundle exec rake format FORMAT_FILE="$1" diff --git a/scripts/format b/scripts/format new file mode 100755 index 0000000..177d1e6 --- /dev/null +++ b/scripts/format @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -e + +cd -- "$(dirname -- "$0")/.." + +echo "==> Running formatters" + +exec -- bundle exec rake format "$@" diff --git a/scripts/lint b/scripts/lint new file mode 100755 index 0000000..08b0dbe --- /dev/null +++ b/scripts/lint @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -e + +cd -- "$(dirname -- "$0")/.." + +echo "==> Running linters" + +exec -- bundle exec rake lint "$@" diff --git a/scripts/mock b/scripts/mock new file mode 100755 index 0000000..0b28f6e --- /dev/null +++ b/scripts/mock @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/.." 
+ +if [[ -n "$1" && "$1" != '--'* ]]; then + URL="$1" + shift +else + URL="$(grep 'openapi_spec_url' .stats.yml | cut -d' ' -f2)" +fi + +# Check if the URL is empty +if [ -z "$URL" ]; then + echo "Error: No OpenAPI spec path/url provided or found in .stats.yml" + exit 1 +fi + +echo "==> Starting mock server with URL ${URL}" + +# Run prism mock on the given spec +if [ "$1" == "--daemon" ]; then + npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL" &> .prism.log & + + # Wait for server to come online + echo -n "Waiting for server" + while ! grep -q "✖ fatal\|Prism is listening" ".prism.log" ; do + echo -n "." + sleep 0.1 + done + + if grep -q "✖ fatal" ".prism.log"; then + cat .prism.log + exit 1 + fi + + echo +else + npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL" +fi diff --git a/scripts/test b/scripts/test new file mode 100755 index 0000000..e0dc137 --- /dev/null +++ b/scripts/test @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +set -e + +cd -- "$(dirname -- "$0")/.." + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +NC='\033[0m' # No Color + +function prism_is_running() { + curl --silent "http://localhost:4010" >/dev/null 2>&1 +} + +kill_server_on_port() { + pids=$(lsof -t -i tcp:"$1" || echo "") + if [ "$pids" != "" ]; then + kill "$pids" + echo "Stopped $pids." + fi +} + +function is_overriding_api_base_url() { + [ -n "$TEST_API_BASE_URL" ] +} + +if ! is_overriding_api_base_url && ! prism_is_running ; then + # When we exit this script, make sure to kill the background mock server process + trap 'kill_server_on_port 4010' EXIT + + # Start the dev server + ./scripts/mock --daemon +fi + +if is_overriding_api_base_url ; then + echo -e "${GREEN}✔ Running tests against ${TEST_API_BASE_URL}${NC}" + echo +elif ! prism_is_running ; then + echo -e "${RED}ERROR:${NC} The test suite will not run without a mock Prism server" + echo -e "running against your OpenAPI spec." 
+ echo + echo -e "To run the server, pass in the path or url of your OpenAPI" + echo -e "spec to the prism command:" + echo + echo -e " \$ ${YELLOW}npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock path/to/your.openapi.yml${NC}" + echo + + exit 1 +else + echo -e "${GREEN}✔ Mock prism server is running with your OpenAPI spec${NC}" + echo +fi + +echo "==> Running tests" +bundle exec rake test "$@" diff --git a/sig/scrapegraphai/client.rbs b/sig/scrapegraphai/client.rbs new file mode 100644 index 0000000..890a767 --- /dev/null +++ b/sig/scrapegraphai/client.rbs @@ -0,0 +1,48 @@ +module Scrapegraphai + class Client < Scrapegraphai::Internal::Transport::BaseClient + DEFAULT_MAX_RETRIES: 2 + + DEFAULT_TIMEOUT_IN_SECONDS: Float + + DEFAULT_INITIAL_RETRY_DELAY: Float + + DEFAULT_MAX_RETRY_DELAY: Float + + ENVIRONMENTS: { + production: "https://api.scrapegraphai.com/v1", + :environment_1 => "http://localhost:8001/v1" + } + + attr_reader api_key: String + + attr_reader smartscraper: Scrapegraphai::Resources::Smartscraper + + attr_reader markdownify: Scrapegraphai::Resources::Markdownify + + attr_reader searchscraper: Scrapegraphai::Resources::Searchscraper + + attr_reader generate_schema: Scrapegraphai::Resources::GenerateSchema + + attr_reader crawl: Scrapegraphai::Resources::Crawl + + attr_reader credits: Scrapegraphai::Resources::Credits + + attr_reader validate: Scrapegraphai::Resources::Validate + + attr_reader feedback: Scrapegraphai::Resources::Feedback + + attr_reader healthz: Scrapegraphai::Resources::Healthz + + private def auth_headers: -> ::Hash[String, String] + + def initialize: ( + ?api_key: String?, + ?environment: :production | :environment_1 | nil, + ?base_url: String?, + ?max_retries: Integer, + ?timeout: Float, + ?initial_retry_delay: Float, + ?max_retry_delay: Float + ) -> void + end +end diff --git a/sig/scrapegraphai/errors.rbs b/sig/scrapegraphai/errors.rbs new file mode 100644 index 0000000..c0afc3e --- /dev/null +++ 
b/sig/scrapegraphai/errors.rbs @@ -0,0 +1,117 @@ +module Scrapegraphai + module Errors + class Error < StandardError + attr_accessor cause: StandardError? + end + + class ConversionError < Scrapegraphai::Errors::Error + def cause: -> StandardError? + + def initialize: ( + on: Class, + method: Symbol, + target: top, + value: top, + ?cause: StandardError? + ) -> void + end + + class APIError < Scrapegraphai::Errors::Error + attr_accessor url: URI::Generic + + attr_accessor status: Integer? + + attr_accessor headers: ::Hash[String, String]? + + attr_accessor body: top? + + def initialize: ( + url: URI::Generic, + ?status: Integer?, + ?headers: ::Hash[String, String]?, + ?body: Object?, + ?request: nil, + ?response: nil, + ?message: String? + ) -> void + end + + class APIConnectionError < Scrapegraphai::Errors::APIError + def initialize: ( + url: URI::Generic, + ?status: nil, + ?headers: ::Hash[String, String]?, + ?body: nil, + ?request: nil, + ?response: nil, + ?message: String? + ) -> void + end + + class APITimeoutError < Scrapegraphai::Errors::APIConnectionError + def initialize: ( + url: URI::Generic, + ?status: nil, + ?headers: ::Hash[String, String]?, + ?body: nil, + ?request: nil, + ?response: nil, + ?message: String? + ) -> void + end + + class APIStatusError < Scrapegraphai::Errors::APIError + def self.for: ( + url: URI::Generic, + status: Integer, + headers: ::Hash[String, String]?, + body: Object?, + request: nil, + response: nil, + ?message: String? + ) -> instance + + def initialize: ( + url: URI::Generic, + status: Integer, + headers: ::Hash[String, String]?, + body: Object?, + request: nil, + response: nil, + ?message: String? 
+ ) -> void + end + + class BadRequestError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS: 400 + end + + class AuthenticationError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS: 401 + end + + class PermissionDeniedError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS: 403 + end + + class NotFoundError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS: 404 + end + + class ConflictError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS: 409 + end + + class UnprocessableEntityError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS: 422 + end + + class RateLimitError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS: 429 + end + + class InternalServerError < Scrapegraphai::Errors::APIStatusError + HTTP_STATUS: Range[Integer] + end + end +end diff --git a/sig/scrapegraphai/file_part.rbs b/sig/scrapegraphai/file_part.rbs new file mode 100644 index 0000000..cc19b4e --- /dev/null +++ b/sig/scrapegraphai/file_part.rbs @@ -0,0 +1,21 @@ +module Scrapegraphai + class FilePart + attr_reader content: Pathname | StringIO | IO | String + + attr_reader content_type: String? + + attr_reader filename: String? + + private def read: -> String + + def to_json: (*top a) -> String + + def to_yaml: (*top a) -> String + + def initialize: ( + Pathname | StringIO | IO | String content, + ?filename: (Pathname | String)?, + ?content_type: String? 
+ ) -> void + end +end diff --git a/sig/scrapegraphai/internal.rbs b/sig/scrapegraphai/internal.rbs new file mode 100644 index 0000000..4828b95 --- /dev/null +++ b/sig/scrapegraphai/internal.rbs @@ -0,0 +1,10 @@ +module Scrapegraphai + module Internal + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + type file_input = + Pathname | StringIO | IO | String | Scrapegraphai::FilePart + + OMIT: Object + end +end diff --git a/sig/scrapegraphai/internal/transport/base_client.rbs b/sig/scrapegraphai/internal/transport/base_client.rbs new file mode 100644 index 0000000..ffde0ca --- /dev/null +++ b/sig/scrapegraphai/internal/transport/base_client.rbs @@ -0,0 +1,131 @@ +module Scrapegraphai + module Internal + module Transport + class BaseClient + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + type request_components = + { + method: Symbol, + path: String | ::Array[String], + query: ::Hash[String, (::Array[String] | String)?]?, + headers: ::Hash[String, (String + | Integer + | ::Array[(String | Integer)?])?]?, + body: top?, + unwrap: (Symbol + | Integer + | ::Array[(Symbol | Integer)] + | (^(top arg0) -> top))?, + page: Class?, + stream: Class?, + model: Scrapegraphai::Internal::Type::Converter::input?, + options: Scrapegraphai::request_opts? 
+ } + type request_input = + { + method: Symbol, + url: URI::Generic, + headers: ::Hash[String, String], + body: top, + max_retries: Integer, + timeout: Float + } + + MAX_REDIRECTS: 20 + + PLATFORM_HEADERS: ::Hash[String, String] + + def self.validate!: ( + Scrapegraphai::Internal::Transport::BaseClient::request_components req + ) -> void + + def self.should_retry?: ( + Integer status, + headers: ::Hash[String, String] + ) -> bool + + def self.follow_redirect: ( + Scrapegraphai::Internal::Transport::BaseClient::request_input request, + status: Integer, + response_headers: ::Hash[String, String] + ) -> Scrapegraphai::Internal::Transport::BaseClient::request_input + + def self.reap_connection!: ( + Integer | Scrapegraphai::Errors::APIConnectionError status, + stream: Enumerable[String]? + ) -> void + + attr_reader base_url: URI::Generic + + attr_reader timeout: Float + + attr_reader max_retries: Integer + + attr_reader initial_retry_delay: Float + + attr_reader max_retry_delay: Float + + attr_reader headers: ::Hash[String, String] + + attr_reader idempotency_header: String? + + # @api private + attr_reader requester: Scrapegraphai::Internal::Transport::PooledNetRequester + + def initialize: ( + base_url: String, + ?timeout: Float, + ?max_retries: Integer, + ?initial_retry_delay: Float, + ?max_retry_delay: Float, + ?headers: ::Hash[String, (String + | Integer + | ::Array[(String | Integer)?])?], + ?idempotency_header: String? 
+ ) -> void + + private def auth_headers: -> ::Hash[String, String] + + private def generate_idempotency_key: -> String + + private def build_request: ( + Scrapegraphai::Internal::Transport::BaseClient::request_components req, + Scrapegraphai::request_options opts + ) -> Scrapegraphai::Internal::Transport::BaseClient::request_input + + private def retry_delay: ( + ::Hash[String, String] headers, + retry_count: Integer + ) -> Float + + def send_request: ( + Scrapegraphai::Internal::Transport::BaseClient::request_input request, + redirect_count: Integer, + retry_count: Integer, + send_retry_header: bool + ) -> [Integer, top, Enumerable[String]] + + def request: ( + Symbol method, + String | ::Array[String] path, + ?query: ::Hash[String, (::Array[String] | String)?]?, + ?headers: ::Hash[String, (String + | Integer + | ::Array[(String | Integer)?])?]?, + ?body: top?, + ?unwrap: (Symbol + | Integer + | ::Array[(Symbol | Integer)] + | (^(top arg0) -> top))?, + ?page: Class?, + ?stream: Class?, + ?model: Scrapegraphai::Internal::Type::Converter::input?, + ?options: Scrapegraphai::request_opts? 
+ ) -> top + + def inspect: -> String + end + end + end +end diff --git a/sig/scrapegraphai/internal/transport/pooled_net_requester.rbs b/sig/scrapegraphai/internal/transport/pooled_net_requester.rbs new file mode 100644 index 0000000..330a0ca --- /dev/null +++ b/sig/scrapegraphai/internal/transport/pooled_net_requester.rbs @@ -0,0 +1,45 @@ +module Scrapegraphai + module Internal + module Transport + class PooledNetRequester + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + type request = + { + method: Symbol, + url: URI::Generic, + headers: ::Hash[String, String], + body: top, + deadline: Float + } + + KEEP_ALIVE_TIMEOUT: 30 + + DEFAULT_MAX_CONNECTIONS: Integer + + def self.connect: (URI::Generic url) -> top + + def self.calibrate_socket_timeout: (top conn, Float deadline) -> void + + def self.build_request: ( + Scrapegraphai::Internal::Transport::PooledNetRequester::request request + ) { + (String arg0) -> void + } -> [top, (^-> void)] + + private def with_pool: ( + URI::Generic url, + deadline: Float + ) { + (top arg0) -> void + } -> void + + def execute: ( + Scrapegraphai::Internal::Transport::PooledNetRequester::request request + ) -> [Integer, top, Enumerable[String]] + + def initialize: (?size: Integer) -> void + end + end + end +end diff --git a/sig/scrapegraphai/internal/type/array_of.rbs b/sig/scrapegraphai/internal/type/array_of.rbs new file mode 100644 index 0000000..d3fcca4 --- /dev/null +++ b/sig/scrapegraphai/internal/type/array_of.rbs @@ -0,0 +1,48 @@ +module Scrapegraphai + module Internal + module Type + class ArrayOf[Elem] + include Scrapegraphai::Internal::Type::Converter + include Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + def self.[]: ( + ::Hash[Symbol, top] + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input type_info, + ?::Hash[Symbol, top] spec + ) -> instance + + def ===: (top other) -> bool + + def ==: (top other) -> bool + + def hash: -> Integer + + def 
coerce: ( + ::Array[top] | top value, + state: Scrapegraphai::Internal::Type::Converter::coerce_state + ) -> (::Array[top] | top) + + def dump: ( + ::Array[top] | top value, + state: Scrapegraphai::Internal::Type::Converter::dump_state + ) -> (::Array[top] | top) + + def to_sorbet_type: -> top + + def item_type: -> Elem + + def nilable?: -> bool + + def initialize: ( + ::Hash[Symbol, top] + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input type_info, + ?::Hash[Symbol, top] spec + ) -> void + + def inspect: (?depth: Integer) -> String + end + end + end +end diff --git a/sig/scrapegraphai/internal/type/base_model.rbs b/sig/scrapegraphai/internal/type/base_model.rbs new file mode 100644 index 0000000..79568bd --- /dev/null +++ b/sig/scrapegraphai/internal/type/base_model.rbs @@ -0,0 +1,104 @@ +module Scrapegraphai + module Internal + module Type + class BaseModel + extend Scrapegraphai::Internal::Type::Converter + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + type known_field = + { mode: (:coerce | :dump)?, required: bool, nilable: bool } + + def self.inherited: (self child) -> void + + def self.known_fields: -> ::Hash[Symbol, (Scrapegraphai::Internal::Type::BaseModel::known_field + & { type_fn: (^-> Scrapegraphai::Internal::Type::Converter::input) })] + + def self.fields: -> ::Hash[Symbol, (Scrapegraphai::Internal::Type::BaseModel::known_field + & { type: Scrapegraphai::Internal::Type::Converter::input })] + + private def self.add_field: ( + Symbol name_sym, + required: bool, + type_info: { + const: (nil | bool | Integer | Float | Symbol)?, + enum: ^-> Scrapegraphai::Internal::Type::Converter::input?, + union: ^-> Scrapegraphai::Internal::Type::Converter::input?, + api_name: Symbol + } + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input, + spec: ::Hash[Symbol, top] + ) -> void + + def self.required: ( + Symbol name_sym, + ::Hash[Symbol, 
top] + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input type_info, + ?::Hash[Symbol, top] spec + ) -> void + + def self.optional: ( + Symbol name_sym, + ::Hash[Symbol, top] + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input type_info, + ?::Hash[Symbol, top] spec + ) -> void + + private def self.request_only: { -> void } -> void + + private def self.response_only: { -> void } -> void + + def self.==: (top other) -> bool + + def self.hash: -> Integer + + def ==: (top other) -> bool + + def hash: -> Integer + + def self.coerce: ( + Scrapegraphai::Internal::Type::BaseModel + | ::Hash[top, top] + | top value, + state: Scrapegraphai::Internal::Type::Converter::coerce_state + ) -> (instance | top) + + def self.dump: ( + instance | top value, + state: Scrapegraphai::Internal::Type::Converter::dump_state + ) -> (::Hash[top, top] | top) + + def self.to_sorbet_type: -> top + + def self.recursively_to_h: ( + Scrapegraphai::Internal::Type::BaseModel model, + convert: bool + ) -> ::Hash[Symbol, top] + + def []: (Symbol key) -> top? + + def to_h: -> ::Hash[Symbol, top] + + alias to_hash to_h + + def deep_to_h: -> ::Hash[Symbol, top] + + def deconstruct_keys: (::Array[Symbol]? 
keys) -> ::Hash[Symbol, top] + + def to_json: (*top a) -> String + + def to_yaml: (*top a) -> String + + def initialize: (?::Hash[Symbol, top] | instance data) -> void + + def self.inspect: (?depth: Integer) -> String + + def to_s: -> String + + def inspect: -> String + end + end + end +end diff --git a/sig/scrapegraphai/internal/type/base_page.rbs b/sig/scrapegraphai/internal/type/base_page.rbs new file mode 100644 index 0000000..a5346fa --- /dev/null +++ b/sig/scrapegraphai/internal/type/base_page.rbs @@ -0,0 +1,24 @@ +module Scrapegraphai + module Internal + module Type + module BasePage[Elem] + def next_page?: -> bool + + def next_page: -> instance + + def auto_paging_each: { (Elem arg0) -> void } -> void + + def to_enum: -> Enumerable[Elem] + + alias enum_for to_enum + + def initialize: ( + client: Scrapegraphai::Internal::Transport::BaseClient, + req: Scrapegraphai::Internal::Transport::BaseClient::request_components, + headers: ::Hash[String, String], + page_data: top + ) -> void + end + end + end +end diff --git a/sig/scrapegraphai/internal/type/boolean.rbs b/sig/scrapegraphai/internal/type/boolean.rbs new file mode 100644 index 0000000..60d46a5 --- /dev/null +++ b/sig/scrapegraphai/internal/type/boolean.rbs @@ -0,0 +1,26 @@ +module Scrapegraphai + module Internal + module Type + class Boolean + extend Scrapegraphai::Internal::Type::Converter + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + def self.===: (top other) -> bool + + def self.==: (top other) -> bool + + def self.coerce: ( + bool | top value, + state: Scrapegraphai::Internal::Type::Converter::coerce_state + ) -> (bool | top) + + def self.dump: ( + bool | top value, + state: Scrapegraphai::Internal::Type::Converter::dump_state + ) -> (bool | top) + + def self.to_sorbet_type: -> top + end + end + end +end diff --git a/sig/scrapegraphai/internal/type/converter.rbs b/sig/scrapegraphai/internal/type/converter.rbs new file mode 100644 index 0000000..5737bb0 --- /dev/null +++ 
b/sig/scrapegraphai/internal/type/converter.rbs @@ -0,0 +1,79 @@ +module Scrapegraphai + module Internal + module Type + module Converter + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + type input = Scrapegraphai::Internal::Type::Converter | Class + + type coerce_state = + { + translate_names: bool, + strictness: bool, + exactness: { yes: Integer, no: Integer, maybe: Integer }, + error: Class, + branched: Integer + } + + type dump_state = { can_retry: bool } + + def coerce: ( + top value, + state: Scrapegraphai::Internal::Type::Converter::coerce_state + ) -> top + + def dump: ( + top value, + state: Scrapegraphai::Internal::Type::Converter::dump_state + ) -> top + + def inspect: (?depth: Integer) -> String + + def self.type_info: ( + { + const: (nil | bool | Integer | Float | Symbol)?, + enum: ^-> Scrapegraphai::Internal::Type::Converter::input?, + union: ^-> Scrapegraphai::Internal::Type::Converter::input? + } + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input spec + ) -> (^-> top) + + def self.meta_info: ( + { + const: (nil | bool | Integer | Float | Symbol)?, + enum: ^-> Scrapegraphai::Internal::Type::Converter::input?, + union: ^-> Scrapegraphai::Internal::Type::Converter::input? + } + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input type_info, + { + const: (nil | bool | Integer | Float | Symbol)?, + enum: ^-> Scrapegraphai::Internal::Type::Converter::input?, + union: ^-> Scrapegraphai::Internal::Type::Converter::input? 
+ } + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input spec + ) -> ::Hash[Symbol, top] + + def self.new_coerce_state: ( + ?translate_names: bool + ) -> Scrapegraphai::Internal::Type::Converter::coerce_state + + def self.coerce: ( + Scrapegraphai::Internal::Type::Converter::input target, + top value, + ?state: Scrapegraphai::Internal::Type::Converter::coerce_state + ) -> top + + def self.dump: ( + Scrapegraphai::Internal::Type::Converter::input target, + top value, + ?state: Scrapegraphai::Internal::Type::Converter::dump_state + ) -> top + + def self.inspect: (top target, depth: Integer) -> String + end + end + end +end diff --git a/sig/scrapegraphai/internal/type/enum.rbs b/sig/scrapegraphai/internal/type/enum.rbs new file mode 100644 index 0000000..ce86168 --- /dev/null +++ b/sig/scrapegraphai/internal/type/enum.rbs @@ -0,0 +1,32 @@ +module Scrapegraphai + module Internal + module Type + module Enum + include Scrapegraphai::Internal::Type::Converter + include Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + def self.values: -> ::Array[(nil | bool | Integer | Float | Symbol)] + + def ===: (top other) -> bool + + def ==: (top other) -> bool + + def hash: -> Integer + + def coerce: ( + String | Symbol | top value, + state: Scrapegraphai::Internal::Type::Converter::coerce_state + ) -> (Symbol | top) + + def dump: ( + Symbol | top value, + state: Scrapegraphai::Internal::Type::Converter::dump_state + ) -> (Symbol | top) + + def to_sorbet_type: -> top + + def inspect: (?depth: Integer) -> String + end + end + end +end diff --git a/sig/scrapegraphai/internal/type/file_input.rbs b/sig/scrapegraphai/internal/type/file_input.rbs new file mode 100644 index 0000000..b441571 --- /dev/null +++ b/sig/scrapegraphai/internal/type/file_input.rbs @@ -0,0 +1,25 @@ +module Scrapegraphai + module Internal + module Type + class FileInput + extend Scrapegraphai::Internal::Type::Converter + + def self.===: (top other) -> bool 
+ + def self.==: (top other) -> bool + + def self.coerce: ( + StringIO | String | top value, + state: Scrapegraphai::Internal::Type::Converter::coerce_state + ) -> (StringIO | top) + + def self.dump: ( + Pathname | StringIO | IO | String | top value, + state: Scrapegraphai::Internal::Type::Converter::dump_state + ) -> (Pathname | StringIO | IO | String | top) + + def self.to_sorbet_type: -> top + end + end + end +end diff --git a/sig/scrapegraphai/internal/type/hash_of.rbs b/sig/scrapegraphai/internal/type/hash_of.rbs new file mode 100644 index 0000000..3cc1085 --- /dev/null +++ b/sig/scrapegraphai/internal/type/hash_of.rbs @@ -0,0 +1,48 @@ +module Scrapegraphai + module Internal + module Type + class HashOf[Elem] + include Scrapegraphai::Internal::Type::Converter + include Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + def self.[]: ( + ::Hash[Symbol, top] + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input type_info, + ?::Hash[Symbol, top] spec + ) -> instance + + def ===: (top other) -> bool + + def ==: (top other) -> bool + + def hash: -> Integer + + def coerce: ( + ::Hash[top, top] | top value, + state: Scrapegraphai::Internal::Type::Converter::coerce_state + ) -> (::Hash[Symbol, top] | top) + + def dump: ( + ::Hash[top, top] | top value, + state: Scrapegraphai::Internal::Type::Converter::dump_state + ) -> (::Hash[Symbol, top] | top) + + def to_sorbet_type: -> top + + def item_type: -> Elem + + def nilable?: -> bool + + def initialize: ( + ::Hash[Symbol, top] + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input type_info, + ?::Hash[Symbol, top] spec + ) -> void + + def inspect: (?depth: Integer) -> String + end + end + end +end diff --git a/sig/scrapegraphai/internal/type/request_parameters.rbs b/sig/scrapegraphai/internal/type/request_parameters.rbs new file mode 100644 index 0000000..85b3494 --- /dev/null +++ 
b/sig/scrapegraphai/internal/type/request_parameters.rbs @@ -0,0 +1,19 @@ +module Scrapegraphai + module Internal + module Type + type request_parameters = { request_options: Scrapegraphai::request_opts } + + module RequestParameters + attr_reader request_options: Scrapegraphai::request_opts + + def request_options=: ( + Scrapegraphai::request_opts + ) -> Scrapegraphai::request_opts + + module Converter + def dump_request: (top params) -> [top, ::Hash[Symbol, top]] + end + end + end + end +end diff --git a/sig/scrapegraphai/internal/type/union.rbs b/sig/scrapegraphai/internal/type/union.rbs new file mode 100644 index 0000000..a0d2969 --- /dev/null +++ b/sig/scrapegraphai/internal/type/union.rbs @@ -0,0 +1,52 @@ +module Scrapegraphai + module Internal + module Type + module Union + include Scrapegraphai::Internal::Type::Converter + include Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + private def self.known_variants: -> ::Array[[Symbol?, (^-> Scrapegraphai::Internal::Type::Converter::input), ::Hash[Symbol, top]]] + + def self.derefed_variants: -> ::Array[[Symbol?, top, ::Hash[Symbol, top]]] + + def self.variants: -> ::Array[top] + + private def self.discriminator: (Symbol property) -> void + + private def self.variant: ( + Symbol + | ::Hash[Symbol, top] + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input key, + ?::Hash[Symbol, top] + | ^-> Scrapegraphai::Internal::Type::Converter::input + | Scrapegraphai::Internal::Type::Converter::input spec + ) -> void + + private def self.resolve_variant: ( + top value + ) -> Scrapegraphai::Internal::Type::Converter::input? 
+ + def ===: (top other) -> bool + + def ==: (top other) -> bool + + def hash: -> Integer + + def coerce: ( + top value, + state: Scrapegraphai::Internal::Type::Converter::coerce_state + ) -> top + + def dump: ( + top value, + state: Scrapegraphai::Internal::Type::Converter::dump_state + ) -> top + + def to_sorbet_type: -> top + + def inspect: (?depth: Integer) -> String + end + end + end +end diff --git a/sig/scrapegraphai/internal/type/unknown.rbs b/sig/scrapegraphai/internal/type/unknown.rbs new file mode 100644 index 0000000..c079947 --- /dev/null +++ b/sig/scrapegraphai/internal/type/unknown.rbs @@ -0,0 +1,26 @@ +module Scrapegraphai + module Internal + module Type + class Unknown + extend Scrapegraphai::Internal::Type::Converter + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + def self.===: (top other) -> bool + + def self.==: (top other) -> bool + + def self.coerce: ( + top value, + state: Scrapegraphai::Internal::Type::Converter::coerce_state + ) -> top + + def self.dump: ( + top value, + state: Scrapegraphai::Internal::Type::Converter::dump_state + ) -> top + + def self.to_sorbet_type: -> top + end + end + end +end diff --git a/sig/scrapegraphai/internal/util.rbs b/sig/scrapegraphai/internal/util.rbs new file mode 100644 index 0000000..5cd1dc9 --- /dev/null +++ b/sig/scrapegraphai/internal/util.rbs @@ -0,0 +1,185 @@ +module Scrapegraphai + module Internal + module Util + extend Scrapegraphai::Internal::Util::SorbetRuntimeSupport + + def self?.monotonic_secs: -> Float + + def self?.walk_namespaces: ( + Module | Class ns + ) -> Enumerable[(Module | Class)] + + def self?.arch: -> String + + def self?.os: -> String + + def self?.primitive?: (top input) -> bool + + def self?.coerce_boolean: (String | bool input) -> (bool | top) + + def self?.coerce_boolean!: (String | bool input) -> bool? 
+ + def self?.coerce_integer: (String | Integer input) -> (Integer | top) + + def self?.coerce_float: (String | Integer | Float input) -> (Float | top) + + def self?.coerce_hash: (top input) -> (::Hash[top, top] | top) + + def self?.coerce_hash!: (top input) -> ::Hash[top, top]? + + def self?.deep_merge_lr: (top lhs, top rhs, ?concat: bool) -> top + + def self?.deep_merge: ( + *::Array[top] values, + ?sentinel: top?, + ?concat: bool + ) -> top + + def self?.dig: ( + ::Hash[Symbol, top] | ::Array[top] | top data, + (Symbol + | Integer + | ::Array[(Symbol | Integer)] + | (^(top arg0) -> top))? pick + ) { + -> top? + } -> top? + + def self?.uri_origin: (URI::Generic uri) -> String + + def self?.interpolate_path: (String | ::Array[String] path) -> String + + def self?.decode_query: (String? query) -> ::Hash[String, ::Array[String]] + + def self?.encode_query: ( + ::Hash[String, (::Array[String] | String)?]? query + ) -> String? + + type parsed_uri = + { + scheme: String?, + host: String?, + port: Integer?, + path: String?, + query: ::Hash[String, ::Array[String]] + } + + def self?.parse_uri: ( + URI::Generic | String url + ) -> Scrapegraphai::Internal::Util::parsed_uri + + def self?.unparse_uri: ( + Scrapegraphai::Internal::Util::parsed_uri parsed + ) -> URI::Generic + + def self?.join_parsed_uri: ( + Scrapegraphai::Internal::Util::parsed_uri lhs, + Scrapegraphai::Internal::Util::parsed_uri rhs + ) -> URI::Generic + + def self?.normalized_headers: ( + *::Hash[String, (String + | Integer + | ::Array[(String | Integer)?])?] headers + ) -> ::Hash[String, String] + + class ReadIOAdapter + def close?: -> bool? + + def close: -> void + + private def read_enum: (Integer? max_len) -> String + + def read: (?Integer? max_len, ?String? out_string) -> String? 
+ + def initialize: ( + String | Pathname | StringIO | Enumerable[String] src + ) { + (String arg0) -> void + } -> void + end + + def self?.writable_enum: { + (Enumerator::Yielder y) -> void + } -> Enumerable[String] + + JSON_CONTENT: Regexp + JSONL_CONTENT: Regexp + + def self?.write_multipart_content: ( + Enumerator::Yielder y, + val: top, + closing: ::Array[^-> void], + ?content_type: String? + ) -> void + + def self?.write_multipart_chunk: ( + Enumerator::Yielder y, + boundary: String, + key: Symbol | String, + val: top, + closing: ::Array[^-> void] + ) -> void + + def self?.encode_multipart_streaming: ( + top body + ) -> [String, Enumerable[String]] + + def self?.encode_content: ( + ::Hash[String, String] headers, + top body + ) -> top + + def self?.force_charset!: (String content_type, text: String) -> void + + def self?.decode_content: ( + ::Hash[String, String] headers, + stream: Enumerable[String], + ?suppress_error: bool + ) -> top + + def self?.fused_enum: ( + Enumerable[top] enum, + ?external: bool + ) { + -> void + } -> Enumerable[top] + + def self?.close_fused!: (Enumerable[top]? enum) -> void + + def self?.chain_fused: ( + Enumerable[top]? enum + ) { + (Enumerator::Yielder arg0) -> void + } -> Enumerable[top] + + type server_sent_event = + { event: String?, data: String?, id: String?, retry: Integer? 
} + + def self?.decode_lines: (Enumerable[String] enum) -> Enumerable[String] + + def self?.decode_sse: ( + Enumerable[String] lines + ) -> Enumerable[Scrapegraphai::Internal::Util::server_sent_event] + + module SorbetRuntimeSupport + class MissingSorbetRuntimeError < ::RuntimeError + end + + private def sorbet_runtime_constants: -> ::Hash[Symbol, top] + + def const_missing: (Symbol name) -> void + + def sorbet_constant_defined?: (Symbol name) -> bool + + def define_sorbet_constant!: (Symbol name) { -> top } -> void + + def to_sorbet_type: -> top + + def self.to_sorbet_type: ( + Scrapegraphai::Internal::Util::SorbetRuntimeSupport | top `type` + ) -> top + end + end + end +end diff --git a/sig/scrapegraphai/models.rbs b/sig/scrapegraphai/models.rbs new file mode 100644 index 0000000..d671565 --- /dev/null +++ b/sig/scrapegraphai/models.rbs @@ -0,0 +1,39 @@ +module Scrapegraphai + class CompletedMarkdownify = Scrapegraphai::Models::CompletedMarkdownify + + class CompletedSearchScraper = Scrapegraphai::Models::CompletedSearchScraper + + class CompletedSmartscraper = Scrapegraphai::Models::CompletedSmartscraper + + class CrawlRetrieveResultsParams = Scrapegraphai::Models::CrawlRetrieveResultsParams + + class CrawlStartParams = Scrapegraphai::Models::CrawlStartParams + + class CreditRetrieveParams = Scrapegraphai::Models::CreditRetrieveParams + + class FailedSmartscraper = Scrapegraphai::Models::FailedSmartscraper + + class FeedbackSubmitParams = Scrapegraphai::Models::FeedbackSubmitParams + + class GenerateSchemaCreateParams = Scrapegraphai::Models::GenerateSchemaCreateParams + + class GenerateSchemaRetrieveParams = Scrapegraphai::Models::GenerateSchemaRetrieveParams + + class HealthzCheckParams = Scrapegraphai::Models::HealthzCheckParams + + class MarkdownifyConvertParams = Scrapegraphai::Models::MarkdownifyConvertParams + + class MarkdownifyRetrieveStatusParams = Scrapegraphai::Models::MarkdownifyRetrieveStatusParams + + class SearchscraperCreateParams = 
Scrapegraphai::Models::SearchscraperCreateParams + + class SearchscraperRetrieveStatusParams = Scrapegraphai::Models::SearchscraperRetrieveStatusParams + + class SmartscraperCreateParams = Scrapegraphai::Models::SmartscraperCreateParams + + class SmartscraperListParams = Scrapegraphai::Models::SmartscraperListParams + + class SmartscraperRetrieveParams = Scrapegraphai::Models::SmartscraperRetrieveParams + + class ValidateAPIKeyParams = Scrapegraphai::Models::ValidateAPIKeyParams +end diff --git a/sig/scrapegraphai/models/completed_markdownify.rbs b/sig/scrapegraphai/models/completed_markdownify.rbs new file mode 100644 index 0000000..e9a5c29 --- /dev/null +++ b/sig/scrapegraphai/models/completed_markdownify.rbs @@ -0,0 +1,62 @@ +module Scrapegraphai + module Models + type completed_markdownify = + { + error: String, + request_id: String, + result: String?, + status: Scrapegraphai::Models::CompletedMarkdownify::status, + website_url: String + } + + class CompletedMarkdownify < Scrapegraphai::Internal::Type::BaseModel + attr_reader error: String? + + def error=: (String) -> String + + attr_reader request_id: String? + + def request_id=: (String) -> String + + attr_accessor result: String? + + attr_reader status: Scrapegraphai::Models::CompletedMarkdownify::status? + + def status=: ( + Scrapegraphai::Models::CompletedMarkdownify::status + ) -> Scrapegraphai::Models::CompletedMarkdownify::status + + attr_reader website_url: String? 
+ + def website_url=: (String) -> String + + def initialize: ( + ?error: String, + ?request_id: String, + ?result: String?, + ?status: Scrapegraphai::Models::CompletedMarkdownify::status, + ?website_url: String + ) -> void + + def to_hash: -> { + error: String, + request_id: String, + result: String?, + status: Scrapegraphai::Models::CompletedMarkdownify::status, + website_url: String + } + + type status = :queued | :processing | :completed + + module Status + extend Scrapegraphai::Internal::Type::Enum + + QUEUED: :queued + PROCESSING: :processing + COMPLETED: :completed + + def self?.values: -> ::Array[Scrapegraphai::Models::CompletedMarkdownify::status] + end + end + end +end diff --git a/sig/scrapegraphai/models/completed_search_scraper.rbs b/sig/scrapegraphai/models/completed_search_scraper.rbs new file mode 100644 index 0000000..754b4cc --- /dev/null +++ b/sig/scrapegraphai/models/completed_search_scraper.rbs @@ -0,0 +1,76 @@ +module Scrapegraphai + module Models + type completed_search_scraper = + { + error: String?, + num_results: Integer, + reference_urls: ::Array[String], + request_id: String, + result: top, + status: Scrapegraphai::Models::CompletedSearchScraper::status, + user_prompt: String + } + + class CompletedSearchScraper < Scrapegraphai::Internal::Type::BaseModel + attr_accessor error: String? + + attr_reader num_results: Integer? + + def num_results=: (Integer) -> Integer + + attr_reader reference_urls: ::Array[String]? + + def reference_urls=: (::Array[String]) -> ::Array[String] + + attr_reader request_id: String? + + def request_id=: (String) -> String + + attr_reader result: top? + + def result=: (top) -> top + + attr_reader status: Scrapegraphai::Models::CompletedSearchScraper::status? + + def status=: ( + Scrapegraphai::Models::CompletedSearchScraper::status + ) -> Scrapegraphai::Models::CompletedSearchScraper::status + + attr_reader user_prompt: String? 
+ + def user_prompt=: (String) -> String + + def initialize: ( + ?error: String?, + ?num_results: Integer, + ?reference_urls: ::Array[String], + ?request_id: String, + ?result: top, + ?status: Scrapegraphai::Models::CompletedSearchScraper::status, + ?user_prompt: String + ) -> void + + def to_hash: -> { + error: String?, + num_results: Integer, + reference_urls: ::Array[String], + request_id: String, + result: top, + status: Scrapegraphai::Models::CompletedSearchScraper::status, + user_prompt: String + } + + type status = :queued | :processing | :completed + + module Status + extend Scrapegraphai::Internal::Type::Enum + + QUEUED: :queued + PROCESSING: :processing + COMPLETED: :completed + + def self?.values: -> ::Array[Scrapegraphai::Models::CompletedSearchScraper::status] + end + end + end +end diff --git a/sig/scrapegraphai/models/completed_smartscraper.rbs b/sig/scrapegraphai/models/completed_smartscraper.rbs new file mode 100644 index 0000000..e04f6eb --- /dev/null +++ b/sig/scrapegraphai/models/completed_smartscraper.rbs @@ -0,0 +1,67 @@ +module Scrapegraphai + module Models + type completed_smartscraper = + { + error: String, + request_id: String, + result: top?, + status: Scrapegraphai::Models::CompletedSmartscraper::status, + user_prompt: String, + website_url: String? + } + + class CompletedSmartscraper < Scrapegraphai::Internal::Type::BaseModel + attr_reader error: String? + + def error=: (String) -> String + + attr_reader request_id: String? + + def request_id=: (String) -> String + + attr_accessor result: top? + + attr_reader status: Scrapegraphai::Models::CompletedSmartscraper::status? + + def status=: ( + Scrapegraphai::Models::CompletedSmartscraper::status + ) -> Scrapegraphai::Models::CompletedSmartscraper::status + + attr_reader user_prompt: String? + + def user_prompt=: (String) -> String + + attr_accessor website_url: String? 
+ + def initialize: ( + ?error: String, + ?request_id: String, + ?result: top?, + ?status: Scrapegraphai::Models::CompletedSmartscraper::status, + ?user_prompt: String, + ?website_url: String? + ) -> void + + def to_hash: -> { + error: String, + request_id: String, + result: top?, + status: Scrapegraphai::Models::CompletedSmartscraper::status, + user_prompt: String, + website_url: String? + } + + type status = :queued | :processing | :completed + + module Status + extend Scrapegraphai::Internal::Type::Enum + + QUEUED: :queued + PROCESSING: :processing + COMPLETED: :completed + + def self?.values: -> ::Array[Scrapegraphai::Models::CompletedSmartscraper::status] + end + end + end +end diff --git a/sig/scrapegraphai/models/crawl_retrieve_results_params.rbs b/sig/scrapegraphai/models/crawl_retrieve_results_params.rbs new file mode 100644 index 0000000..1e35886 --- /dev/null +++ b/sig/scrapegraphai/models/crawl_retrieve_results_params.rbs @@ -0,0 +1,15 @@ +module Scrapegraphai + module Models + type crawl_retrieve_results_params = + { } & Scrapegraphai::Internal::Type::request_parameters + + class CrawlRetrieveResultsParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + def initialize: (?request_options: Scrapegraphai::request_opts) -> void + + def to_hash: -> { request_options: Scrapegraphai::RequestOptions } + end + end +end diff --git a/sig/scrapegraphai/models/crawl_retrieve_results_response.rbs b/sig/scrapegraphai/models/crawl_retrieve_results_response.rbs new file mode 100644 index 0000000..ff0a30e --- /dev/null +++ b/sig/scrapegraphai/models/crawl_retrieve_results_response.rbs @@ -0,0 +1,69 @@ +module Scrapegraphai + module Models + type crawl_retrieve_results_response = + { + result: Scrapegraphai::Models::CrawlRetrieveResultsResponse::result, + status: Scrapegraphai::Models::CrawlRetrieveResultsResponse::status, + task_id: String, + 
traceback: String? + } + + class CrawlRetrieveResultsResponse < Scrapegraphai::Internal::Type::BaseModel + attr_reader result: Scrapegraphai::Models::CrawlRetrieveResultsResponse::result? + + def result=: ( + Scrapegraphai::Models::CrawlRetrieveResultsResponse::result + ) -> Scrapegraphai::Models::CrawlRetrieveResultsResponse::result + + attr_reader status: Scrapegraphai::Models::CrawlRetrieveResultsResponse::status? + + def status=: ( + Scrapegraphai::Models::CrawlRetrieveResultsResponse::status + ) -> Scrapegraphai::Models::CrawlRetrieveResultsResponse::status + + attr_reader task_id: String? + + def task_id=: (String) -> String + + attr_accessor traceback: String? + + def initialize: ( + ?result: Scrapegraphai::Models::CrawlRetrieveResultsResponse::result, + ?status: Scrapegraphai::Models::CrawlRetrieveResultsResponse::status, + ?task_id: String, + ?traceback: String? + ) -> void + + def to_hash: -> { + result: Scrapegraphai::Models::CrawlRetrieveResultsResponse::result, + status: Scrapegraphai::Models::CrawlRetrieveResultsResponse::status, + task_id: String, + traceback: String? 
+ } + + type result = top | String + + module Result + extend Scrapegraphai::Internal::Type::Union + + def self?.variants: -> ::Array[Scrapegraphai::Models::CrawlRetrieveResultsResponse::result] + end + + type status = + :PENDING | :STARTED | :SUCCESS | :FAILURE | :RETRY | :REVOKED + + module Status + extend Scrapegraphai::Internal::Type::Enum + + PENDING: :PENDING + STARTED: :STARTED + SUCCESS: :SUCCESS + FAILURE: :FAILURE + RETRY: :RETRY + REVOKED: :REVOKED + + def self?.values: -> ::Array[Scrapegraphai::Models::CrawlRetrieveResultsResponse::status] + end + end + end +end diff --git a/sig/scrapegraphai/models/crawl_start_params.rbs b/sig/scrapegraphai/models/crawl_start_params.rbs new file mode 100644 index 0000000..d241529 --- /dev/null +++ b/sig/scrapegraphai/models/crawl_start_params.rbs @@ -0,0 +1,96 @@ +module Scrapegraphai + module Models + type crawl_start_params = + { + url: String, + depth: Integer, + extraction_mode: bool, + max_pages: Integer, + prompt: String?, + render_heavy_js: bool, + rules: Scrapegraphai::CrawlStartParams::Rules, + schema: top?, + sitemap: bool + } + & Scrapegraphai::Internal::Type::request_parameters + + class CrawlStartParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + attr_accessor url: String + + attr_reader depth: Integer? + + def depth=: (Integer) -> Integer + + attr_reader extraction_mode: bool? + + def extraction_mode=: (bool) -> bool + + attr_reader max_pages: Integer? + + def max_pages=: (Integer) -> Integer + + attr_accessor prompt: String? + + attr_reader render_heavy_js: bool? + + def render_heavy_js=: (bool) -> bool + + attr_reader rules: Scrapegraphai::CrawlStartParams::Rules? + + def rules=: ( + Scrapegraphai::CrawlStartParams::Rules + ) -> Scrapegraphai::CrawlStartParams::Rules + + attr_accessor schema: top? + + attr_reader sitemap: bool? 
+ + def sitemap=: (bool) -> bool + + def initialize: ( + url: String, + ?depth: Integer, + ?extraction_mode: bool, + ?max_pages: Integer, + ?prompt: String?, + ?render_heavy_js: bool, + ?rules: Scrapegraphai::CrawlStartParams::Rules, + ?schema: top?, + ?sitemap: bool, + ?request_options: Scrapegraphai::request_opts + ) -> void + + def to_hash: -> { + url: String, + depth: Integer, + extraction_mode: bool, + max_pages: Integer, + prompt: String?, + render_heavy_js: bool, + rules: Scrapegraphai::CrawlStartParams::Rules, + schema: top?, + sitemap: bool, + request_options: Scrapegraphai::RequestOptions + } + + type rules = { exclude: ::Array[String], same_domain: bool } + + class Rules < Scrapegraphai::Internal::Type::BaseModel + attr_reader exclude: ::Array[String]? + + def exclude=: (::Array[String]) -> ::Array[String] + + attr_reader same_domain: bool? + + def same_domain=: (bool) -> bool + + def initialize: (?exclude: ::Array[String], ?same_domain: bool) -> void + + def to_hash: -> { exclude: ::Array[String], same_domain: bool } + end + end + end +end diff --git a/sig/scrapegraphai/models/crawl_start_response.rbs b/sig/scrapegraphai/models/crawl_start_response.rbs new file mode 100644 index 0000000..ba0e578 --- /dev/null +++ b/sig/scrapegraphai/models/crawl_start_response.rbs @@ -0,0 +1,15 @@ +module Scrapegraphai + module Models + type crawl_start_response = { task_id: String } + + class CrawlStartResponse < Scrapegraphai::Internal::Type::BaseModel + attr_reader task_id: String? 
+ + def task_id=: (String) -> String + + def initialize: (?task_id: String) -> void + + def to_hash: -> { task_id: String } + end + end +end diff --git a/sig/scrapegraphai/models/credit_retrieve_params.rbs b/sig/scrapegraphai/models/credit_retrieve_params.rbs new file mode 100644 index 0000000..ea63de2 --- /dev/null +++ b/sig/scrapegraphai/models/credit_retrieve_params.rbs @@ -0,0 +1,15 @@ +module Scrapegraphai + module Models + type credit_retrieve_params = + { } & Scrapegraphai::Internal::Type::request_parameters + + class CreditRetrieveParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + def initialize: (?request_options: Scrapegraphai::request_opts) -> void + + def to_hash: -> { request_options: Scrapegraphai::RequestOptions } + end + end +end diff --git a/sig/scrapegraphai/models/credit_retrieve_response.rbs b/sig/scrapegraphai/models/credit_retrieve_response.rbs new file mode 100644 index 0000000..79b3342 --- /dev/null +++ b/sig/scrapegraphai/models/credit_retrieve_response.rbs @@ -0,0 +1,26 @@ +module Scrapegraphai + module Models + type credit_retrieve_response = + { remaining_credits: Integer, total_credits_used: Integer } + + class CreditRetrieveResponse < Scrapegraphai::Internal::Type::BaseModel + attr_reader remaining_credits: Integer? + + def remaining_credits=: (Integer) -> Integer + + attr_reader total_credits_used: Integer? 
+ + def total_credits_used=: (Integer) -> Integer + + def initialize: ( + ?remaining_credits: Integer, + ?total_credits_used: Integer + ) -> void + + def to_hash: -> { + remaining_credits: Integer, + total_credits_used: Integer + } + end + end +end diff --git a/sig/scrapegraphai/models/failed_smartscraper.rbs b/sig/scrapegraphai/models/failed_smartscraper.rbs new file mode 100644 index 0000000..ed72b35 --- /dev/null +++ b/sig/scrapegraphai/models/failed_smartscraper.rbs @@ -0,0 +1,65 @@ +module Scrapegraphai + module Models + type failed_smartscraper = + { + error: String, + request_id: String, + result: top?, + status: Scrapegraphai::Models::FailedSmartscraper::status, + user_prompt: String, + website_url: String? + } + + class FailedSmartscraper < Scrapegraphai::Internal::Type::BaseModel + attr_reader error: String? + + def error=: (String) -> String + + attr_reader request_id: String? + + def request_id=: (String) -> String + + attr_accessor result: top? + + attr_reader status: Scrapegraphai::Models::FailedSmartscraper::status? + + def status=: ( + Scrapegraphai::Models::FailedSmartscraper::status + ) -> Scrapegraphai::Models::FailedSmartscraper::status + + attr_reader user_prompt: String? + + def user_prompt=: (String) -> String + + attr_accessor website_url: String? + + def initialize: ( + ?error: String, + ?request_id: String, + ?result: top?, + ?status: Scrapegraphai::Models::FailedSmartscraper::status, + ?user_prompt: String, + ?website_url: String? + ) -> void + + def to_hash: -> { + error: String, + request_id: String, + result: top?, + status: Scrapegraphai::Models::FailedSmartscraper::status, + user_prompt: String, + website_url: String? 
+ } + + type status = :failed + + module Status + extend Scrapegraphai::Internal::Type::Enum + + FAILED: :failed + + def self?.values: -> ::Array[Scrapegraphai::Models::FailedSmartscraper::status] + end + end + end +end diff --git a/sig/scrapegraphai/models/feedback_submit_params.rbs b/sig/scrapegraphai/models/feedback_submit_params.rbs new file mode 100644 index 0000000..c5b6b87 --- /dev/null +++ b/sig/scrapegraphai/models/feedback_submit_params.rbs @@ -0,0 +1,32 @@ +module Scrapegraphai + module Models + type feedback_submit_params = + { rating: Integer, request_id: String, feedback_text: String? } + & Scrapegraphai::Internal::Type::request_parameters + + class FeedbackSubmitParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + attr_accessor rating: Integer + + attr_accessor request_id: String + + attr_accessor feedback_text: String? + + def initialize: ( + rating: Integer, + request_id: String, + ?feedback_text: String?, + ?request_options: Scrapegraphai::request_opts + ) -> void + + def to_hash: -> { + rating: Integer, + request_id: String, + feedback_text: String?, + request_options: Scrapegraphai::RequestOptions + } + end + end +end diff --git a/sig/scrapegraphai/models/feedback_submit_response.rbs b/sig/scrapegraphai/models/feedback_submit_response.rbs new file mode 100644 index 0000000..515a7df --- /dev/null +++ b/sig/scrapegraphai/models/feedback_submit_response.rbs @@ -0,0 +1,43 @@ +module Scrapegraphai + module Models + type feedback_submit_response = + { + feedback_id: String, + feedback_timestamp: Time, + message: String, + request_id: String + } + + class FeedbackSubmitResponse < Scrapegraphai::Internal::Type::BaseModel + attr_reader feedback_id: String? + + def feedback_id=: (String) -> String + + attr_reader feedback_timestamp: Time? + + def feedback_timestamp=: (Time) -> Time + + attr_reader message: String? 
+ + def message=: (String) -> String + + attr_reader request_id: String? + + def request_id=: (String) -> String + + def initialize: ( + ?feedback_id: String, + ?feedback_timestamp: Time, + ?message: String, + ?request_id: String + ) -> void + + def to_hash: -> { + feedback_id: String, + feedback_timestamp: Time, + message: String, + request_id: String + } + end + end +end diff --git a/sig/scrapegraphai/models/generate_schema_create_params.rbs b/sig/scrapegraphai/models/generate_schema_create_params.rbs new file mode 100644 index 0000000..a8a9bd9 --- /dev/null +++ b/sig/scrapegraphai/models/generate_schema_create_params.rbs @@ -0,0 +1,28 @@ +module Scrapegraphai + module Models + type generate_schema_create_params = + { user_prompt: String, existing_schema: top? } + & Scrapegraphai::Internal::Type::request_parameters + + class GenerateSchemaCreateParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + attr_accessor user_prompt: String + + attr_accessor existing_schema: top? 
+ + def initialize: ( + user_prompt: String, + ?existing_schema: top?, + ?request_options: Scrapegraphai::request_opts + ) -> void + + def to_hash: -> { + user_prompt: String, + existing_schema: top?, + request_options: Scrapegraphai::RequestOptions + } + end + end +end diff --git a/sig/scrapegraphai/models/generate_schema_create_response.rbs b/sig/scrapegraphai/models/generate_schema_create_response.rbs new file mode 100644 index 0000000..d5ab065 --- /dev/null +++ b/sig/scrapegraphai/models/generate_schema_create_response.rbs @@ -0,0 +1,67 @@ +module Scrapegraphai + module Models + type generate_schema_create_response = + { + error: String?, + generated_schema: top, + refined_prompt: String, + request_id: String, + status: Scrapegraphai::Models::GenerateSchemaCreateResponse::status, + user_prompt: String + } + + class GenerateSchemaCreateResponse < Scrapegraphai::Internal::Type::BaseModel + attr_accessor error: String? + + attr_reader generated_schema: top? + + def generated_schema=: (top) -> top + + attr_reader refined_prompt: String? + + def refined_prompt=: (String) -> String + + attr_reader request_id: String? + + def request_id=: (String) -> String + + attr_reader status: Scrapegraphai::Models::GenerateSchemaCreateResponse::status? + + def status=: ( + Scrapegraphai::Models::GenerateSchemaCreateResponse::status + ) -> Scrapegraphai::Models::GenerateSchemaCreateResponse::status + + attr_reader user_prompt: String? 
+ + def user_prompt=: (String) -> String + + def initialize: ( + ?error: String?, + ?generated_schema: top, + ?refined_prompt: String, + ?request_id: String, + ?status: Scrapegraphai::Models::GenerateSchemaCreateResponse::status, + ?user_prompt: String + ) -> void + + def to_hash: -> { + error: String?, + generated_schema: top, + refined_prompt: String, + request_id: String, + status: Scrapegraphai::Models::GenerateSchemaCreateResponse::status, + user_prompt: String + } + + type status = :completed + + module Status + extend Scrapegraphai::Internal::Type::Enum + + COMPLETED: :completed + + def self?.values: -> ::Array[Scrapegraphai::Models::GenerateSchemaCreateResponse::status] + end + end + end +end diff --git a/sig/scrapegraphai/models/generate_schema_retrieve_params.rbs b/sig/scrapegraphai/models/generate_schema_retrieve_params.rbs new file mode 100644 index 0000000..f668678 --- /dev/null +++ b/sig/scrapegraphai/models/generate_schema_retrieve_params.rbs @@ -0,0 +1,15 @@ +module Scrapegraphai + module Models + type generate_schema_retrieve_params = + { } & Scrapegraphai::Internal::Type::request_parameters + + class GenerateSchemaRetrieveParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + def initialize: (?request_options: Scrapegraphai::request_opts) -> void + + def to_hash: -> { request_options: Scrapegraphai::RequestOptions } + end + end +end diff --git a/sig/scrapegraphai/models/generate_schema_retrieve_response.rbs b/sig/scrapegraphai/models/generate_schema_retrieve_response.rbs new file mode 100644 index 0000000..c97fe3a --- /dev/null +++ b/sig/scrapegraphai/models/generate_schema_retrieve_response.rbs @@ -0,0 +1,139 @@ +module Scrapegraphai + module Models + type generate_schema_retrieve_response = + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse + | 
Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse + + module GenerateSchemaRetrieveResponse + extend Scrapegraphai::Internal::Type::Union + + type completed_schema_generation_response = + { + error: String?, + generated_schema: top, + refined_prompt: String, + request_id: String, + status: Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::status, + user_prompt: String + } + + class CompletedSchemaGenerationResponse < Scrapegraphai::Internal::Type::BaseModel + attr_accessor error: String? + + attr_reader generated_schema: top? + + def generated_schema=: (top) -> top + + attr_reader refined_prompt: String? + + def refined_prompt=: (String) -> String + + attr_reader request_id: String? + + def request_id=: (String) -> String + + attr_reader status: Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::status? + + def status=: ( + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::status + ) -> Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::status + + attr_reader user_prompt: String? 
+ + def user_prompt=: (String) -> String + + def initialize: ( + ?error: String?, + ?generated_schema: top, + ?refined_prompt: String, + ?request_id: String, + ?status: Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::status, + ?user_prompt: String + ) -> void + + def to_hash: -> { + error: String?, + generated_schema: top, + refined_prompt: String, + request_id: String, + status: Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::status, + user_prompt: String + } + + type status = :completed + + module Status + extend Scrapegraphai::Internal::Type::Enum + + COMPLETED: :completed + + def self?.values: -> ::Array[Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse::status] + end + end + + type failed_schema_generation_response = + { + error: String, + generated_schema: top?, + refined_prompt: String?, + request_id: String, + status: Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::status, + user_prompt: String + } + + class FailedSchemaGenerationResponse < Scrapegraphai::Internal::Type::BaseModel + attr_reader error: String? + + def error=: (String) -> String + + attr_accessor generated_schema: top? + + attr_accessor refined_prompt: String? + + attr_reader request_id: String? + + def request_id=: (String) -> String + + attr_reader status: Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::status? + + def status=: ( + Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::status + ) -> Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::status + + attr_reader user_prompt: String? 
+ + def user_prompt=: (String) -> String + + def initialize: ( + ?error: String, + ?generated_schema: top?, + ?refined_prompt: String?, + ?request_id: String, + ?status: Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::status, + ?user_prompt: String + ) -> void + + def to_hash: -> { + error: String, + generated_schema: top?, + refined_prompt: String?, + request_id: String, + status: Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::status, + user_prompt: String + } + + type status = :failed + + module Status + extend Scrapegraphai::Internal::Type::Enum + + FAILED: :failed + + def self?.values: -> ::Array[Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse::status] + end + end + + def self?.variants: -> ::Array[Scrapegraphai::Models::generate_schema_retrieve_response] + end + end +end diff --git a/sig/scrapegraphai/models/healthz_check_params.rbs b/sig/scrapegraphai/models/healthz_check_params.rbs new file mode 100644 index 0000000..4383767 --- /dev/null +++ b/sig/scrapegraphai/models/healthz_check_params.rbs @@ -0,0 +1,15 @@ +module Scrapegraphai + module Models + type healthz_check_params = + { } & Scrapegraphai::Internal::Type::request_parameters + + class HealthzCheckParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + def initialize: (?request_options: Scrapegraphai::request_opts) -> void + + def to_hash: -> { request_options: Scrapegraphai::RequestOptions } + end + end +end diff --git a/sig/scrapegraphai/models/healthz_check_response.rbs b/sig/scrapegraphai/models/healthz_check_response.rbs new file mode 100644 index 0000000..cce3a86 --- /dev/null +++ b/sig/scrapegraphai/models/healthz_check_response.rbs @@ -0,0 +1,23 @@ +module Scrapegraphai + module Models + type healthz_check_response = + { services: ::Hash[Symbol, String], 
status: String } + + class HealthzCheckResponse < Scrapegraphai::Internal::Type::BaseModel + attr_reader services: ::Hash[Symbol, String]? + + def services=: (::Hash[Symbol, String]) -> ::Hash[Symbol, String] + + attr_reader status: String? + + def status=: (String) -> String + + def initialize: ( + ?services: ::Hash[Symbol, String], + ?status: String + ) -> void + + def to_hash: -> { services: ::Hash[Symbol, String], status: String } + end + end +end diff --git a/sig/scrapegraphai/models/markdownify_convert_params.rbs b/sig/scrapegraphai/models/markdownify_convert_params.rbs new file mode 100644 index 0000000..db1faea --- /dev/null +++ b/sig/scrapegraphai/models/markdownify_convert_params.rbs @@ -0,0 +1,40 @@ +module Scrapegraphai + module Models + type markdownify_convert_params = + { + website_url: String, + headers: ::Hash[Symbol, String], + steps: ::Array[String] + } + & Scrapegraphai::Internal::Type::request_parameters + + class MarkdownifyConvertParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + attr_accessor website_url: String + + attr_reader headers: ::Hash[Symbol, String]? + + def headers=: (::Hash[Symbol, String]) -> ::Hash[Symbol, String] + + attr_reader steps: ::Array[String]? 
+ + def steps=: (::Array[String]) -> ::Array[String] + + def initialize: ( + website_url: String, + ?headers: ::Hash[Symbol, String], + ?steps: ::Array[String], + ?request_options: Scrapegraphai::request_opts + ) -> void + + def to_hash: -> { + website_url: String, + headers: ::Hash[Symbol, String], + steps: ::Array[String], + request_options: Scrapegraphai::RequestOptions + } + end + end +end diff --git a/sig/scrapegraphai/models/markdownify_retrieve_status_params.rbs b/sig/scrapegraphai/models/markdownify_retrieve_status_params.rbs new file mode 100644 index 0000000..f093402 --- /dev/null +++ b/sig/scrapegraphai/models/markdownify_retrieve_status_params.rbs @@ -0,0 +1,15 @@ +module Scrapegraphai + module Models + type markdownify_retrieve_status_params = + { } & Scrapegraphai::Internal::Type::request_parameters + + class MarkdownifyRetrieveStatusParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + def initialize: (?request_options: Scrapegraphai::request_opts) -> void + + def to_hash: -> { request_options: Scrapegraphai::RequestOptions } + end + end +end diff --git a/sig/scrapegraphai/models/markdownify_retrieve_status_response.rbs b/sig/scrapegraphai/models/markdownify_retrieve_status_response.rbs new file mode 100644 index 0000000..5a860c7 --- /dev/null +++ b/sig/scrapegraphai/models/markdownify_retrieve_status_response.rbs @@ -0,0 +1,70 @@ +module Scrapegraphai + module Models + type markdownify_retrieve_status_response = + Scrapegraphai::CompletedMarkdownify + | Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse + + module MarkdownifyRetrieveStatusResponse + extend Scrapegraphai::Internal::Type::Union + + type failed_markdownify_response = + { + error: String, + request_id: String, + result: String?, + status: 
Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::status, + website_url: String + } + + class FailedMarkdownifyResponse < Scrapegraphai::Internal::Type::BaseModel + attr_reader error: String? + + def error=: (String) -> String + + attr_reader request_id: String? + + def request_id=: (String) -> String + + attr_accessor result: String? + + attr_reader status: Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::status? + + def status=: ( + Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::status + ) -> Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::status + + attr_reader website_url: String? + + def website_url=: (String) -> String + + def initialize: ( + ?error: String, + ?request_id: String, + ?result: String?, + ?status: Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::status, + ?website_url: String + ) -> void + + def to_hash: -> { + error: String, + request_id: String, + result: String?, + status: Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::status, + website_url: String + } + + type status = :failed + + module Status + extend Scrapegraphai::Internal::Type::Enum + + FAILED: :failed + + def self?.values: -> ::Array[Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse::status] + end + end + + def self?.variants: -> ::Array[Scrapegraphai::Models::markdownify_retrieve_status_response] + end + end +end diff --git a/sig/scrapegraphai/models/searchscraper_create_params.rbs b/sig/scrapegraphai/models/searchscraper_create_params.rbs new file mode 100644 index 0000000..949e7bb --- /dev/null +++ b/sig/scrapegraphai/models/searchscraper_create_params.rbs @@ -0,0 +1,47 @@ +module Scrapegraphai + module Models + type searchscraper_create_params = + { + user_prompt: String, + headers: ::Hash[Symbol, String], + num_results: Integer, + 
output_schema: top + } + & Scrapegraphai::Internal::Type::request_parameters + + class SearchscraperCreateParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + attr_accessor user_prompt: String + + attr_reader headers: ::Hash[Symbol, String]? + + def headers=: (::Hash[Symbol, String]) -> ::Hash[Symbol, String] + + attr_reader num_results: Integer? + + def num_results=: (Integer) -> Integer + + attr_reader output_schema: top? + + def output_schema=: (top) -> top + + def initialize: ( + user_prompt: String, + ?headers: ::Hash[Symbol, String], + ?num_results: Integer, + ?output_schema: top, + ?request_options: Scrapegraphai::request_opts + ) -> void + + def to_hash: -> { + user_prompt: String, + headers: ::Hash[Symbol, String], + num_results: Integer, + output_schema: top, + request_options: Scrapegraphai::RequestOptions + } + end + end +end diff --git a/sig/scrapegraphai/models/searchscraper_retrieve_status_params.rbs b/sig/scrapegraphai/models/searchscraper_retrieve_status_params.rbs new file mode 100644 index 0000000..c0bf3a7 --- /dev/null +++ b/sig/scrapegraphai/models/searchscraper_retrieve_status_params.rbs @@ -0,0 +1,15 @@ +module Scrapegraphai + module Models + type searchscraper_retrieve_status_params = + { } & Scrapegraphai::Internal::Type::request_parameters + + class SearchscraperRetrieveStatusParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + def initialize: (?request_options: Scrapegraphai::request_opts) -> void + + def to_hash: -> { request_options: Scrapegraphai::RequestOptions } + end + end +end diff --git a/sig/scrapegraphai/models/searchscraper_retrieve_status_response.rbs b/sig/scrapegraphai/models/searchscraper_retrieve_status_response.rbs new file mode 100644 index 0000000..87010ba --- 
/dev/null +++ b/sig/scrapegraphai/models/searchscraper_retrieve_status_response.rbs @@ -0,0 +1,84 @@ +module Scrapegraphai + module Models + type searchscraper_retrieve_status_response = + Scrapegraphai::CompletedSearchScraper + | Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse + + module SearchscraperRetrieveStatusResponse + extend Scrapegraphai::Internal::Type::Union + + type failed_search_scraper_response = + { + error: String, + num_results: Integer, + reference_urls: ::Array[String], + request_id: String, + result: top?, + status: Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::status, + user_prompt: String + } + + class FailedSearchScraperResponse < Scrapegraphai::Internal::Type::BaseModel + attr_reader error: String? + + def error=: (String) -> String + + attr_reader num_results: Integer? + + def num_results=: (Integer) -> Integer + + attr_reader reference_urls: ::Array[String]? + + def reference_urls=: (::Array[String]) -> ::Array[String] + + attr_reader request_id: String? + + def request_id=: (String) -> String + + attr_accessor result: top? + + attr_reader status: Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::status? + + def status=: ( + Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::status + ) -> Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::status + + attr_reader user_prompt: String? 
+ + def user_prompt=: (String) -> String + + def initialize: ( + ?error: String, + ?num_results: Integer, + ?reference_urls: ::Array[String], + ?request_id: String, + ?result: top?, + ?status: Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::status, + ?user_prompt: String + ) -> void + + def to_hash: -> { + error: String, + num_results: Integer, + reference_urls: ::Array[String], + request_id: String, + result: top?, + status: Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::status, + user_prompt: String + } + + type status = :failed + + module Status + extend Scrapegraphai::Internal::Type::Enum + + FAILED: :failed + + def self?.values: -> ::Array[Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse::status] + end + end + + def self?.variants: -> ::Array[Scrapegraphai::Models::searchscraper_retrieve_status_response] + end + end +end diff --git a/sig/scrapegraphai/models/smartscraper_create_params.rbs b/sig/scrapegraphai/models/smartscraper_create_params.rbs new file mode 100644 index 0000000..dfa291d --- /dev/null +++ b/sig/scrapegraphai/models/smartscraper_create_params.rbs @@ -0,0 +1,89 @@ +module Scrapegraphai + module Models + type smartscraper_create_params = + { + user_prompt: String, + cookies: ::Hash[Symbol, String], + headers: ::Hash[Symbol, String], + number_of_scrolls: Integer, + output_schema: top, + render_heavy_js: bool, + steps: ::Array[String], + total_pages: Integer, + website_html: String, + website_url: String + } + & Scrapegraphai::Internal::Type::request_parameters + + class SmartscraperCreateParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + attr_accessor user_prompt: String + + attr_reader cookies: ::Hash[Symbol, String]? 
+ + def cookies=: (::Hash[Symbol, String]) -> ::Hash[Symbol, String] + + attr_reader headers: ::Hash[Symbol, String]? + + def headers=: (::Hash[Symbol, String]) -> ::Hash[Symbol, String] + + attr_reader number_of_scrolls: Integer? + + def number_of_scrolls=: (Integer) -> Integer + + attr_reader output_schema: top? + + def output_schema=: (top) -> top + + attr_reader render_heavy_js: bool? + + def render_heavy_js=: (bool) -> bool + + attr_reader steps: ::Array[String]? + + def steps=: (::Array[String]) -> ::Array[String] + + attr_reader total_pages: Integer? + + def total_pages=: (Integer) -> Integer + + attr_reader website_html: String? + + def website_html=: (String) -> String + + attr_reader website_url: String? + + def website_url=: (String) -> String + + def initialize: ( + user_prompt: String, + ?cookies: ::Hash[Symbol, String], + ?headers: ::Hash[Symbol, String], + ?number_of_scrolls: Integer, + ?output_schema: top, + ?render_heavy_js: bool, + ?steps: ::Array[String], + ?total_pages: Integer, + ?website_html: String, + ?website_url: String, + ?request_options: Scrapegraphai::request_opts + ) -> void + + def to_hash: -> { + user_prompt: String, + cookies: ::Hash[Symbol, String], + headers: ::Hash[Symbol, String], + number_of_scrolls: Integer, + output_schema: top, + render_heavy_js: bool, + steps: ::Array[String], + total_pages: Integer, + website_html: String, + website_url: String, + request_options: Scrapegraphai::RequestOptions + } + end + end +end diff --git a/sig/scrapegraphai/models/smartscraper_list_params.rbs b/sig/scrapegraphai/models/smartscraper_list_params.rbs new file mode 100644 index 0000000..f25d8b5 --- /dev/null +++ b/sig/scrapegraphai/models/smartscraper_list_params.rbs @@ -0,0 +1,15 @@ +module Scrapegraphai + module Models + type smartscraper_list_params = + { } & Scrapegraphai::Internal::Type::request_parameters + + class SmartscraperListParams < Scrapegraphai::Internal::Type::BaseModel + extend 
Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + def initialize: (?request_options: Scrapegraphai::request_opts) -> void + + def to_hash: -> { request_options: Scrapegraphai::RequestOptions } + end + end +end diff --git a/sig/scrapegraphai/models/smartscraper_list_response.rbs b/sig/scrapegraphai/models/smartscraper_list_response.rbs new file mode 100644 index 0000000..6a5eba2 --- /dev/null +++ b/sig/scrapegraphai/models/smartscraper_list_response.rbs @@ -0,0 +1,12 @@ +module Scrapegraphai + module Models + type smartscraper_list_response = + Scrapegraphai::CompletedSmartscraper | Scrapegraphai::FailedSmartscraper + + module SmartscraperListResponse + extend Scrapegraphai::Internal::Type::Union + + def self?.variants: -> ::Array[Scrapegraphai::Models::smartscraper_list_response] + end + end +end diff --git a/sig/scrapegraphai/models/smartscraper_retrieve_params.rbs b/sig/scrapegraphai/models/smartscraper_retrieve_params.rbs new file mode 100644 index 0000000..4d1824c --- /dev/null +++ b/sig/scrapegraphai/models/smartscraper_retrieve_params.rbs @@ -0,0 +1,15 @@ +module Scrapegraphai + module Models + type smartscraper_retrieve_params = + { } & Scrapegraphai::Internal::Type::request_parameters + + class SmartscraperRetrieveParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + def initialize: (?request_options: Scrapegraphai::request_opts) -> void + + def to_hash: -> { request_options: Scrapegraphai::RequestOptions } + end + end +end diff --git a/sig/scrapegraphai/models/smartscraper_retrieve_response.rbs b/sig/scrapegraphai/models/smartscraper_retrieve_response.rbs new file mode 100644 index 0000000..a17b77b --- /dev/null +++ b/sig/scrapegraphai/models/smartscraper_retrieve_response.rbs @@ -0,0 +1,12 @@ +module Scrapegraphai + module Models + type 
smartscraper_retrieve_response = + Scrapegraphai::CompletedSmartscraper | Scrapegraphai::FailedSmartscraper + + module SmartscraperRetrieveResponse + extend Scrapegraphai::Internal::Type::Union + + def self?.variants: -> ::Array[Scrapegraphai::Models::smartscraper_retrieve_response] + end + end +end diff --git a/sig/scrapegraphai/models/validate_api_key_params.rbs b/sig/scrapegraphai/models/validate_api_key_params.rbs new file mode 100644 index 0000000..593c335 --- /dev/null +++ b/sig/scrapegraphai/models/validate_api_key_params.rbs @@ -0,0 +1,15 @@ +module Scrapegraphai + module Models + type validate_api_key_params = + { } & Scrapegraphai::Internal::Type::request_parameters + + class ValidateAPIKeyParams < Scrapegraphai::Internal::Type::BaseModel + extend Scrapegraphai::Internal::Type::RequestParameters::Converter + include Scrapegraphai::Internal::Type::RequestParameters + + def initialize: (?request_options: Scrapegraphai::request_opts) -> void + + def to_hash: -> { request_options: Scrapegraphai::RequestOptions } + end + end +end diff --git a/sig/scrapegraphai/models/validate_api_key_response.rbs b/sig/scrapegraphai/models/validate_api_key_response.rbs new file mode 100644 index 0000000..7f3bfe1 --- /dev/null +++ b/sig/scrapegraphai/models/validate_api_key_response.rbs @@ -0,0 +1,15 @@ +module Scrapegraphai + module Models + type validate_api_key_response = { email: String } + + class ValidateAPIKeyResponse < Scrapegraphai::Internal::Type::BaseModel + attr_reader email: String? 
+ + def email=: (String) -> String + + def initialize: (?email: String) -> void + + def to_hash: -> { email: String } + end + end +end diff --git a/sig/scrapegraphai/request_options.rbs b/sig/scrapegraphai/request_options.rbs new file mode 100644 index 0000000..ec5499b --- /dev/null +++ b/sig/scrapegraphai/request_options.rbs @@ -0,0 +1,36 @@ +module Scrapegraphai + type request_opts = + Scrapegraphai::RequestOptions + | Scrapegraphai::request_options + | ::Hash[Symbol, top] + + type request_options = + { + idempotency_key: String?, + extra_query: ::Hash[String, (::Array[String] | String)?]?, + extra_headers: ::Hash[String, String?]?, + extra_body: top?, + max_retries: Integer?, + timeout: Float? + } + + class RequestOptions < Scrapegraphai::Internal::Type::BaseModel + def self.validate!: (Scrapegraphai::request_opts opts) -> void + + attr_accessor idempotency_key: String? + + attr_accessor extra_query: ::Hash[String, (::Array[String] | String)?]? + + attr_accessor extra_headers: ::Hash[String, String?]? + + attr_accessor extra_body: top? + + attr_accessor max_retries: Integer? + + attr_accessor timeout: Float? 
+ + def initialize: ( + ?Scrapegraphai::request_options | ::Hash[Symbol, top] values + ) -> void + end +end diff --git a/sig/scrapegraphai/resources/crawl.rbs b/sig/scrapegraphai/resources/crawl.rbs new file mode 100644 index 0000000..30a560d --- /dev/null +++ b/sig/scrapegraphai/resources/crawl.rbs @@ -0,0 +1,25 @@ +module Scrapegraphai + module Resources + class Crawl + def retrieve_results: ( + String task_id, + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::Models::CrawlRetrieveResultsResponse + + def start: ( + url: String, + ?depth: Integer, + ?extraction_mode: bool, + ?max_pages: Integer, + ?prompt: String?, + ?render_heavy_js: bool, + ?rules: Scrapegraphai::CrawlStartParams::Rules, + ?schema: top?, + ?sitemap: bool, + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::Models::CrawlStartResponse + + def initialize: (client: Scrapegraphai::Client) -> void + end + end +end diff --git a/sig/scrapegraphai/resources/credits.rbs b/sig/scrapegraphai/resources/credits.rbs new file mode 100644 index 0000000..8674c60 --- /dev/null +++ b/sig/scrapegraphai/resources/credits.rbs @@ -0,0 +1,11 @@ +module Scrapegraphai + module Resources + class Credits + def retrieve: ( + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::Models::CreditRetrieveResponse + + def initialize: (client: Scrapegraphai::Client) -> void + end + end +end diff --git a/sig/scrapegraphai/resources/feedback.rbs b/sig/scrapegraphai/resources/feedback.rbs new file mode 100644 index 0000000..317deb0 --- /dev/null +++ b/sig/scrapegraphai/resources/feedback.rbs @@ -0,0 +1,14 @@ +module Scrapegraphai + module Resources + class Feedback + def submit: ( + rating: Integer, + request_id: String, + ?feedback_text: String?, + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::Models::FeedbackSubmitResponse + + def initialize: (client: Scrapegraphai::Client) -> void + end + end +end diff --git a/sig/scrapegraphai/resources/generate_schema.rbs 
b/sig/scrapegraphai/resources/generate_schema.rbs new file mode 100644 index 0000000..300cbd1 --- /dev/null +++ b/sig/scrapegraphai/resources/generate_schema.rbs @@ -0,0 +1,18 @@ +module Scrapegraphai + module Resources + class GenerateSchema + def create: ( + user_prompt: String, + ?existing_schema: top?, + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::Models::GenerateSchemaCreateResponse + + def retrieve: ( + String request_id, + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::Models::generate_schema_retrieve_response + + def initialize: (client: Scrapegraphai::Client) -> void + end + end +end diff --git a/sig/scrapegraphai/resources/healthz.rbs b/sig/scrapegraphai/resources/healthz.rbs new file mode 100644 index 0000000..bb54879 --- /dev/null +++ b/sig/scrapegraphai/resources/healthz.rbs @@ -0,0 +1,11 @@ +module Scrapegraphai + module Resources + class Healthz + def check: ( + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::Models::HealthzCheckResponse + + def initialize: (client: Scrapegraphai::Client) -> void + end + end +end diff --git a/sig/scrapegraphai/resources/markdownify.rbs b/sig/scrapegraphai/resources/markdownify.rbs new file mode 100644 index 0000000..fa99e5b --- /dev/null +++ b/sig/scrapegraphai/resources/markdownify.rbs @@ -0,0 +1,19 @@ +module Scrapegraphai + module Resources + class Markdownify + def convert: ( + website_url: String, + ?headers: ::Hash[Symbol, String], + ?steps: ::Array[String], + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::CompletedMarkdownify + + def retrieve_status: ( + String request_id, + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::Models::markdownify_retrieve_status_response + + def initialize: (client: Scrapegraphai::Client) -> void + end + end +end diff --git a/sig/scrapegraphai/resources/searchscraper.rbs b/sig/scrapegraphai/resources/searchscraper.rbs new file mode 100644 index 0000000..0c3628b --- /dev/null 
+++ b/sig/scrapegraphai/resources/searchscraper.rbs @@ -0,0 +1,20 @@ +module Scrapegraphai + module Resources + class Searchscraper + def create: ( + user_prompt: String, + ?headers: ::Hash[Symbol, String], + ?num_results: Integer, + ?output_schema: top, + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::CompletedSearchScraper + + def retrieve_status: ( + String request_id, + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::Models::searchscraper_retrieve_status_response + + def initialize: (client: Scrapegraphai::Client) -> void + end + end +end diff --git a/sig/scrapegraphai/resources/smartscraper.rbs b/sig/scrapegraphai/resources/smartscraper.rbs new file mode 100644 index 0000000..63cac86 --- /dev/null +++ b/sig/scrapegraphai/resources/smartscraper.rbs @@ -0,0 +1,30 @@ +module Scrapegraphai + module Resources + class Smartscraper + def create: ( + user_prompt: String, + ?cookies: ::Hash[Symbol, String], + ?headers: ::Hash[Symbol, String], + ?number_of_scrolls: Integer, + ?output_schema: top, + ?render_heavy_js: bool, + ?steps: ::Array[String], + ?total_pages: Integer, + ?website_html: String, + ?website_url: String, + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::CompletedSmartscraper + + def retrieve: ( + String request_id, + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::Models::smartscraper_retrieve_response + + def list: ( + ?request_options: Scrapegraphai::request_opts + ) -> Scrapegraphai::Models::smartscraper_list_response + + def initialize: (client: Scrapegraphai::Client) -> void + end + end +end diff --git a/sig/scrapegraphai/resources/validate.rbs b/sig/scrapegraphai/resources/validate.rbs new file mode 100644 index 0000000..601734b --- /dev/null +++ b/sig/scrapegraphai/resources/validate.rbs @@ -0,0 +1,11 @@ +module Scrapegraphai + module Resources + class Validate + def api_key: ( + ?request_options: Scrapegraphai::request_opts + ) -> 
Scrapegraphai::Models::ValidateAPIKeyResponse + + def initialize: (client: Scrapegraphai::Client) -> void + end + end +end diff --git a/sig/scrapegraphai/version.rbs b/sig/scrapegraphai/version.rbs new file mode 100644 index 0000000..22d14c3 --- /dev/null +++ b/sig/scrapegraphai/version.rbs @@ -0,0 +1,3 @@ +module Scrapegraphai + VERSION: String +end diff --git a/sorbet/config b/sorbet/config new file mode 100644 index 0000000..6fe84ed --- /dev/null +++ b/sorbet/config @@ -0,0 +1,2 @@ +--dir=rbi/ +--ignore=test/ diff --git a/sorbet/rbi/.gitignore b/sorbet/rbi/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/sorbet/rbi/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/test/scrapegraphai/client_test.rb b/test/scrapegraphai/client_test.rb new file mode 100644 index 0000000..ea4f9fb --- /dev/null +++ b/test/scrapegraphai/client_test.rb @@ -0,0 +1,329 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +class ScrapegraphaiTest < Minitest::Test + extend Minitest::Serial + include WebMock::API + + def before_all + super + WebMock.enable! + end + + def setup + super + Thread.current.thread_variable_set(:mock_sleep, []) + end + + def teardown + Thread.current.thread_variable_set(:mock_sleep, nil) + WebMock.reset! + super + end + + def after_all + WebMock.disable! 
+ super + end + + def test_raises_on_unknown_environment + e = assert_raises(ArgumentError) do + Scrapegraphai::Client.new(environment: "wrong") + end + assert_match(/environment must be one of/, e.message) + end + + def test_raises_on_missing_non_nullable_opts + e = assert_raises(ArgumentError) do + Scrapegraphai::Client.new + end + assert_match(/is required/, e.message) + end + + def test_client_default_request_default_retry_attempts + stub_request(:post, "http://localhost/smartscraper").to_return_json(status: 500, body: {}) + + scrapegraphai = Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key") + + assert_raises(Scrapegraphai::Errors::InternalServerError) do + scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") + end + + assert_requested(:any, /./, times: 3) + end + + def test_client_given_request_default_retry_attempts + stub_request(:post, "http://localhost/smartscraper").to_return_json(status: 500, body: {}) + + scrapegraphai = + Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key", max_retries: 3) + + assert_raises(Scrapegraphai::Errors::InternalServerError) do + scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") + end + + assert_requested(:any, /./, times: 4) + end + + def test_client_default_request_given_retry_attempts + stub_request(:post, "http://localhost/smartscraper").to_return_json(status: 500, body: {}) + + scrapegraphai = Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key") + + assert_raises(Scrapegraphai::Errors::InternalServerError) do + scrapegraphai.smartscraper.create( + user_prompt: "Extract the product name, price, and description", + request_options: {max_retries: 3} + ) + end + + assert_requested(:any, /./, times: 4) + end + + def test_client_given_request_given_retry_attempts + stub_request(:post, "http://localhost/smartscraper").to_return_json(status: 500, body: {}) 
+ + scrapegraphai = + Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key", max_retries: 3) + + assert_raises(Scrapegraphai::Errors::InternalServerError) do + scrapegraphai.smartscraper.create( + user_prompt: "Extract the product name, price, and description", + request_options: {max_retries: 4} + ) + end + + assert_requested(:any, /./, times: 5) + end + + def test_client_retry_after_seconds + stub_request(:post, "http://localhost/smartscraper").to_return_json( + status: 500, + headers: {"retry-after" => "1.3"}, + body: {} + ) + + scrapegraphai = + Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key", max_retries: 1) + + assert_raises(Scrapegraphai::Errors::InternalServerError) do + scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") + end + + assert_requested(:any, /./, times: 2) + assert_equal(1.3, Thread.current.thread_variable_get(:mock_sleep).last) + end + + def test_client_retry_after_date + stub_request(:post, "http://localhost/smartscraper").to_return_json( + status: 500, + headers: {"retry-after" => (Time.now + 10).httpdate}, + body: {} + ) + + scrapegraphai = + Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key", max_retries: 1) + + assert_raises(Scrapegraphai::Errors::InternalServerError) do + Thread.current.thread_variable_set(:time_now, Time.now) + scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") + Thread.current.thread_variable_set(:time_now, nil) + end + + assert_requested(:any, /./, times: 2) + assert_in_delta(10, Thread.current.thread_variable_get(:mock_sleep).last, 1.0) + end + + def test_client_retry_after_ms + stub_request(:post, "http://localhost/smartscraper").to_return_json( + status: 500, + headers: {"retry-after-ms" => "1300"}, + body: {} + ) + + scrapegraphai = + Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key", max_retries: 1) + + 
assert_raises(Scrapegraphai::Errors::InternalServerError) do + scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") + end + + assert_requested(:any, /./, times: 2) + assert_equal(1.3, Thread.current.thread_variable_get(:mock_sleep).last) + end + + def test_retry_count_header + stub_request(:post, "http://localhost/smartscraper").to_return_json(status: 500, body: {}) + + scrapegraphai = Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key") + + assert_raises(Scrapegraphai::Errors::InternalServerError) do + scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") + end + + 3.times do + assert_requested(:any, /./, headers: {"x-stainless-retry-count" => _1}) + end + end + + def test_omit_retry_count_header + stub_request(:post, "http://localhost/smartscraper").to_return_json(status: 500, body: {}) + + scrapegraphai = Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key") + + assert_raises(Scrapegraphai::Errors::InternalServerError) do + scrapegraphai.smartscraper.create( + user_prompt: "Extract the product name, price, and description", + request_options: {extra_headers: {"x-stainless-retry-count" => nil}} + ) + end + + assert_requested(:any, /./, times: 3) do + refute_includes(_1.headers.keys.map(&:downcase), "x-stainless-retry-count") + end + end + + def test_overwrite_retry_count_header + stub_request(:post, "http://localhost/smartscraper").to_return_json(status: 500, body: {}) + + scrapegraphai = Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key") + + assert_raises(Scrapegraphai::Errors::InternalServerError) do + scrapegraphai.smartscraper.create( + user_prompt: "Extract the product name, price, and description", + request_options: {extra_headers: {"x-stainless-retry-count" => "42"}} + ) + end + + assert_requested(:any, /./, headers: {"x-stainless-retry-count" => "42"}, times: 3) + end + + def 
test_client_redirect_307 + stub_request(:post, "http://localhost/smartscraper").to_return_json( + status: 307, + headers: {"location" => "/redirected"}, + body: {} + ) + stub_request(:any, "http://localhost/redirected").to_return( + status: 307, + headers: {"location" => "/redirected"} + ) + + scrapegraphai = Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key") + + assert_raises(Scrapegraphai::Errors::APIConnectionError) do + scrapegraphai.smartscraper.create( + user_prompt: "Extract the product name, price, and description", + request_options: {extra_headers: {}} + ) + end + + recorded, = WebMock::RequestRegistry.instance.requested_signatures.hash.first + + assert_requested(:any, "http://localhost/redirected", times: Scrapegraphai::Client::MAX_REDIRECTS) do + assert_equal(recorded.method, _1.method) + assert_equal(recorded.body, _1.body) + assert_equal( + recorded.headers.transform_keys(&:downcase).fetch("content-type"), + _1.headers.transform_keys(&:downcase).fetch("content-type") + ) + end + end + + def test_client_redirect_303 + stub_request(:post, "http://localhost/smartscraper").to_return_json( + status: 303, + headers: {"location" => "/redirected"}, + body: {} + ) + stub_request(:get, "http://localhost/redirected").to_return( + status: 303, + headers: {"location" => "/redirected"} + ) + + scrapegraphai = Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key") + + assert_raises(Scrapegraphai::Errors::APIConnectionError) do + scrapegraphai.smartscraper.create( + user_prompt: "Extract the product name, price, and description", + request_options: {extra_headers: {}} + ) + end + + assert_requested(:get, "http://localhost/redirected", times: Scrapegraphai::Client::MAX_REDIRECTS) do + headers = _1.headers.keys.map(&:downcase) + refute_includes(headers, "content-type") + assert_nil(_1.body) + end + end + + def test_client_redirect_auth_keep_same_origin + stub_request(:post, 
"http://localhost/smartscraper").to_return_json( + status: 307, + headers: {"location" => "/redirected"}, + body: {} + ) + stub_request(:any, "http://localhost/redirected").to_return( + status: 307, + headers: {"location" => "/redirected"} + ) + + scrapegraphai = Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key") + + assert_raises(Scrapegraphai::Errors::APIConnectionError) do + scrapegraphai.smartscraper.create( + user_prompt: "Extract the product name, price, and description", + request_options: {extra_headers: {"authorization" => "Bearer xyz"}} + ) + end + + recorded, = WebMock::RequestRegistry.instance.requested_signatures.hash.first + auth_header = recorded.headers.transform_keys(&:downcase).fetch("authorization") + + assert_equal("Bearer xyz", auth_header) + assert_requested(:any, "http://localhost/redirected", times: Scrapegraphai::Client::MAX_REDIRECTS) do + auth_header = _1.headers.transform_keys(&:downcase).fetch("authorization") + assert_equal("Bearer xyz", auth_header) + end + end + + def test_client_redirect_auth_strip_cross_origin + stub_request(:post, "http://localhost/smartscraper").to_return_json( + status: 307, + headers: {"location" => "https://example.com/redirected"}, + body: {} + ) + stub_request(:any, "https://example.com/redirected").to_return( + status: 307, + headers: {"location" => "https://example.com/redirected"} + ) + + scrapegraphai = Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key") + + assert_raises(Scrapegraphai::Errors::APIConnectionError) do + scrapegraphai.smartscraper.create( + user_prompt: "Extract the product name, price, and description", + request_options: {extra_headers: {"authorization" => "Bearer xyz"}} + ) + end + + assert_requested(:any, "https://example.com/redirected", times: Scrapegraphai::Client::MAX_REDIRECTS) do + headers = _1.headers.keys.map(&:downcase) + refute_includes(headers, "authorization") + end + end + + def test_default_headers + 
stub_request(:post, "http://localhost/smartscraper").to_return_json(status: 200, body: {}) + + scrapegraphai = Scrapegraphai::Client.new(base_url: "http://localhost", api_key: "My API Key") + + scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") + + assert_requested(:any, /./) do |req| + headers = req.headers.transform_keys(&:downcase).fetch_values("accept", "content-type") + headers.each { refute_empty(_1) } + end + end +end diff --git a/test/scrapegraphai/file_part_test.rb b/test/scrapegraphai/file_part_test.rb new file mode 100644 index 0000000..f6970a2 --- /dev/null +++ b/test/scrapegraphai/file_part_test.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +class Scrapegraphai::Test::FilePartTest < Minitest::Test + def test_to_json + text = "gray" + filepart = Scrapegraphai::FilePart.new(StringIO.new(text)) + + assert_equal(text.to_json, filepart.to_json) + assert_equal(text.to_yaml, filepart.to_yaml) + end +end diff --git a/test/scrapegraphai/internal/sorbet_runtime_support_test.rb b/test/scrapegraphai/internal/sorbet_runtime_support_test.rb new file mode 100644 index 0000000..864831c --- /dev/null +++ b/test/scrapegraphai/internal/sorbet_runtime_support_test.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class Scrapegraphai::Test::SorbetRuntimeSupportTest < Minitest::Test + extend Minitest::Serial + + i_suck_and_my_tests_are_order_dependent! 
+ + module E + extend Scrapegraphai::Internal::Type::Enum + + define_sorbet_constant!(:TaggedSymbol) { 1 } + end + + module U + extend Scrapegraphai::Internal::Type::Union + + define_sorbet_constant!(:Variants) { 2 } + end + + class M < Scrapegraphai::Internal::Type::BaseModel + define_sorbet_constant!(:OrHash) { 3 } + end + + def test_nil_aliases + err = Scrapegraphai::Internal::Util::SorbetRuntimeSupport::MissingSorbetRuntimeError + + assert_raises(err) { Scrapegraphai::Internal::AnyHash } + assert_raises(err) { Scrapegraphai::Internal::FileInput } + assert_raises(err) { Scrapegraphai::Internal::Type::Converter::Input } + assert_raises(err) { Scrapegraphai::Internal::Type::Converter::CoerceState } + assert_raises(err) { Scrapegraphai::Internal::Type::Converter::DumpState } + assert_raises(err) { Scrapegraphai::Internal::Type::BaseModel::KnownField } + assert_raises(err) { Scrapegraphai::Internal::Util::ParsedUri } + assert_raises(err) { Scrapegraphai::Internal::Util::ServerSentEvent } + assert_raises(err) { Scrapegraphai::Internal::Transport::BaseClient::RequestComponents } + assert_raises(err) { Scrapegraphai::Internal::Transport::BaseClient::RequestInput } + assert_raises(err) { Scrapegraphai::Internal::Transport::PooledNetRequester::Request } + assert_raises(err) { E::TaggedSymbol } + assert_raises(err) { U::Variants } + assert_raises(err) { M::OrHash } + end + + def test_stubbed_aliases + Kernel.instance_eval { const_set(:T, nil) } + + assert_equal(1, E::TaggedSymbol) + assert_equal(2, U::Variants) + assert_equal(3, M::OrHash) + end +end diff --git a/test/scrapegraphai/internal/type/base_model_test.rb b/test/scrapegraphai/internal/type/base_model_test.rb new file mode 100644 index 0000000..1b1e591 --- /dev/null +++ b/test/scrapegraphai/internal/type/base_model_test.rb @@ -0,0 +1,727 @@ +# frozen_string_literal: true + +require_relative "../../test_helper" + +class Scrapegraphai::Test::PrimitiveModelTest < Minitest::Test + A = 
Scrapegraphai::Internal::Type::ArrayOf[-> { Integer }] + H = Scrapegraphai::Internal::Type::HashOf[-> { Integer }, nil?: true] + + module E + extend Scrapegraphai::Internal::Type::Enum + end + + module U + extend Scrapegraphai::Internal::Type::Union + end + + class B < Scrapegraphai::Internal::Type::BaseModel + optional :a, Integer + optional :b, B + end + + def test_typing + converters = [ + Scrapegraphai::Internal::Type::Unknown, + Scrapegraphai::Internal::Type::Boolean, + A, + H, + E, + U, + B + ] + + converters.each do |conv| + assert_pattern do + conv => Scrapegraphai::Internal::Type::Converter + end + end + end + + def test_coerce + cases = { + [Scrapegraphai::Internal::Type::Unknown, :a] => [{yes: 1}, :a], + [NilClass, :a] => [{maybe: 1}, nil], + [NilClass, nil] => [{yes: 1}, nil], + [Scrapegraphai::Internal::Type::Boolean, true] => [{yes: 1}, true], + [Scrapegraphai::Internal::Type::Boolean, "true"] => [{no: 1}, "true"], + [Integer, 1] => [{yes: 1}, 1], + [Integer, 1.0] => [{maybe: 1}, 1], + [Integer, "1"] => [{maybe: 1}, 1], + [Integer, "one"] => [{no: 1}, "one"], + [Float, 1] => [{yes: 1}, 1.0], + [Float, "1"] => [{maybe: 1}, 1.0], + [Float, :one] => [{no: 1}, :one], + [String, :str] => [{yes: 1}, "str"], + [String, "str"] => [{yes: 1}, "str"], + [String, 1] => [{maybe: 1}, "1"], + [:a, "a"] => [{yes: 1}, :a], + [Date, "1990-09-19"] => [{yes: 1}, Date.new(1990, 9, 19)], + [Date, Date.new(1990, 9, 19)] => [{yes: 1}, Date.new(1990, 9, 19)], + [Date, "one"] => [{no: 1}, "one"], + [Time, "1990-09-19"] => [{yes: 1}, Time.new(1990, 9, 19)], + [Time, Time.new(1990, 9, 19)] => [{yes: 1}, Time.new(1990, 9, 19)], + [Time, "one"] => [{no: 1}, "one"] + } + + cases.each do |lhs, rhs| + target, input = lhs + exactness, expect = rhs + state = Scrapegraphai::Internal::Type::Converter.new_coerce_state + assert_pattern do + Scrapegraphai::Internal::Type::Converter.coerce(target, input, state: state) => ^expect + state.fetch(:exactness).filter { _2.nonzero? 
}.to_h => ^exactness + end + end + end + + def test_dump + cases = { + [Scrapegraphai::Internal::Type::Unknown, B.new(a: "one", b: B.new(a: 1.0))] => {a: "one", b: {a: 1}}, + [A, B.new(a: "one", b: B.new(a: 1.0))] => {a: "one", b: {a: 1}}, + [H, B.new(a: "one", b: B.new(a: 1.0))] => {a: "one", b: {a: 1}}, + [E, B.new(a: "one", b: B.new(a: 1.0))] => {a: "one", b: {a: 1}}, + [U, B.new(a: "one", b: B.new(a: 1.0))] => {a: "one", b: {a: 1}}, + [B, B.new(a: "one", b: B.new(a: 1.0))] => {a: "one", b: {a: 1}}, + [String, B.new(a: "one", b: B.new(a: 1.0))] => {a: "one", b: {a: 1}}, + [:b, B.new(a: "one", b: B.new(a: 1.0))] => {a: "one", b: {a: 1}}, + [nil, B.new(a: "one", b: B.new(a: 1.0))] => {a: "one", b: {a: 1}}, + [Scrapegraphai::Internal::Type::Boolean, true] => true, + [Scrapegraphai::Internal::Type::Boolean, "true"] => "true", + [Integer, "1"] => "1", + [Float, 1] => 1, + [String, "one"] => "one", + [String, :one] => :one, + [:a, :b] => :b, + [:a, "a"] => "a", + [String, StringIO.new("one")] => "one", + [String, Pathname(__FILE__)] => Scrapegraphai::FilePart + } + + cases.each do + target, input = _1 + expect = _2 + assert_pattern do + Scrapegraphai::Internal::Type::Converter.dump(target, input) => ^expect + end + end + end + + def test_coerce_errors + cases = { + [Integer, "one"] => ArgumentError, + [Float, "one"] => ArgumentError, + [String, Time] => TypeError, + [Date, "one"] => ArgumentError, + [Time, "one"] => ArgumentError + } + + cases.each do |testcase, expect| + target, input = testcase + state = Scrapegraphai::Internal::Type::Converter.new_coerce_state + Scrapegraphai::Internal::Type::Converter.coerce(target, input, state: state) + assert_pattern do + state => {error: ^expect} + end + end + end + + def test_dump_retry + types = [ + Scrapegraphai::Internal::Type::Unknown, + Scrapegraphai::Internal::Type::Boolean, + A, + H, + E, + U, + B + ] + Pathname(__FILE__).open do |fd| + cases = [ + fd, + [fd], + {a: fd}, + {a: {b: fd}} + ] + types.product(cases).each 
do |target, input| + state = {can_retry: true} + Scrapegraphai::Internal::Type::Converter.dump(target, input, state: state) + + assert_pattern do + state => {can_retry: false} + end + end + end + end +end + +class Scrapegraphai::Test::EnumModelTest < Minitest::Test + class E0 + include Scrapegraphai::Internal::Type::Enum + + attr_reader :values + + def initialize(*values) = (@values = values) + end + + module E1 + extend Scrapegraphai::Internal::Type::Enum + + TRUE = true + end + + module E2 + extend Scrapegraphai::Internal::Type::Enum + + ONE = 1 + TWO = 2 + end + + module E3 + extend Scrapegraphai::Internal::Type::Enum + + ONE = 1.0 + TWO = 2.0 + end + + module E4 + extend Scrapegraphai::Internal::Type::Enum + + ONE = :one + TWO = :two + end + + def test_coerce + cases = { + [E0.new, "one"] => [{no: 1}, "one"], + [E0.new(:one), "one"] => [{yes: 1}, :one], + [E0.new(:two), "one"] => [{maybe: 1}, "one"], + + [E1, true] => [{yes: 1}, true], + [E1, false] => [{no: 1}, false], + [E1, :true] => [{no: 1}, :true], + + [E2, 1] => [{yes: 1}, 1], + [E2, 1.0] => [{yes: 1}, 1], + [E2, 1.2] => [{no: 1}, 1.2], + [E2, "1"] => [{no: 1}, "1"], + + [E3, 1.0] => [{yes: 1}, 1.0], + [E3, 1] => [{yes: 1}, 1.0], + [E3, "one"] => [{no: 1}, "one"], + + [E4, :one] => [{yes: 1}, :one], + [E4, "one"] => [{yes: 1}, :one], + [E4, "1"] => [{maybe: 1}, "1"], + [E4, :"1"] => [{maybe: 1}, :"1"], + [E4, 1] => [{no: 1}, 1] + } + + cases.each do |lhs, rhs| + target, input = lhs + exactness, expect = rhs + state = Scrapegraphai::Internal::Type::Converter.new_coerce_state + assert_pattern do + Scrapegraphai::Internal::Type::Converter.coerce(target, input, state: state) => ^expect + state.fetch(:exactness).filter { _2.nonzero? 
}.to_h => ^exactness + end + end + end + + def test_dump + cases = { + [E1, true] => true, + [E1, "true"] => "true", + + [E2, 1.0] => 1.0, + [E2, 3] => 3, + [E2, "1.0"] => "1.0", + + [E3, 1.0] => 1.0, + [E3, 3] => 3, + [E3, "1.0"] => "1.0", + + [E4, :one] => :one, + [E4, "one"] => "one", + [E4, "1.0"] => "1.0" + } + + cases.each do + target, input = _1 + expect = _2 + assert_pattern do + Scrapegraphai::Internal::Type::Converter.dump(target, input) => ^expect + end + end + end +end + +class Scrapegraphai::Test::CollectionModelTest < Minitest::Test + A1 = Scrapegraphai::Internal::Type::ArrayOf[-> { Integer }] + H1 = Scrapegraphai::Internal::Type::HashOf[Integer] + + A2 = Scrapegraphai::Internal::Type::ArrayOf[H1] + H2 = Scrapegraphai::Internal::Type::HashOf[-> { A1 }] + + A3 = Scrapegraphai::Internal::Type::ArrayOf[Integer, nil?: true] + H3 = Scrapegraphai::Internal::Type::HashOf[Integer, nil?: true] + + def test_coerce + cases = { + [A1, []] => [{yes: 1}, []], + [A1, {}] => [{no: 1}, {}], + [A1, [1, 2.0]] => [{yes: 2, maybe: 1}, [1, 2]], + [A1, ["1", 2.0]] => [{yes: 1, maybe: 2}, [1, 2]], + [H1, {}] => [{yes: 1}, {}], + [H1, []] => [{no: 1}, []], + [H1, {a: 1, b: 2}] => [{yes: 3}, {a: 1, b: 2}], + [H1, {"a" => 1, "b" => 2}] => [{yes: 3}, {a: 1, b: 2}], + [H1, {[] => 1}] => [{yes: 2, no: 1}, {[] => 1}], + [H1, {a: 1.5}] => [{yes: 1, maybe: 1}, {a: 1}], + + [A2, [{}, {"a" => 1}]] => [{yes: 4}, [{}, {a: 1}]], + [A2, [{"a" => "1"}]] => [{yes: 2, maybe: 1}, [{a: 1}]], + [H2, {a: [1, 2]}] => [{yes: 4}, {a: [1, 2]}], + [H2, {"a" => ["1", 2]}] => [{yes: 3, maybe: 1}, {a: [1, 2]}], + [H2, {"a" => ["one", 2]}] => [{yes: 3, no: 1}, {a: ["one", 2]}], + + [A3, [nil, 1]] => [{yes: 3}, [nil, 1]], + [A3, [nil, "1"]] => [{yes: 2, maybe: 1}, [nil, 1]], + [H3, {a: nil, b: "1"}] => [{yes: 2, maybe: 1}, {a: nil, b: 1}], + [H3, {a: nil}] => [{yes: 2}, {a: nil}] + } + + cases.each do |lhs, rhs| + target, input = lhs + exactness, expect = rhs + state = 
Scrapegraphai::Internal::Type::Converter.new_coerce_state + assert_pattern do + Scrapegraphai::Internal::Type::Converter.coerce(target, input, state: state) => ^expect + state.fetch(:exactness).filter { _2.nonzero? }.to_h => ^exactness + end + end + end +end + +class Scrapegraphai::Test::BaseModelTest < Minitest::Test + class M1 < Scrapegraphai::Internal::Type::BaseModel + required :a, Integer + end + + class M2 < M1 + required :a, Time + required :b, Integer, nil?: true + optional :c, String + end + + class M3 < Scrapegraphai::Internal::Type::BaseModel + optional :c, const: :c + required :d, const: :d + end + + class M4 < M1 + request_only do + required :a, Integer + optional :b, String + end + + response_only do + required :c, Integer + optional :d, String + end + end + + class M5 < Scrapegraphai::Internal::Type::BaseModel + request_only do + required :c, const: :c + end + + response_only do + required :d, const: :d + end + end + + class M6 < M1 + required :a, Scrapegraphai::Internal::Type::ArrayOf[M6] + optional :b, M6 + end + + def test_coerce + cases = { + [M1, {}] => [{yes: 1, no: 1}, {}], + [M1, :m1] => [{no: 1}, :m1], + + [M2, {}] => [{yes: 2, no: 1, maybe: 1}, {}], + [M2, {a: "1990-09-19", b: nil}] => [{yes: 4}, {a: "1990-09-19", b: nil}], + [M2, {a: "1990-09-19", b: "1"}] => [{yes: 3, maybe: 1}, {a: "1990-09-19", b: "1"}], + [M2, {a: "1990-09-19"}] => [{yes: 3, maybe: 1}, {a: "1990-09-19"}], + [M2, {a: "1990-09-19", c: nil}] => [{yes: 2, maybe: 2}, {a: "1990-09-19", c: nil}], + + [M3, {c: "c", d: "d"}] => [{yes: 3}, {c: :c, d: :d}], + [M3, {c: "d", d: "c"}] => [{yes: 1, maybe: 2}, {c: "d", d: "c"}], + + [M4, {c: 2}] => [{yes: 5}, {c: 2}], + [M4, {a: "1", c: 2}] => [{yes: 4, maybe: 1}, {a: "1", c: 2}], + [M4, {b: nil, c: 2}] => [{yes: 4, maybe: 1}, {b: nil, c: 2}], + + [M5, {}] => [{yes: 3}, {}], + [M5, {c: "c"}] => [{yes: 3}, {c: :c}], + [M5, {d: "d"}] => [{yes: 3}, {d: :d}], + [M5, {d: nil}] => [{yes: 2, no: 1}, {d: nil}], + + [M6, {a: [{a: []}]}] => 
[{yes: 6}, -> { _1 in {a: [M6]} }], + [M6, {b: {a: []}}] => [{yes: 4, no: 1}, -> { _1 in {b: M6} }] + } + + cases.each do |lhs, rhs| + target, input = lhs + exactness, expect = rhs + state = Scrapegraphai::Internal::Type::Converter.new_coerce_state + assert_pattern do + coerced = Scrapegraphai::Internal::Type::Converter.coerce(target, input, state: state) + assert_equal(coerced, coerced) + if coerced.is_a?(Scrapegraphai::Internal::Type::BaseModel) + coerced.to_h => ^expect + else + coerced => ^expect + end + state.fetch(:exactness).filter { _2.nonzero? }.to_h => ^exactness + end + end + end + + def test_dump + cases = { + [M3, M3.new] => {d: :d}, + [M3, {}] => {d: :d}, + [M3, {d: 1}] => {d: 1}, + + [M4, M4.new(a: 1, b: "b", c: 2, d: "d")] => {a: 1, b: "b"}, + [M4, {a: 1, b: "b", c: 2, d: "d"}] => {a: 1, b: "b"}, + + [M5, M5.new] => {c: :c}, + [M5, {}] => {c: :c}, + [M5, {c: 1}] => {c: 1} + } + + cases.each do + target, input = _1 + expect = _2 + assert_pattern do + Scrapegraphai::Internal::Type::Converter.dump(target, input) => ^expect + end + end + end + + def test_accessors + cases = { + M2.new({a: "1990-09-19", b: "1"}) => [{a: "1990-09-19", b: "1"}, {a: Time.new(1990, 9, 19), b: 1}], + M2.new(a: "one", b: "one") => [{a: "one", b: "one"}, {a: ArgumentError, b: ArgumentError}], + M2.new(a: nil, b: 2.0) => [{a: nil, b: 2.0}, {a: TypeError}], + M2.new(a: nil, b: 2.2) => [{a: nil, b: 2.2}, {a: TypeError, b: 2}], + + M3.new => [{}, {d: :d}], + M3.new(d: 1) => [{d: 1}, {d: ArgumentError}], + + M5.new => [{}, {c: :c, d: :d}] + } + + cases.each do + target = _1 + data, attributes = _2 + + assert_pattern do + target.to_h => ^data + end + + attributes.each do |accessor, expect| + case expect + in Class if expect <= StandardError + tap do + target.public_send(accessor) + flunk + rescue Scrapegraphai::Errors::ConversionError => e + assert_kind_of(expect, e.cause) + end + else + assert_pattern { target.public_send(accessor) => ^expect } + end + end + end + end + + def 
test_inplace_modification + m1 = M6.new(a: []) + m1.a << M6.new(a: []) + + m2 = M6.new(b: M6.new(a: [])) + m2.b.a << M6.new(a: []) + + m3 = M6.new(a: []) + m4 = M6.new(b: m3) + m3.a << M6.new(a: []) + + assert_pattern do + m1 => {a: [{a: []}]} + m2 => {b: {a: [{a: []}]}} + m4 => {b: {a: [{a: []}]}} + end + end +end + +class Scrapegraphai::Test::UnionTest < Minitest::Test + class U0 + include Scrapegraphai::Internal::Type::Union + + def initialize(*variants) = variants.each { variant(_1) } + end + + module U1 + extend Scrapegraphai::Internal::Type::Union + + variant const: :a + variant const: 2 + end + + class M1 < Scrapegraphai::Internal::Type::BaseModel + required :t, const: :a, api_name: :type + optional :c, String + end + + class M2 < Scrapegraphai::Internal::Type::BaseModel + required :type, const: :b + optional :c, String + end + + module U2 + extend Scrapegraphai::Internal::Type::Union + + discriminator :type + + variant :a, M1 + variant :b, M2 + end + + module U3 + extend Scrapegraphai::Internal::Type::Union + + discriminator :type + + variant :a, M1 + variant String + end + + module U4 + extend Scrapegraphai::Internal::Type::Union + + discriminator :type + + variant String + variant :a, M1 + end + + class M3 < Scrapegraphai::Internal::Type::BaseModel + optional :recur, -> { U5 } + required :a, Integer + end + + class M4 < Scrapegraphai::Internal::Type::BaseModel + optional :recur, -> { U5 } + required :a, Scrapegraphai::Internal::Type::ArrayOf[-> { U5 }] + end + + class M5 < Scrapegraphai::Internal::Type::BaseModel + optional :recur, -> { U5 } + required :b, Scrapegraphai::Internal::Type::ArrayOf[-> { U5 }] + end + + module U5 + extend Scrapegraphai::Internal::Type::Union + + variant -> { M3 } + variant -> { M4 } + end + + module U6 + extend Scrapegraphai::Internal::Type::Union + + variant -> { M3 } + variant -> { M5 } + end + + def test_accessors + model = M3.new(recur: []) + tap do + model.recur + flunk + rescue Scrapegraphai::Errors::ConversionError => e 
+ assert_kind_of(ArgumentError, e.cause) + end + end + + def test_coerce + cases = { + [U0, :""] => [{no: 1}, 0, :""], + + [U0.new(Integer, Float), "one"] => [{no: 1}, 2, "one"], + [U0.new(Integer, Float), 1.0] => [{yes: 1}, 2, 1.0], + [U0.new({const: :a}), "a"] => [{yes: 1}, 1, :a], + [U0.new({const: :a}), "2"] => [{maybe: 1}, 1, "2"], + + [U1, "a"] => [{yes: 1}, 1, :a], + [U1, "2"] => [{maybe: 1}, 2, "2"], + [U1, :b] => [{maybe: 1}, 2, :b], + + [U2, {type: :a}] => [{yes: 3}, 0, {t: :a}], + [U2, {type: "b"}] => [{yes: 3}, 0, {type: :b}], + + [U3, "one"] => [{yes: 1}, 2, "one"], + [U4, "one"] => [{yes: 1}, 1, "one"], + + [U5, {a: []}] => [{yes: 3}, 2, {a: []}], + [U6, {b: []}] => [{yes: 3}, 2, {b: []}], + + [U5, {a: [{a: []}]}] => [{yes: 6}, 4, {a: [M4.new(a: [])]}], + [U5, {a: [{a: [{a: []}]}]}] => [{yes: 9}, 6, {a: [M4.new(a: [M4.new(a: [])])]}] + } + + cases.each do |lhs, rhs| + target, input = lhs + exactness, branched, expect = rhs + state = Scrapegraphai::Internal::Type::Converter.new_coerce_state + assert_pattern do + coerced = Scrapegraphai::Internal::Type::Converter.coerce(target, input, state: state) + assert_equal(coerced, coerced) + if coerced.is_a?(Scrapegraphai::Internal::Type::BaseModel) + coerced.to_h => ^expect + else + coerced => ^expect + end + state.fetch(:exactness).filter { _2.nonzero? 
}.to_h => ^exactness + state => {branched: ^branched} + end + end + end +end + +class Scrapegraphai::Test::BaseModelQoLTest < Minitest::Test + class E0 + include Scrapegraphai::Internal::Type::Enum + + attr_reader :values + + def initialize(*values) = (@values = values) + end + + module E1 + extend Scrapegraphai::Internal::Type::Enum + + A = 1 + end + + module E2 + extend Scrapegraphai::Internal::Type::Enum + + A = 1 + end + + module E3 + extend Scrapegraphai::Internal::Type::Enum + + A = 2 + B = 3 + end + + class U0 + include Scrapegraphai::Internal::Type::Union + + def initialize(*variants) = variants.each { variant(_1) } + end + + module U1 + extend Scrapegraphai::Internal::Type::Union + + variant String + variant Integer + end + + module U2 + extend Scrapegraphai::Internal::Type::Union + + variant String + variant Integer + end + + class M1 < Scrapegraphai::Internal::Type::BaseModel + required :a, Integer + end + + class M2 < Scrapegraphai::Internal::Type::BaseModel + required :a, Integer, nil?: true + end + + class M3 < M2 + required :a, Integer + end + + def test_equality + cases = { + [Scrapegraphai::Internal::Type::Unknown, Scrapegraphai::Internal::Type::Unknown] => true, + [Scrapegraphai::Internal::Type::Boolean, Scrapegraphai::Internal::Type::Boolean] => true, + [Scrapegraphai::Internal::Type::Unknown, Scrapegraphai::Internal::Type::Boolean] => false, + [E0.new(:a, :b), E0.new(:a, :b)] => true, + [E0.new(:a, :b), E0.new(:b, :a)] => true, + [E0.new(:a, :b), E0.new(:b, :c)] => false, + [E1, E2] => true, + [E1, E3] => false, + [U0.new(String, Integer), U0.new(String, Integer)] => true, + [U0.new(String, Integer), U0.new(Integer, String)] => false, + [U0.new(String, Float), U0.new(String, Integer)] => false, + [U1, U2] => true, + [M1, M2] => false, + [M1, M3] => true, + [M1.new(a: 1), M1.new(a: 1)] => true + } + + cases.each do + if _2 + assert_equal(*_1) + assert_equal(*_1.map(&:hash)) + else + refute_equal(*_1) + refute_equal(*_1.map(&:hash)) + end + end + 
end +end + +class Scrapegraphai::Test::MetaInfoTest < Minitest::Test + A1 = Scrapegraphai::Internal::Type::ArrayOf[Integer, nil?: true, doc: "dog"] + H1 = Scrapegraphai::Internal::Type::HashOf[-> { String }, nil?: true, doc: "dawg"] + + class M1 < Scrapegraphai::Internal::Type::BaseModel + required :a, Integer, doc: "dog" + optional :b, -> { String }, nil?: true, doc: "dawg" + end + + module U1 + extend Scrapegraphai::Internal::Type::Union + + variant -> { Integer }, const: 2, doc: "dog" + variant -> { String }, doc: "dawg" + end + + def test_meta_retrieval + m1 = A1.instance_variable_get(:@meta) + m2 = H1.instance_variable_get(:@meta) + assert_equal({doc: "dog"}, m1) + assert_equal({doc: "dawg"}, m2) + + ma, mb = M1.fields.fetch_values(:a, :b) + assert_equal({doc: "dog"}, ma.fetch(:meta)) + assert_equal({doc: "dawg"}, mb.fetch(:meta)) + + ua, ub = U1.send(:known_variants).map(&:last) + assert_equal({doc: "dog"}, ua) + assert_equal({doc: "dawg"}, ub) + end +end diff --git a/test/scrapegraphai/internal/util_test.rb b/test/scrapegraphai/internal/util_test.rb new file mode 100644 index 0000000..7e15816 --- /dev/null +++ b/test/scrapegraphai/internal/util_test.rb @@ -0,0 +1,642 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class Scrapegraphai::Test::UtilDataHandlingTest < Minitest::Test + def test_left_map + assert_pattern do + Scrapegraphai::Internal::Util.deep_merge({a: 1}, nil) => nil + end + end + + def test_right_map + assert_pattern do + Scrapegraphai::Internal::Util.deep_merge(nil, {a: 1}) => {a: 1} + end + end + + def test_disjoint_maps + assert_pattern do + Scrapegraphai::Internal::Util.deep_merge({b: 2}, {a: 1}) => {a: 1, b: 2} + end + end + + def test_overlapping_maps + assert_pattern do + Scrapegraphai::Internal::Util.deep_merge({b: 2, c: 3}, {a: 1, c: 4}) => {a: 1, b: 2, c: 4} + end + end + + def test_nested + assert_pattern do + Scrapegraphai::Internal::Util.deep_merge({b: {b2: 1}}, {b: {b2: 2}}) => {b: {b2: 2}} + end + end + + 
def test_nested_left_map + assert_pattern do + Scrapegraphai::Internal::Util.deep_merge({b: {b2: 1}}, {b: 6}) => {b: 6} + end + end + + def test_omission + merged = Scrapegraphai::Internal::Util.deep_merge( + {b: {b2: 1, b3: {c: 4, d: 5}}}, + {b: {b2: 1, b3: {c: Scrapegraphai::Internal::OMIT, d: 5}}} + ) + + assert_pattern do + merged => {b: {b2: 1, b3: {d: 5}}} + end + end + + def test_concat + merged = Scrapegraphai::Internal::Util.deep_merge( + {a: {b: [1, 2]}}, + {a: {b: [3, 4]}}, + concat: true + ) + + assert_pattern do + merged => {a: {b: [1, 2, 3, 4]}} + end + end + + def test_concat_false + merged = Scrapegraphai::Internal::Util.deep_merge( + {a: {b: [1, 2]}}, + {a: {b: [3, 4]}}, + concat: false + ) + + assert_pattern do + merged => {a: {b: [3, 4]}} + end + end + + def test_dig + assert_pattern do + Scrapegraphai::Internal::Util.dig(1, nil) => 1 + Scrapegraphai::Internal::Util.dig({a: 1}, :b) => nil + Scrapegraphai::Internal::Util.dig({a: 1}, :a) => 1 + Scrapegraphai::Internal::Util.dig({a: {b: 1}}, [:a, :b]) => 1 + + Scrapegraphai::Internal::Util.dig([], 1) => nil + Scrapegraphai::Internal::Util.dig([nil, [nil, 1]], [1, 1]) => 1 + Scrapegraphai::Internal::Util.dig({a: [nil, 1]}, [:a, 1]) => 1 + Scrapegraphai::Internal::Util.dig([], 1.0) => nil + + Scrapegraphai::Internal::Util.dig(Object, 1) => nil + Scrapegraphai::Internal::Util.dig([], 1.0) { 2 } => 2 + Scrapegraphai::Internal::Util.dig([], ->(_) { 2 }) => 2 + Scrapegraphai::Internal::Util.dig([1], -> { _1 in [1] }) => true + end + end +end + +class Scrapegraphai::Test::UtilUriHandlingTest < Minitest::Test + def test_parsing + %w[ + http://example.com + https://example.com/ + https://example.com:443/example?e1=e1&e2=e2&e= + ].each do |url| + parsed = Scrapegraphai::Internal::Util.parse_uri(url) + unparsed = Scrapegraphai::Internal::Util.unparse_uri(parsed).to_s + + assert_equal(url, unparsed) + assert_equal(parsed, Scrapegraphai::Internal::Util.parse_uri(unparsed)) + end + end + + def test_joining + 
cases = [ + [ + "h://a.b/c?d=e", + "h://nope/ignored", + Scrapegraphai::Internal::Util.parse_uri("h://a.b/c?d=e") + ], + [ + "h://a.b/c?d=e", + "h://nope", + { + host: "a.b", + path: "/c", + query: {"d" => ["e"]} + } + ] + ] + + cases.each do |expect, lhs, rhs| + assert_equal( + URI.parse(expect), + Scrapegraphai::Internal::Util.join_parsed_uri( + Scrapegraphai::Internal::Util.parse_uri(lhs), + rhs + ) + ) + end + end + + def test_joining_queries + base_url = "h://a.b/c?d=e" + cases = { + "c2" => "h://a.b/c/c2", + "/c2?f=g" => "h://a.b/c2?f=g", + "/c?f=g" => "h://a.b/c?d=e&f=g" + } + + cases.each do |path, expected| + assert_equal( + URI.parse(expected), + Scrapegraphai::Internal::Util.join_parsed_uri( + Scrapegraphai::Internal::Util.parse_uri(base_url), + {path: path} + ) + ) + end + end +end + +class Scrapegraphai::Test::RegexMatchTest < Minitest::Test + def test_json_content + cases = { + "application/json" => true, + "application/jsonl" => false, + "application/vnd.github.v3+json" => true, + "application/vnd.api+json" => true + } + cases.each do |header, verdict| + assert_pattern do + Scrapegraphai::Internal::Util::JSON_CONTENT.match?(header) => ^verdict + end + end + end + + def test_jsonl_content + cases = { + "application/x-ndjson" => true, + "application/x-ldjson" => true, + "application/jsonl" => true, + "application/x-jsonl" => true, + "application/json" => false, + "application/vnd.api+json" => false + } + cases.each do |header, verdict| + assert_pattern do + Scrapegraphai::Internal::Util::JSONL_CONTENT.match?(header) => ^verdict + end + end + end +end + +class Scrapegraphai::Test::UtilFormDataEncodingTest < Minitest::Test + class FakeCGI < CGI + def initialize(headers, io) + encoded = io.to_a + @ctype = headers["content-type"] + # rubocop:disable Lint/EmptyBlock + @io = Scrapegraphai::Internal::Util::ReadIOAdapter.new(encoded.to_enum) {} + # rubocop:enable Lint/EmptyBlock + @c_len = encoded.join.bytesize.to_s + super() + end + + def stdinput = @io + + 
def env_table + { + "REQUEST_METHOD" => "POST", + "CONTENT_TYPE" => @ctype, + "CONTENT_LENGTH" => @c_len + } + end + end + + def test_encoding_length + headers, = Scrapegraphai::Internal::Util.encode_content( + {"content-type" => "multipart/form-data"}, + Pathname(__FILE__) + ) + assert_pattern do + headers.fetch("content-type") => /boundary=(.+)$/ + end + field, = Regexp.last_match.captures + assert(field.length < 70 - 6) + end + + def test_file_encode + file = Pathname(__FILE__) + fileinput = Scrapegraphai::Internal::Type::Converter.dump(Scrapegraphai::Internal::Type::FileInput, "abc") + headers = {"content-type" => "multipart/form-data"} + cases = { + "abc" => ["", "abc"], + StringIO.new("abc") => ["", "abc"], + fileinput => %w[upload abc], + Scrapegraphai::FilePart.new(StringIO.new("abc")) => ["", "abc"], + file => [file.basename.to_path, /^class Scrapegraphai/], + Scrapegraphai::FilePart.new(file, filename: "d o g") => ["d%20o%20g", /^class Scrapegraphai/] + } + cases.each do |body, testcase| + filename, val = testcase + encoded = Scrapegraphai::Internal::Util.encode_content(headers, body) + cgi = FakeCGI.new(*encoded) + io = cgi[""] + assert_pattern do + io.original_filename => ^filename + io.read => ^val + end + end + end + + def test_hash_encode + headers = {"content-type" => "multipart/form-data"} + cases = { + {a: 2, b: 3} => {"a" => "2", "b" => "3"}, + {a: 2, b: nil} => {"a" => "2", "b" => "null"}, + {a: 2, b: [1, 2, 3]} => {"a" => "2", "b" => "1"}, + {strio: StringIO.new("a")} => {"strio" => "a"}, + {strio: Scrapegraphai::FilePart.new("a")} => {"strio" => "a"}, + {pathname: Pathname(__FILE__)} => {"pathname" => -> { _1.read in /^class Scrapegraphai/ }}, + {pathname: Scrapegraphai::FilePart.new(Pathname(__FILE__))} => {"pathname" => -> { _1.read in /^class Scrapegraphai/ }} + } + cases.each do |body, testcase| + encoded = Scrapegraphai::Internal::Util.encode_content(headers, body) + cgi = FakeCGI.new(*encoded) + testcase.each do |key, val| + 
assert_pattern do + parsed = + case (p = cgi[key]) + in StringIO + p.read + else + p + end + parsed => ^val + end + end + end + end +end + +class Scrapegraphai::Test::UtilIOAdapterTest < Minitest::Test + def test_copy_read + cases = { + StringIO.new("abc") => "abc", + Enumerator.new { _1 << "abc" } => "abc" + } + cases.each do |input, expected| + io = StringIO.new + # rubocop:disable Lint/EmptyBlock + adapter = Scrapegraphai::Internal::Util::ReadIOAdapter.new(input) {} + # rubocop:enable Lint/EmptyBlock + IO.copy_stream(adapter, io) + assert_equal(expected, io.string) + end + end + + def test_copy_write + cases = { + StringIO.new => "", + StringIO.new("abc") => "abc" + } + cases.each do |input, expected| + enum = Scrapegraphai::Internal::Util.writable_enum do |y| + IO.copy_stream(input, y) + end + assert_equal(expected, enum.to_a.join) + end + end +end + +class Scrapegraphai::Test::UtilFusedEnumTest < Minitest::Test + def test_rewind_closing + touched = false + once = 0 + steps = 0 + enum = Enumerator.new do |y| + next if touched + + 10.times do + steps = _1 + y << _1 + end + ensure + once = once.succ + end + + fused = Scrapegraphai::Internal::Util.fused_enum(enum, external: true) do + touched = true + loop { enum.next } + end + Scrapegraphai::Internal::Util.close_fused!(fused) + + assert_equal(1, once) + assert_equal(0, steps) + end + + def test_closing + arr = [1, 2, 3] + once = 0 + fused = Scrapegraphai::Internal::Util.fused_enum(arr.to_enum) do + once = once.succ + end + + enumerated_1 = fused.to_a + assert_equal(arr, enumerated_1) + assert_equal(1, once) + + enumerated_2 = fused.to_a + assert_equal([], enumerated_2) + assert_equal(1, once) + end + + def test_rewind_chain + once = 0 + fused = Scrapegraphai::Internal::Util.fused_enum([1, 2, 3].to_enum) do + once = once.succ + end + .lazy + .map(&:succ) + .filter(&:odd?) 
+ first = fused.next + + assert_equal(3, first) + assert_equal(0, once) + assert_raises(StopIteration) { fused.rewind.next } + assert_equal(1, once) + end + + def test_external_iteration + iter = [1, 2, 3].to_enum + first = iter.next + fused = Scrapegraphai::Internal::Util.fused_enum(iter, external: true) + + assert_equal(1, first) + assert_equal([2, 3], fused.to_a) + end + + def test_close_fused + once = 0 + fused = Scrapegraphai::Internal::Util.fused_enum([1, 2, 3].to_enum) do + once = once.succ + end + + Scrapegraphai::Internal::Util.close_fused!(fused) + + assert_equal(1, once) + assert_equal([], fused.to_a) + assert_equal(1, once) + end + + def test_closed_fused_extern_iteration + taken = 0 + enum = [1, 2, 3].to_enum.lazy.map do + taken = taken.succ + _1 + end + fused = Scrapegraphai::Internal::Util.fused_enum(enum) + first = fused.next + + assert_equal(1, first) + Scrapegraphai::Internal::Util.close_fused!(fused) + assert_equal(1, taken) + end + + def test_closed_fused_taken_count + taken = 0 + enum = [1, 2, 3].to_enum.lazy.map do + taken = taken.succ + _1 + end + .map(&:succ) + .filter(&:odd?) + fused = Scrapegraphai::Internal::Util.fused_enum(enum) + + assert_equal(0, taken) + Scrapegraphai::Internal::Util.close_fused!(fused) + assert_equal(0, taken) + end + + def test_closed_fused_extern_iter_taken_count + taken = 0 + enum = [1, 2, 3].to_enum.lazy.map do + taken = taken.succ + _1 + end + .map(&:succ) + .filter(&:itself) + first = enum.next + assert_equal(2, first) + assert_equal(1, taken) + + fused = Scrapegraphai::Internal::Util.fused_enum(enum) + Scrapegraphai::Internal::Util.close_fused!(fused) + assert_equal(1, taken) + end + + def test_close_fused_sse_chain + taken = 0 + enum = [1, 2, 3].to_enum.lazy.map do + taken = taken.succ + _1 + end + .map(&:succ) + .filter(&:odd?) 
+ .map(&:to_s) + + fused_1 = Scrapegraphai::Internal::Util.fused_enum(enum) + fused_2 = Scrapegraphai::Internal::Util.decode_lines(fused_1) + fused_3 = Scrapegraphai::Internal::Util.decode_sse(fused_2) + + assert_equal(0, taken) + Scrapegraphai::Internal::Util.close_fused!(fused_3) + assert_equal(0, taken) + end +end + +class Scrapegraphai::Test::UtilContentDecodingTest < Minitest::Test + def test_charset + cases = { + "application/json" => Encoding::BINARY, + "application/json; charset=utf-8" => Encoding::UTF_8, + "charset=uTf-8 application/json; " => Encoding::UTF_8, + "charset=UTF-8; application/json; " => Encoding::UTF_8, + "charset=ISO-8859-1 ;application/json; " => Encoding::ISO_8859_1, + "charset=EUC-KR ;application/json; " => Encoding::EUC_KR + } + text = String.new.force_encoding(Encoding::BINARY) + cases.each do |content_type, encoding| + Scrapegraphai::Internal::Util.force_charset!(content_type, text: text) + assert_equal(encoding, text.encoding) + end + end +end + +class Scrapegraphai::Test::UtilSseTest < Minitest::Test + def test_decode_lines + cases = { + %w[] => %w[], + %W[\n\n] => %W[\n \n], + %W[\n \n] => %W[\n \n], + %w[a] => %w[a], + %W[a\nb] => %W[a\n b], + %W[a\nb\n] => %W[a\n b\n], + %W[\na b\n] => %W[\n ab\n], + %W[\na b\n\n] => %W[\n ab\n \n], + %W[\na b] => %W[\n ab], + %W[\u1F62E\u200D\u1F4A8] => %W[\u1F62E\u200D\u1F4A8], + %W[\u1F62E \u200D \u1F4A8] => %W[\u1F62E\u200D\u1F4A8], + ["\xf0\x9f".b, "\xa5\xba".b] => ["\xf0\x9f\xa5\xba".b], + ["\xf0".b, "\x9f".b, "\xa5".b, "\xba".b] => ["\xf0\x9f\xa5\xba".b] + } + eols = %W[\n \r \r\n] + cases.each do |enum, expected| + eols.each do |eol| + lines = Scrapegraphai::Internal::Util.decode_lines(enum.map { _1.gsub("\n", eol) }) + assert_equal(expected.map { _1.gsub("\n", eol) }, lines.to_a, "eol=#{JSON.generate(eol)}") + end + end + end + + def test_mixed_decode_lines + cases = { + %w[] => %w[], + %W[\r\r] => %W[\r \r], + %W[\r \r] => %W[\r \r], + %W[\r\r\r] => %W[\r \r \r], + %W[\r\r \r] => %W[\r 
\r \r], + %W[\r \n] => %W[\r\n], + %W[\r\r\n] => %W[\r \r\n], + %W[\n\r] => %W[\n \r] + } + cases.each do |enum, expected| + lines = Scrapegraphai::Internal::Util.decode_lines(enum) + assert_equal(expected, lines.to_a) + end + end + + def test_decode_sse + cases = { + "empty input" => { + [] => [] + }, + "single data event" => { + [ + "data: hello world\n", + "\n" + ] => [ + {data: "hello world\n"} + ] + }, + "multiple data lines" => { + [ + "data: line 1\n", + "data: line 2\n", + "\n" + ] => [ + {data: "line 1\nline 2\n"} + ] + }, + "complete event" => { + [ + "id: 123\n", + "event: update\n", + "data: hello world\n", + "retry: 5000\n", + "\n" + ] => [ + { + event: "update", + id: "123", + data: "hello world\n", + retry: 5000 + } + ] + }, + "multiple events" => { + [ + "event: update\n", + "data: first\n", + "\n", + "event: message\n", + "data: second\n", + "\n" + ] => [ + {event: "update", data: "first\n"}, + {event: "message", data: "second\n"} + ] + }, + "comments" => { + [ + ": this is a comment\n", + "data: actual data\n", + "\n" + ] => [ + {data: "actual data\n"} + ] + }, + "invalid retry" => { + [ + "retry: not a number\n", + "data: hello\n", + "\n" + ] => [ + {data: "hello\n"} + ] + }, + "invalid id with null" => { + [ + "id: bad\0id\n", + "data: hello\n", + "\n" + ] => [ + {data: "hello\n"} + ] + }, + "leading space in value" => { + [ + "data: hello world\n", + "data: leading space\n", + "\n" + ] => [ + {data: "hello world\n leading space\n"} + ] + }, + "no final newline" => { + [ + "data: hello\n", + "id: 1" + ] => [ + {data: "hello\n", id: "1"} + ] + }, + "multiple empty lines" => { + [ + "data: first\n", + "\n", + "\n", + "data: second\n", + "\n" + ] => [ + {data: "first\n"}, + {data: "second\n"} + ] + }, + "multibyte unicode" => { + [ + "data: \u1F62E\u200D\u1F4A8\n" + ] => [ + {data: "\u1F62E\u200D\u1F4A8\n"} + ] + } + } + + cases.each do |name, test_cases| + test_cases.each do |input, expected| + actual = 
Scrapegraphai::Internal::Util.decode_sse(input).map(&:compact) + assert_equal(expected, actual, name) + end + end + end +end diff --git a/test/scrapegraphai/resource_namespaces.rb b/test/scrapegraphai/resource_namespaces.rb new file mode 100644 index 0000000..4ea8478 --- /dev/null +++ b/test/scrapegraphai/resource_namespaces.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Scrapegraphai + module Test + module Resources + end + end +end diff --git a/test/scrapegraphai/resources/crawl_test.rb b/test/scrapegraphai/resources/crawl_test.rb new file mode 100644 index 0000000..9daeb85 --- /dev/null +++ b/test/scrapegraphai/resources/crawl_test.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class Scrapegraphai::Test::Resources::CrawlTest < Scrapegraphai::Test::ResourceTest + def test_retrieve_results + skip("Prism tests are disabled") + + response = @scrapegraphai.crawl.retrieve_results("task_id") + + assert_pattern do + response => Scrapegraphai::Models::CrawlRetrieveResultsResponse + end + + assert_pattern do + response => { + result: Scrapegraphai::Models::CrawlRetrieveResultsResponse::Result | nil, + status: Scrapegraphai::Models::CrawlRetrieveResultsResponse::Status | nil, + task_id: String | nil, + traceback: String | nil + } + end + end + + def test_start_required_params + skip("Prism tests are disabled") + + response = @scrapegraphai.crawl.start(url: "https://example.com") + + assert_pattern do + response => Scrapegraphai::Models::CrawlStartResponse + end + + assert_pattern do + response => { + task_id: String | nil + } + end + end +end diff --git a/test/scrapegraphai/resources/credits_test.rb b/test/scrapegraphai/resources/credits_test.rb new file mode 100644 index 0000000..acd1609 --- /dev/null +++ b/test/scrapegraphai/resources/credits_test.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class Scrapegraphai::Test::Resources::CreditsTest < 
Scrapegraphai::Test::ResourceTest + def test_retrieve + skip("Prism tests are disabled") + + response = @scrapegraphai.credits.retrieve + + assert_pattern do + response => Scrapegraphai::Models::CreditRetrieveResponse + end + + assert_pattern do + response => { + remaining_credits: Integer | nil, + total_credits_used: Integer | nil + } + end + end +end diff --git a/test/scrapegraphai/resources/feedback_test.rb b/test/scrapegraphai/resources/feedback_test.rb new file mode 100644 index 0000000..1b89018 --- /dev/null +++ b/test/scrapegraphai/resources/feedback_test.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class Scrapegraphai::Test::Resources::FeedbackTest < Scrapegraphai::Test::ResourceTest + def test_submit_required_params + skip("Prism tests are disabled") + + response = @scrapegraphai.feedback.submit(rating: 0, request_id: "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") + + assert_pattern do + response => Scrapegraphai::Models::FeedbackSubmitResponse + end + + assert_pattern do + response => { + feedback_id: String | nil, + feedback_timestamp: Time | nil, + message: String | nil, + request_id: String | nil + } + end + end +end diff --git a/test/scrapegraphai/resources/generate_schema_test.rb b/test/scrapegraphai/resources/generate_schema_test.rb new file mode 100644 index 0000000..a52814c --- /dev/null +++ b/test/scrapegraphai/resources/generate_schema_test.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class Scrapegraphai::Test::Resources::GenerateSchemaTest < Scrapegraphai::Test::ResourceTest + def test_create_required_params + skip("Prism tests are disabled") + + response = + @scrapegraphai.generate_schema.create( + user_prompt: "Create a schema for product information including name, price, and reviews" + ) + + assert_pattern do + response => Scrapegraphai::Models::GenerateSchemaCreateResponse + end + + assert_pattern do + response => { + error: String | nil, + 
generated_schema: Scrapegraphai::Internal::Type::Unknown | nil, + refined_prompt: String | nil, + request_id: String | nil, + status: Scrapegraphai::Models::GenerateSchemaCreateResponse::Status | nil, + user_prompt: String | nil + } + end + end + + def test_retrieve + skip("Prism tests are disabled") + + response = @scrapegraphai.generate_schema.retrieve("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") + + assert_pattern do + response => Scrapegraphai::Models::GenerateSchemaRetrieveResponse + end + + assert_pattern do + case response + in Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse + in Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse + end + end + end +end diff --git a/test/scrapegraphai/resources/healthz_test.rb b/test/scrapegraphai/resources/healthz_test.rb new file mode 100644 index 0000000..5bc8359 --- /dev/null +++ b/test/scrapegraphai/resources/healthz_test.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class Scrapegraphai::Test::Resources::HealthzTest < Scrapegraphai::Test::ResourceTest + def test_check + skip("Prism tests are disabled") + + response = @scrapegraphai.healthz.check + + assert_pattern do + response => Scrapegraphai::Models::HealthzCheckResponse + end + + assert_pattern do + response => { + services: ^(Scrapegraphai::Internal::Type::HashOf[String]) | nil, + status: String | nil + } + end + end +end diff --git a/test/scrapegraphai/resources/markdownify_test.rb b/test/scrapegraphai/resources/markdownify_test.rb new file mode 100644 index 0000000..a5313bb --- /dev/null +++ b/test/scrapegraphai/resources/markdownify_test.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class Scrapegraphai::Test::Resources::MarkdownifyTest < Scrapegraphai::Test::ResourceTest + def test_convert_required_params + skip("Prism tests are disabled") + + response = @scrapegraphai.markdownify.convert(website_url: 
"https://example.com") + + assert_pattern do + response => Scrapegraphai::CompletedMarkdownify + end + + assert_pattern do + response => { + error: String | nil, + request_id: String | nil, + result: String | nil, + status: Scrapegraphai::CompletedMarkdownify::Status | nil, + website_url: String | nil + } + end + end + + def test_retrieve_status + skip("Prism tests are disabled") + + response = @scrapegraphai.markdownify.retrieve_status("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") + + assert_pattern do + response => Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse + end + + assert_pattern do + case response + in Scrapegraphai::CompletedMarkdownify + in Scrapegraphai::Models::MarkdownifyRetrieveStatusResponse::FailedMarkdownifyResponse + end + end + end +end diff --git a/test/scrapegraphai/resources/searchscraper_test.rb b/test/scrapegraphai/resources/searchscraper_test.rb new file mode 100644 index 0000000..707d386 --- /dev/null +++ b/test/scrapegraphai/resources/searchscraper_test.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class Scrapegraphai::Test::Resources::SearchscraperTest < Scrapegraphai::Test::ResourceTest + def test_create_required_params + skip("Prism tests are disabled") + + response = + @scrapegraphai.searchscraper.create( + user_prompt: "Find the latest AI news and extract headlines and summaries" + ) + + assert_pattern do + response => Scrapegraphai::CompletedSearchScraper + end + + assert_pattern do + response => { + error: String | nil, + num_results: Integer | nil, + reference_urls: ^(Scrapegraphai::Internal::Type::ArrayOf[String]) | nil, + request_id: String | nil, + result: Scrapegraphai::Internal::Type::Unknown | nil, + status: Scrapegraphai::CompletedSearchScraper::Status | nil, + user_prompt: String | nil + } + end + end + + def test_retrieve_status + skip("Prism tests are disabled") + + response = @scrapegraphai.searchscraper.retrieve_status("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") + + 
assert_pattern do + response => Scrapegraphai::Models::SearchscraperRetrieveStatusResponse + end + + assert_pattern do + case response + in Scrapegraphai::CompletedSearchScraper + in Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse + end + end + end +end diff --git a/test/scrapegraphai/resources/smartscraper_test.rb b/test/scrapegraphai/resources/smartscraper_test.rb new file mode 100644 index 0000000..e449439 --- /dev/null +++ b/test/scrapegraphai/resources/smartscraper_test.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class Scrapegraphai::Test::Resources::SmartscraperTest < Scrapegraphai::Test::ResourceTest + def test_create_required_params + skip("Prism tests are disabled") + + response = + @scrapegraphai.smartscraper.create(user_prompt: "Extract the product name, price, and description") + + assert_pattern do + response => Scrapegraphai::CompletedSmartscraper + end + + assert_pattern do + response => { + error: String | nil, + request_id: String | nil, + result: Scrapegraphai::Internal::Type::Unknown | nil, + status: Scrapegraphai::CompletedSmartscraper::Status | nil, + user_prompt: String | nil, + website_url: String | nil + } + end + end + + def test_retrieve + skip("Prism tests are disabled") + + response = @scrapegraphai.smartscraper.retrieve("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") + + assert_pattern do + response => Scrapegraphai::Models::SmartscraperRetrieveResponse + end + + assert_pattern do + case response + in Scrapegraphai::CompletedSmartscraper + in Scrapegraphai::FailedSmartscraper + end + end + end + + def test_list + skip("Prism tests are disabled") + + response = @scrapegraphai.smartscraper.list + + assert_pattern do + response => Scrapegraphai::Models::SmartscraperListResponse + end + + assert_pattern do + case response + in Scrapegraphai::CompletedSmartscraper + in Scrapegraphai::FailedSmartscraper + end + end + end +end diff --git 
a/test/scrapegraphai/resources/validate_test.rb b/test/scrapegraphai/resources/validate_test.rb new file mode 100644 index 0000000..41758c3 --- /dev/null +++ b/test/scrapegraphai/resources/validate_test.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class Scrapegraphai::Test::Resources::ValidateTest < Scrapegraphai::Test::ResourceTest + def test_api_key + skip("Prism tests are disabled") + + response = @scrapegraphai.validate.api_key + + assert_pattern do + response => Scrapegraphai::Models::ValidateAPIKeyResponse + end + + assert_pattern do + response => { + email: String | nil + } + end + end +end diff --git a/test/scrapegraphai/test_helper.rb b/test/scrapegraphai/test_helper.rb new file mode 100644 index 0000000..8ccc5f7 --- /dev/null +++ b/test/scrapegraphai/test_helper.rb @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +# Requiring this file from each test file ensures we always do the following, even +# when running a single-file test: +# - Load the whole gem (as one would in production) +# - Define shared testing namespace so that we don't need to indent test files as much +# - Setting up testing dependencies + +require "digest" +require "singleton" + +require "async" +require "minitest/autorun" +require "minitest/focus" +require "minitest/hooks/test" +require "minitest/proveit" +require "minitest/rg" +require "webmock" + +require_relative "../../lib/scrapegraphai" +require_relative "resource_namespaces" + +module Kernel + alias_method :_sleep, :sleep + + def sleep(secs) + case Thread.current.thread_variable_get(:mock_sleep) + in Array => counter + counter << secs + secs + else + _sleep(secs) + end + end +end + +class Time + class << self + alias_method :_now, :now + end + + def self.now = Thread.current.thread_variable_get(:time_now) || _now +end + +class Scrapegraphai::Test::SingletonClient < Scrapegraphai::Client + include Singleton + + TEST_API_BASE_URL = ENV.fetch("TEST_API_BASE_URL", 
"http://localhost:4010") + + def initialize + super(base_url: Scrapegraphai::Test::SingletonClient::TEST_API_BASE_URL, api_key: "My API Key") + end +end + +module Minitest::Serial + def test_order = :random + + def run_one_method(...) = Minitest::Runnable.run_one_method(...) +end + +class Minitest::Test + include Minitest::Hooks + + make_my_diffs_pretty! + parallelize_me! + prove_it! +end + +class Scrapegraphai::Test::ResourceTest < Minitest::Test + def async? + return @async unless @async.nil? + @async = Digest::SHA256.hexdigest(self.class.name).to_i(16).odd? + end + + def before_all + super + @scrapegraphai = Scrapegraphai::Test::SingletonClient.instance + end + + def around_all = async? ? Sync { super } : super + + def around = async? ? Async { super }.wait : super +end + +module WebMock + AssertionFailure.error_class = Minitest::Assertion +end