Overview
Here is the list of the different parameters you can use with ScrapingBee's HTML API.
You can also discover this API using our Postman collection covering every ScrapingBee's features.
type
]
(default
)
boolean
]
(false
)
boolean
]
(false
)
boolean
]
(false
)
boolean
]
(false
)
boolean
]
(false
)
string
]
(""
)
boolean
]
(false
)
string
]
(domcontentloaded
)
int
]
(1080
)
int
]
(1920
)
Getting Started
ScrapingBee is meant to be the easiest scraping API available on the web.
To scrape a web page, you only need two things:
- Your API key, available here
- The encoded web page URL you want to scrape (learn more about URL encoding)
The following snippet is an example of a simple GET
API call to scrape the URL defined in the query string variable YOUR-URL
:
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get("YOUR-URL")
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
The API will then respond with the raw HTML content of the target URL:
<html>
<head>
...
</head>
<body>
...
</body>
</html>
Every URL that failed will be tried as many times as possible for 30 seconds.
Be aware of the maximum 30-second timeout when your code calls the API.
Headers and cookies returned by the target website are prefixed with Spb-
(for ScraPingBee).
API key
api_key
[string
]
(default= ""
)
required
All requests are authenticated using your private API key.
To get access to your API key, create an account here and confirm your email address.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get("YOUR-URL")
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
URL
url
[string
]
(default= ""
)
required
This parameter is the full URL including the protocol (with http/https
) of the page to extract data from.
You must encode your URL. For example, the +
character is encoded to %20
. Consult your programming language documentation for functions that encode URLs
Several examples of URL encoding can be seen below:
sudo apt-get install gridsite-clients
urlencode "YOUR URL"
import urllib.parse
encoded_url = urllib.parse.quote("YOUR URL")
encoded_url = encodeURIComponent("YOUR URL")
String encoded_url = URLEncoder.encode("YOUR URL", "UTF-8");
p = URI::Parser.new
p.escape("YOUR-URL")
<?php
$url_encoded = urlencode("YOUR URL");
?>
package main
import (
"net/url"
)
func main() {
encoded_url := url.QueryEscape("YOUR URL")
}
Headless Browser
Javascript Rendering
render_js
[boolean
]
(default= True
)
By default, ScrapingBee fetches the URL to scrape via a headless browser that will execute the JavaScript code on the page. This is the default behavior and costs 5 credits per request.
This can be useful for scraping a Single Page Application built with frameworks such as React.js, Angular.js, JQuery or Vue.
To fetch the URL without using a headless browser, use the render_js=false
parameter in the GET
request.
The following is an example with a dummy Single Page Application (SPA):
If you use render_js=true
(default behavior)
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get("YOUR-URL")
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
The following result is returned:
<html>
<head>
...
</head>
<body>
<content>
</content>
<content>
</content>
<content>
</content>
<content>
</content>
<content>
</content>
</body>
</html>
But if you use render_js=False
instead:
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&render_js=False"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'render_js': 'False',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'render_js': 'False',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&render_js=False")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&render_js=False')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&render_js=False');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&render_js=False", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
This is what is returned:
<html>
<head>
..
</head>
<body>
</body>
</html>
JavaScript Execution
js_scenario
[stringified JSON
]
(default= {}
)
If you want to interact with pages you want to scrape before we return your the HTML you can add JavaScript scenario to your API call.
For example, if you wish to click on a button, you will need to use this scenario.
{
"instructions": [
{"click": "#buttonId"}
]
}
And so our scraper will scrape the webpage, click on the button #buttonId
and then return you the HTML of the page.
Important: JavaScript scenario are JSON formatted, and in order to pass them to a GET request, you need to stringify them.
You can add multiple instructions to the scenario, they will get executed one by one on our end.
Below is a quick overview of all the different instruction you can use.
{"click": "#button_id"} # Click on a an element
{"wait": 1000} # Wait for a fixed duration in ms
{"wait_for": "#slow_div"} # Wait for an element to appear
{"wait_for_and_click": "#slow_div"} # Wait for an element to appear and then click on it
{"scroll_x": 1000} # Scroll the screen in the horizontal axis, in px
{"scroll_y": 1000} # Scroll the screen in the vertical axis, in px
{"fill": ["#input_1", "value_1"]} # Fill some input
{"evaluate": "console.log('toto')"} # Run custom JavaScript code
{"infinite_scroll": # Scroll the page until the end
{
"max_count": 0, # Maximum number of scroll, 0 for infinite
"delay": 1000 # Delay between each scroll, in ms
"end_click": {"selector": "#button_id"} # (optional) Click on a button when the end of the page is reached, usually a "load more" button
}
}
If you want to learn more about this powerful feature, you can check the full documentation here.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&js_scenario=%7B%22instructions%22%3A+%5B%7B%22click%22%3A+%22%23buttonId%22%7D%5D%7D"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'js_scenario': {"instructions": [{"click": "#buttonId"}]},
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'js_scenario': {"instructions": [{"click": "#buttonId"}]},
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&js_scenario=%7B%22instructions%22%3A+%5B%7B%22click%22%3A+%22%23buttonId%22%7D%5D%7D")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&js_scenario=%7B%22instructions%22%3A+%5B%7B%22click%22%3A+%22%23buttonId%22%7D%5D%7D')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&js_scenario=%7B%22instructions%22%3A+%5B%7B%22click%22%3A+%22%23buttonId%22%7D%5D%7D');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&js_scenario=%7B%22instructions%22%3A+%5B%7B%22click%22%3A+%22%23buttonId%22%7D%5D%7D", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Wait for a fixed amount of time
wait
[integer
]
(default= 0
)
Some code-heavy websites need time to fully "render". To direct ScrapingBee to wait before it returns the fully rendered HTML, use the wait
parameter with a value in milliseconds between 0 and 35000.
The ScrapingBee headless browsers will then wait the duration of the time set in milliseconds before returning the page's HTML.
If you need some help setting this up, do not hesitate to contact us.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait=10000"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'wait': '10000',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'wait': '10000',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait=10000")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait=10000')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait=10000');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait=10000", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Wait for selector
wait_for
[string
]
(default= ""
)
It's sometimes necessary to wait for a particular element to appear in the DOM before ScrapingBee returns the HTML content.
Our headless browsers will wait for the CSS / Xpath selector passed in the parameter before returning the HTML.
For example, to wait for the element <div class="loading-done"></div>
use wait_for=.loading-done
in your request.
All selectors beginning with
/
will be treated as XPath selectors. All other selectors will be treated as CSS selectors.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait_for=.loading-done"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'wait_for': '.loading-done',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'wait_for': '.loading-done',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait_for=.loading-done")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait_for=.loading-done')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait_for=.loading-done');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait_for=.loading-done", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Wait for browser
wait_browser
[string
]
(default= domcontentloaded
)
This advanced parameter tells the browser to wait until certain network condition are met.
It can take 4 different values:
domcontentloaded
(default): Wait until the DOM is loadedload
: Wait until the page is fully loadednetworkidle0
: Wait until there are no more than 0 network connections for at least 500 msnetworkidle2
: Wait until there are no more than 2 network connections for at least 500 ms
For example, to wait until the page is fully loaded before getting the results, you can use wait_browser=load
.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait_browser=load"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'wait_browser': 'load',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'wait_browser': 'load',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait_browser=load")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait_browser=load')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait_browser=load');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&wait_browser=load", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Blocking Ads
block_ads
[boolean
]
(default= false
)
By default, ScrapingBee does not block ads. To avoid scraping them (e.g.,to speed up your request), use block_ads=true
This parameter is unnecessary if JavaScript rendering is disabled.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&block_ads=True"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'block_ads': 'True',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'block_ads': 'True',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&block_ads=True")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&block_ads=True')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&block_ads=True');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&block_ads=True", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Blocking Images and CSS
block_resources
[boolean
]
(default= true
)
By default, and to speed up requests, ScrapingBee blocks all images and CSS in the scraped page, but to scrape them, use block_resources=false
This parameter is unnecessary if JavaScript rendering is disabled.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&block_resources=True"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'block_resources': 'True',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'block_resources': 'True',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&block_resources=True")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&block_resources=True')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&block_resources=True');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&block_resources=True", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Viewport width
window_width
[int
]
(default= 1920
)
If you need to change the dimension of the browser's viewport (window) when scraping the target page you can use the window_width
and window_height
parameters.
Only useful when using render_js=True
.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&window_width=1500"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'window_width': '1500',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'window_width': '1500',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&window_width=1500")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&window_width=1500')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&window_width=1500');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&window_width=1500", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Viewport height
window_height
[int
]
(default= 1080
)
If you need to change the dimension of the browser's viewport (window) when scraping the target page you can use the window_width
and window_height
parameters.
Only useful when using render_js=True
.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&window_height=500"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'window_height': '500',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'window_height': '500',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&window_height=500")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&window_height=500')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&window_height=500');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&window_height=500", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Proxies
Premium proxy
premium_proxy
[boolean
]
(default= false
)
For some hard-to-scrape websites, you may need to use premium proxies (or residential proxies). These proxies are rarely blocked and we recommend trying premium proxies when you receive error codes or difficult to scrape websites, like search engines, social networks, or hard to scrape E-commerce websites.
To scrape these sites, you need to add the parameter premium_proxy=true
.
Each request with this parameter will count as 25 API credits with Javascript enabled. If used without JavaScript rendering it will cost 10 credits
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&premium_proxy=True"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'premium_proxy': 'True',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'premium_proxy': 'True',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&premium_proxy=True")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&premium_proxy=True')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&premium_proxy=True');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&premium_proxy=True", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Geolocation
country_code
[string
]
(default= ""
)
In addition to premium proxies, you can also choose the proxy country from the following list of countries using the parameter country_code=COUNTRY_CODE
.
To use premium proxies from Germany for example you need to set both premium_proxy=true
and country_code=de
parameters in your API call.
The following is the list of the most popular supported country codes using ISO 3166-1 format ).
The whole list of supported country codes can be found here.
country_code | Country Name |
---|---|
br | Brazil |
in | India |
mx | Mexico |
... | ... |
ru | Russia |
us | UnitedStates |
gb | UnitedKingdom |
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&country_code=de&premium_proxy=True"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'country_code': 'de',
'premium_proxy': 'True',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'country_code': 'de',
'premium_proxy': 'True',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&country_code=de&premium_proxy=True")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&country_code=de&premium_proxy=True')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&country_code=de&premium_proxy=True');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&country_code=de&premium_proxy=True", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Stealth Proxy (beta)
stealth_proxy
[boolean
]
(default= false
)
Sometimes, even using premium_proxy=True
is not enough. For hard websites to scrape, we have developed a new pool of proxies that should be enough to scrape even the hardest to scrape websites.
To use this pool, simply add stealth_proxy=True
to your API calls.
Things to keep in mind if you use this option:
- this option currently only works when JavaScript rendering is enabled.
- Each successful API call using this option will cost 75 credits
- The
evaluate
andinfinite_scroll
instruction of the JavaScript scenario is not supported with this option.
Here is an example if you want to use this option.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&stealth_proxy=True"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'stealth_proxy': 'True',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'stealth_proxy': 'True',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&stealth_proxy=True")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&stealth_proxy=True')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&stealth_proxy=True');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&stealth_proxy=True", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Own proxy
own_proxy
[string
]
(default= ""
)
If you want to use our infrastructure with your own proxy, you can use the own_proxy
parameter. This will allows you to use all the ScrapingBee features, with your own proxies.
The proxy information syntax is: <protocol><username>:<password>@<host>:<port>
.
Things to keep in mind:
- protocol is optional
- if no
port
is specified, we will use1080
(cURL
default)
Here is an example if you want to use https://johndoe:password@my_proxy.com:123
proxy.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&own_proxy=https%3A%2F%2Fjohndoe%3Apassword%40my_proxy.com%3A1234"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'own_proxy': 'https://johndoe:password@my_proxy.com:1234',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'own_proxy': 'https://johndoe:password@my_proxy.com:1234',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&own_proxy=https%3A%2F%2Fjohndoe%3Apassword%40my_proxy.com%3A1234")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&own_proxy=https%3A%2F%2Fjohndoe%3Apassword%40my_proxy.com%3A1234')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&own_proxy=https%3A%2F%2Fjohndoe%3Apassword%40my_proxy.com%3A1234');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&own_proxy=https%3A%2F%2Fjohndoe%3Apassword%40my_proxy.com%3A1234", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Headers
Header Forwarding
forward_headers
[boolean
]
(default= false
)
You might need to forward specific headers to the website that you want to scrape.
In order to forward headers, you must set forward_headers
to true
and then pass your custom headers.
You must then prefix the headers to forward to the website with "Spb-" (for ScraPingBee).
This prefix will be trimmed by ScrapingBee and headers will be forwarded to the target web page.
Example :
If you want to send the header Accept-Language: En-US
, add the header: Spb-Accept-Language: En-US
and the parameter forward_headers=true
to the request sent to the ScrapingBee API.
Note :
If you are using the ScrapingBee Python or Node library, no need to prefix headers with "Spb-"
or to use forward_headers=True
.
Please note that in order to make your request look like real ones, ScrapingBee adds several headers to all API requests. Use forward_headers_pure=True
to avoid this behavior.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers=true" \
-H "Spb-Accept-Language:En-US"
# Install the Python ScrapingBee library:
# `pip install scrapingbee`
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get(
'http://httpbin.org/headers?json',
headers={'Accept-Language': 'En-US'}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
send_request()
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
headers: {
'Accept-Language': 'En-US',
},
})
}
get('http://httpbin.org/anything?json').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers=true")
// Add headers
.addHeader("Spb-Accept-Language", "En-US")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers=true')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Add headers
req.add_field "Spb-Accept-Language", "En-US"
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers=true');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// set headers
curl_setopt($ch, CURLOPT_HTTPHEADER, [
'Spb-Accept-Language: En-US',
]);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
?>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers=true", nil)
// Headers
req.Header.Add("Spb-Accept-Language", "En-US")
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Here we are scraping httpbin.org/headers?json
, a page that demonstrates and displays the headers it received.
The following is the response from the above code. Note the Accept-Language
header in the response.
{
"headers": {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7",
"Host": "httpbin.org",
"Accept-Language": "En-US" # <-- Your header
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
}
}
Pure Header Forwarding
forward_headers_pure
[boolean
]
(default= false
)
If you want to forward specific headers to the website that you want to scrape, and don't need ScrapingBee to add any headers to your request, you should use forward_headers_pure=True
.
You must then prefix the headers to forward to the website with "Spb-" (for ScraPingBee).
This prefix will be trimmed by ScrapingBee and headers will be forwarded to the target web page.
Example :
If you want to only send the header Accept-Language: En-US
, add the header: Spb-Accept-Language: En-US
and the parameter forward_headers_pure=true
to the request sent to the ScrapingBee API.
Note :
If you are using the ScrapingBee Python or Node library, no need to prefix headers with "Spb-"
or to use forward_headers=True
.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers_pure=true" \
-H "Spb-Accept-Language:En-US"
# Install the Python ScrapingBee library:
# `pip install scrapingbee`
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get(
'http://httpbin.org/headers?json',
params={'forward_headers_pure': True},
headers={'Accept-Language': 'En-US'}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
send_request()
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
forward_headers_pure: true,
headers: {
'Accept-Language': 'En-US',
},
})
return response
}
get('http://httpbin.org/anything?json').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers_pure=true")
// Add headers
.addHeader("Spb-Accept-Language", "En-US")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers_pure=true')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Add headers
req.add_field "Spb-Accept-Language", "En-US"
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers_pure=true');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// set headers
curl_setopt($ch, CURLOPT_HTTPHEADER, [
'Spb-Accept-Language: En-US',
]);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
?>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers_pure=true", nil)
// Headers
req.Header.Add("Spb-Accept-Language", "En-US")
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Here we are scraping httpbin.org/headers?json
, a page that demonstrates and displays the headers it received.
The following is the response from the above code. Note the Accept-Language
header in the response.
{
"headers": {
"Accept-Encoding": "gzip, deflate", # Technical header sent with all requests
"Host": "httpbin.org", # Technical header sent with all requests
"Accept-Language": "En-US" # <-- Your headers, and only your headers
}
}
This parameter is only useful when using render_js=False
.
Response format
By default, the API will transparently return you the resource you want to scrape.
But you can do way more.
Downloading Picture and Files
The API will transparently download images, PDF or anything that is not HTML.
We recommend downloading files with render_js=false
.
There is a 2 MB limit per request.
Data extraction from CSS or XPATH selectors
extract_rules
[stringified JSON
]
(default= ""
)
If you want to extract data from pages and don't want to parse the HTML on your side, you can add extraction rules to your API call.
The simplest way to use JSON rules is to use the following format
{"key_name" : "css_or_xpath_selector"}
If you wish to extract the title, subtitle and intro of our blog, you will just need to use those rules.
{
"title" : "h1",
"subtitle" : "#subtitle",
}
And this will be the JSON response
{
"title" : "The ScrapingBee Blog",
"subtitle" : "We help you get better at web-scraping: detailed tutorial, case studies and writing by industry experts",
}
Important: extraction rules are JSON formatted, and in order to pass them to a GET request, you need to stringify them.
We've just described the easiest and quickest way to use this feature. If you want to read more about it, check out our full guide.
Important: We strongly advice you to use JS scenario with json_response
set to true
(learn more), as it will return you a detailed report of the scenario execution under the js_scenario_report
key. Very useful for debugging for example.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&extract_rules=%7B%22title%22%3A+%22h1%22%2C+%22subtitle%22%3A+%22%23subtitle%22%7D"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'extract_rules': {"title": "h1", "subtitle": "#subtitle"},
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'extract_rules': {"title": "h1", "subtitle": "#subtitle"},
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&extract_rules=%7B%22title%22%3A+%22h1%22%2C+%22subtitle%22%3A+%22%23subtitle%22%7D")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&extract_rules=%7B%22title%22%3A+%22h1%22%2C+%22subtitle%22%3A+%22%23subtitle%22%7D')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&extract_rules=%7B%22title%22%3A+%22h1%22%2C+%22subtitle%22%3A+%22%23subtitle%22%7D');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&extract_rules=%7B%22title%22%3A+%22h1%22%2C+%22subtitle%22%3A+%22%23subtitle%22%7D", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Screenshot
screenshot
[boolean
]
(default= false
)
If you want to get a screenshot of the page you want to scrape, use the screenshot=True
parameter.
Screenshots are only available when using render_js=True
.
When using screenshot=True
, block_resources
will automatically be set to False
so the browser will load images and CSS before taking the screenshot.
If you need both the screenshot of the page and the HTML content of it, use
screenshot=True
andjson_response=True
(learn more about json_response)If you need to wait for a particular amount of time, a DOM element, or a browser event, use our
wait
,wait_for
andwait_browser
parameters (learn more)
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&screenshot=True" > ./screenshot.png
# Install the Python ScrapingBee library:
# pip install scrapingbee
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get(
'YOUR-URL',
params={
'screenshot': True,
}
)
if response.ok:
with open("./screenshot.png", "wb") as f:
f.write(response.content)
// npm install scrapingbee
const fs = require('fs');
const scrapingbee = require('scrapingbee');
async function screenshot(url, path) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
screenshot: true, // Take a screenshot
screenshot_full_page: true, // Specify that we need the full height
window_width: 375, // Specify a mobile width in pixel
}
});
fs.writeFileSync(path, response.data);
}
screenshot('YOUR-URL', './screenshot.png').catch((e) =>
console.log('A problem occurs : ' + e.message)
);
Coming Soon
Coming Soon
Coming Soon
Screenshot a particular CSS selector
screenshot_selector
[string
]
(default= ""
)
By default, the screenshot will only capture the portion of the page that is visible in the browser's viewport.
If you need to screenshot a particular area of the page, you can use screenshot_selector=<CSS_selector>
where <CSS_selector>
is the CSS selector of the area you want to capture.
Screenshot full page
screenshot_full_page
[boolean
]
(default= false
)
By default, the screenshot will only capture the portion of the page that is visible in the browser's viewport.
If you need a screenshot of the full page from the target website use screenshot_full_page=True
If you need to change the size of the browser's viewport before taking a screenshot you can do it using window_width
and window_height
parameters. (learn more).
JSON Response
json_response
[bool
]
(default= false
)
If you are planning to integrate ScrapingBee with third-party tools that only accept JSON response, or want to intercept the response of some XHR / Ajax requests, you can send your API call with json_response=True
.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&json_response=True"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'json_response': 'True',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'json_response': 'True',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&json_response=True")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&json_response=True')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&json_response=True');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&json_response=True", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
The following is the received response when using this parameter:
{
# Headers sent by the server
"headers": {
"Date": "Fri, 16 Apr 2021 15:03:54 GMT",
...
"Access-Control-Allow-Credentials": "true"
},
# Credit cost of your request
"cost": 1,
# Initial status code of the server
"initial-status-code": 200,
# Resolved URL (following redirection)
"resolved-url": "https://httpbin.org/",
# Type of the response "html" or "json" or "b64_bytes" for file, image, pdf,...
"type": "html",
# Content of the answer. Content will be base 64 encoded if is a file, image, pdf,...
"body": "<html>... </body>"
# base 64 encoded screenshot of the page, if screenshot=true is used
"screenshot": "b0918...aef",
# Cookies sent back by the server
'cookies': [
{
"name": "cookie_name",
"value": "cookie_value",
"domain": "test.com",
...
},
...
],
# Results of the JS scenario "evaluate" instructions
"evaluate_results": [...]
# Content and source of iframes in the page
"iframes": [
{
"content": "<html>... </body>",
"src": "https://site.com/iframe"
},
...
],
# XHR / Ajax requests sent by the browser
"xhr": [
{
# URL
"url": "https://",
# status code of the server
"status_code": 200,
# Method of the request
"method": "POST",
# Headers of the XHR / Ajax request
"headers": {
"pragma": "no-cache",
...
},
# Response of the XHR / Ajax request
"body": "2d,x"
},
...
],
# js_scenario detailed report ( only useful if using render_js=True and js_scenario=...)
"js_scenario_report": {
"task_executed": 1,
"task_failure": 0,
"task_success": 1,
"tasks": [
{
"duration": 3.042,
"params": 3000,
"success": true,
"task": "wait"
}
],
"total_duration": 3.042
},
# Metada / Schema data
"metadata": {
"microdata": ...,
"json-ld": ...,
}
}
If the requested content is json, then the answers will look like this:
{
# Headers sent by the server
"headers": {
"Date": "Fri, 16 Apr 2021 15:13:02 GMT",
...
"Access-Control-Allow-Credentials": "true"
},
# Credit cost of your request
"cost": 1,
# Initial status code of the server
"initial-status-code": 200,
# Resolved URL (following redirection)
"resolved-url": "https://httpbin.org/anything?json",
# Type of the response "html" of "json"
"type": "json",
# Content of the answer
"body": {
"args": {
....
}
}
# Results of the JS scenario "evaluate" instructions
"evaluate_results": [...]
# XHR / Ajax requests sent by the browser
"xhr": [
...
]
# js_scenario detailed report ( only useful if using render_js=True and js_scenario=...)
"js_scenario_report": {
...
},
# Metada / Schema data
"metadata": {
"microdata": ...,
"json-ld": ...,
}
}
Return Page Source
return_page_source
[boolean
]
(default= false
)
To have HTML returned by the server and unaltered by the browser (before the JavaScript execution), use return_page_source=true
This parameter is unnecessary if JavaScript rendering is disabled.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&return_page_source=True"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'return_page_source': 'True',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'return_page_source': 'True',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&return_page_source=True")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&return_page_source=True')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&return_page_source=True');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&return_page_source=True", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
HTML to PDF conversion
Coming soon.
Proxy Mode
ScrapingBee also offers a proxy front-end to the API. This can make integration with third-party tools easier. The Proxy mode only changes the way you access ScrapingBee. The ScrapingBee API will then handle requests just like any standard request.
Request cost, return code and default parameters will be the same as a standard no-proxy request.
We recommend disabling Javascript rendering in proxy mode, which is enabled by default. The following credentials and configurations are used to access the proxy mode:
- HTTP address:
proxy.scrapingbee.com:8886
- HTTPS address:
proxy.scrapingbee.com:8887
- Socks5 address:
socks.scrapingbee.com:8888
- Username:
YOUR-API-KEY
- Password:
PARAMETERS
Important : Replace PARAMETERS
with our supported API parameters. If you don't know what to use, you can begin by using render_js=False
.
As an alternative, you can use URLs like the following:
{
"url_http": "http://YOUR-API-KEY:[email protected]:8886",
"url_https": "https://YOUR-API-KEY:[email protected]:8887",
"url_socks5": "socks5://YOUR-API-KEY:[email protected]:8888",
}
Important: if you try to scrape Google with this mode, each requests will cost 20 credits.
To read more on how to integrate the proxy-mode with your favorite tools and language, check out the our dedicated-page.
Session
session_id
[integer
]
(default= ""
)
All API requests using the same session_id
will be routed through the same IP address for a duration of 5 minutes.
We advice that you use a random integer between 0
and 10,000,000
everytime you wish to generate a new session_id
.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&session_id=123"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'session_id': '123',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'session_id': '123',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&session_id=123")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&session_id=123')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&session_id=123');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&session_id=123", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Timeout (in ms)
timeout
[int
]
(default= 140 000
)
The maximum number of ms, comprise between 1000
and 140000
, ScrapingBee will wait before returning you a results, use timeout=45000
in your API call to modify it.
Changing it could have a negative impact on your success rate.
Important: There will be a 0.5 second margin of error between the timeout
used in your API call and the actual maximum duration of this API call.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&timeout=10000"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'timeout': '10000',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'timeout': '10000',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&timeout=10000")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&timeout=10000')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&timeout=10000');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&timeout=10000", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Custom Cookies
cookies
[string
]
(default= ""
)
You can pass custom cookies to the target webpages
To do this you must pass a cookie string in the cookies
parameter.
If you want to set multiple cookies, separate them with ;
.
ScrapingBee currently supports cookies with the following attributes:
- name (required)
- value (required)
- domain (optional)
- path (optional)
- expires (optional)
You need to separate each attribute with ,
and each cookie with ;
.
Cookie syntax is as follows:
name=value,other_attributes=other_attribues_value;
Example
cookie_name_1=cookie_value1,domain=scrapingbee.com;cookie_name_2=cookie_value_2;cookie_name_3=cookie_value_3,path=/
Example with cookie_name_1=cookie_value1;cookie_name_2=cookie_value_2
:
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fcookies%3Fjson&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get(
'http://httpbin.org/cookies?json',
cookies= {"cookie_name_1":"cookie_value1","cookie_name_2":"cookie_value_2"},
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content))
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
},
cookies: {"cookie_name_1":"cookie_value_1","cookie_name_2":"cookie_value_2"},
})
return response
}
get('http://httpbin.org/cookies?json').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fcookies%3Fjson&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fcookies%3Fjson&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fcookies%3Fjson&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
?>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fcookies%3Fjson&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
In the above example, ScrapingBee is set to scrape httpbin.org/cookies?json
, a page that displays the cookies it received.
The following will be returned by the above request.
{
"cookies": {
"cookie_name_1": "cookie_value1",
"cookie_name_2": "cookie_value_2"
}
}
Device
device
[string
]
(default= "desktop"
)
Choose the kind of device that will send the request to the server. Only two choices are available, desktop
, the default, and mobile
.
Set device desktop
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&device=desktop"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'device': 'desktop',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'device': 'desktop',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&device=desktop")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&device=desktop')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&device=desktop');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&device=desktop", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Set device mobile
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&device=mobile"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'device': 'mobile',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'device': 'mobile',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&device=mobile")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&device=mobile')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&device=mobile');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&device=mobile", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
custom_google
[boolean
]
(default= false
)
If you need to scrape webpage on Google main domain (google.com) or subdomains (news.google.com, scholar.google.com, etc ...) you'll need to use custom_google=True
.
Important: each request using custom_google=True
will cost you 20 credits.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=https%3A%2F%2Fwww.google.com&custom_google=True"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('https://www.google.com',
params = {
'custom_google': 'True',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'custom_google': 'True',
},
})
return response
}
get('https://www.google.com').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=https%3A%2F%2Fwww.google.com&custom_google=True")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=https%3A%2F%2Fwww.google.com&custom_google=True')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=https%3A%2F%2Fwww.google.com&custom_google=True');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=https%3A%2F%2Fwww.google.com&custom_google=True", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
POST / PUT
To send a POST / PUT request, send the POST / PUT request to the main endpoint with your api_key
and url
parameter.
Data will be forwarded transparently to the target web page.
Headers and cookies will also be returned.
Below is an example using httpbin.org
, a service mirroring HTTP requests sent to the site.
curl -X "POST" "https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY" \
-H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \
--data-urlencode "KEY_1=VALUE_1"
# Install the Python Requests library:
# pip install requests
import requests
def send_request():
# Post
# POST https://app.scrapingbee.com/api/v1
try:
response = requests.post(
url="https://app.scrapingbee.com/api/v1",
params={
"url": "https://httpbin.org/anything",
"api_key": "YOUR-API-KEY",
},
headers={
"Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
},
data={
"KEY_1": "VALUE_1",
},
)
print('Response HTTP Status Code: {status_code}'.format(
status_code=response.status_code))
print('Response HTTP Response Body: {content}'.format(
content=response.content))
except requests.exceptions.RequestException:
print('HTTP Request failed')
// request Post
(function(callback) {
'use strict';
const httpTransport = require('https');
const responseEncoding = 'utf8';
const httpOptions = {
hostname: 'app.scrapingbee.com',
port: '443',
path: '/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY',
method: 'POST',
headers: {"Content-Type":"application/x-www-form-urlencoded; charset=utf-8"}
};
httpOptions.headers['User-Agent'] = 'node ' + process.version;
const request = httpTransport.request(httpOptions, (res) => {
let responseBufs = [];
let responseStr = '';
res.on('data', (chunk) => {
if (Buffer.isBuffer(chunk)) {
responseBufs.push(chunk);
}
else {
responseStr = responseStr + chunk;
}
}).on('end', () => {
responseStr = responseBufs.length > 0 ?
Buffer.concat(responseBufs).toString(responseEncoding) : responseStr;
callback(null, res.statusCode, res.headers, responseStr);
});
})
.setTimeout(0)
.on('error', (error) => {
callback(error);
});
request.write("KEY_1=VALUE_1")
request.end();
})((error, statusCode, headers, body) => {
console.log('ERROR:', error);
console.log('STATUS:', statusCode);
console.log('HEADERS:', JSON.stringify(headers));
console.log('BODY:', body);
});
import java.io.IOException;
import org.apache.http.client.fluent.*;
import org.apache.http.entity.ContentType;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Post (POST )
try {
// Create request
Content content = Request.Post("https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY")
// Add headers
.addHeader("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
// Add body
.bodyForm(Form.form()
.add("KEY_1", "VALUE_1")
.build())
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Post (POST )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
data = {
"KEY_1" => "VALUE_1",
}
body = URI.encode_www_form(data)
# Create Request
req = Net::HTTP::Post.new(uri)
# Add headers
req.add_field "Content-Type", "application/x-www-form-urlencoded; charset=utf-8"
# Set body
req.body = body
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{res.code}"
puts "Response HTTP Response Body: #{res.body}"
rescue StandardError => e
puts "HTTP Request failed (#{e.message})"
end
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// set headers
curl_setopt($ch, CURLOPT_HTTPHEADER, [
'Content-Type: application/x-www-form-urlencoded; charset=utf-8',
]);
// form body
$body = [
'KEY_1' => 'VALUE_1',
];
$body = http_build_query($body);
// set body
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);>
package main
import (
"fmt"
"io/ioutil"
"net/http"
"net/url"
"bytes"
)
func sendPost() {
// Post (POST https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY)
params := url.Values{}
params.Set("KEY_1", "VALUE_1")
body := bytes.NewBufferString(params.Encode())
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("POST", "https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY", body)
// Headers
req.Header.Add("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
Credit cost for your requests
Each ScrapingBee plan provides a certain amount of API credits per month.
It costs 1 to 75 credits to make requests to the ScrapingBee API. The credit cost depends on the parameters used with your API calls.
Here is a breakdown of ScrapingBee API credit costs:
Feature used | API credit cost |
---|---|
Rotating Proxy without JavaScript rendering | 1 |
Rotating Proxy with JavaScript rendering (default) | 5 |
Premium Proxy without JavaScript rendering | 10 |
Premium Proxy with JavaScript rendering | 25 |
Stealth Proxy without JavaScript rendering | (coming soon) |
Stealth Proxy with JavaScript rendering | 75 |
Usage endpoint
To programmatically monitor credit consumption and concurrency usage use /usage
endpoint.
Calls to this endpoint will not increase concurrency, but you can only call it 6 times per minute.
Please note that the results are available in real-time.
curl "https://app.scrapingbee.com/api/v1/usage?api_key=YOUR-API-KEY"
# Install the Python Requests library:
# `pip install requests`
import requests
def send_request():
response = requests.get(
url="https://app.scrapingbee.com/api/v1/usage",
params={
"api_key": "YOUR-API-KEY",
},
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
send_request()
// request Axios
const axios = require('axios');
axios.get('https://app.scrapingbee.com/api/v1/usage', {
params: {'api_key': 'YOUR-API-KEY'}
}).then(function (response) {
// handle success
console.log(response);
})
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/usage?api_key=YOUR-API-KEY")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/usage?api_key=YOUR-API-KEY')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/usage?api_key=YOUR-API-KEY');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
?>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/usage?api_key=YOUR-API-KEY", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Results.
{
"max_api_credit": 20000000,
"used_api_credit": 3704332,
"max_concurrency": 200,
"current_concurrency": 1,
"renewal_subscription_date": "2022-04-18T10:05:58.134716"
}
Status Code
Response Status Code
The following list of HTTP codes are returned by ScrapingBee.
Code | Billed? | Meaning | Solution |
---|---|---|---|
200 | Yes | Successful Call | |
400 | No | Bad request | Incorrect parameters or parameters type. See the message in the response body. |
401 | No | No more credit available | Please upgrade your plan or contact sales. |
404 | Yes | Requested URL not found | Provide a valid URL. |
410 | Yes | Requested URL gone | Provide a valid URL. |
413 | No | File too large | Request a smaller file. |
429 | No | Too many concurrent requests. | Please upgrade your plan or contact sales. |
500 | No | Misc error | Please retry, and see the message in the response body. |
Transparent HTTP status code
transparent_status_code
[boolean
]
(default= false
)
By default, ScrapingBee will return an HTTP 500 whenever the requested URL returns something other than: a 200-299 or a 404.
To get same body and status code as the requested URL in any circumstances, use the transparent_status_code=true
parameter.
When this parameter is set to true:
- Every requests you make will be considered successful and will cost some amount of credits.
- ScrapingBee will not retry the request multiple time if the request returns a 500 error.
curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&transparent_status_code=True"
# Install the Python ScrapingBee library:
# pip install scrapingbee
from scrapingbee import ScrapingBeeClient
client = ScrapingBeeClient(api_key='YOUR-API-KEY')
response = client.get('YOUR-URL',
params = {
'transparent_status_code': 'True',
}
)
print('Response HTTP Status Code: ', response.status_code)
print('Response HTTP Response Body: ', response.content)
// Install the Node ScrapingBee library
// npm install scrapingbee
const scrapingbee = require('scrapingbee');
async function get(url) {
var client = new scrapingbee.ScrapingBeeClient('YOUR-API-KEY');
var response = await client.get({
url: url,
params: {
'transparent_status_code': 'True',
},
})
return response
}
get('YOUR-URL').then(function (response) {
var decoder = new TextDecoder();
var text = decoder.decode(response.data);
console.log(text);
}).catch((e) => console.log('A problem occurs : ' + e.response.data));
import java.io.IOException;
import org.apache.http.client.fluent.*;
public class SendRequest
{
public static void main(String[] args) {
sendRequest();
}
private static void sendRequest() {
// Classic (GET )
try {
// Create request
Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&transparent_status_code=True")
// Fetch request and return content
.execute().returnContent();
// Print content
System.out.println(content);
}
catch (IOException e) { System.out.println(e); }
}
}
require 'net/http'
require 'net/https'
# Classic (GET )
def send_request
uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&transparent_status_code=True')
# Create client
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
# Create Request
req = Net::HTTP::Get.new(uri)
# Fetch Request
res = http.request(req)
puts "Response HTTP Status Code: #{ res.code }"
puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
puts "HTTP Request failed (#{ e.message })"
end
send_request()
<?php
// get cURL resource
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&transparent_status_code=True');
// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// send the request and save response to $response
$response = curl_exec($ch);
// stop if fails
if (!$response) {
die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}
echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;
// close curl resource to free up system resources
curl_close($ch);
>
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
func sendClassic() {
// Create client
client := &http.Client{}
// Create request
req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&transparent_status_code=True", nil)
parseFormErr := req.ParseForm()
if parseFormErr != nil {
fmt.Println(parseFormErr)
}
// Fetch Request
resp, err := client.Do(req)
if err != nil {
fmt.Println("Failure : ", err)
}
// Read Response Body
respBody, _ := ioutil.ReadAll(resp.Body)
// Display Results
fmt.Println("response Status : ", resp.Status)
fmt.Println("response Headers : ", resp.Header)
fmt.Println("response Body : ", string(respBody))
}
func main() {
sendClassic()
}
Response Headers
The following is the list of additional HTTP headers returned by ScrapingBee.
Name | Meaning |
---|---|
Spb-cost |
Request cost in credits. |
Spb-initial-status-code |
The initial status code returned by the scraped page. Useful when the page redirects. |
Spb-resolved-url |
The resolved URL of the scraped page. Useful when the page redirects. |