@jvmvik this issue is for discussion.
I'm not 100% sure what the cause is, but there might be some overhead or additional processing in the wrapper that leads to the longer response times. Or is this expected behavior? Let me know if that's the case.
Table shows results when making 50 requests:

| Direct requests to serpapi.com/search.json | Requests through the API wrapper | Async batch requests with Queue |
|---|---|---|
| ~7.192448616027832 seconds | ~135.2969319820404 seconds | ~24.80349826812744 seconds |
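To narrow down where the time goes, it might help to time a single request in each style before scaling up to 50; a minimal sketch (the API key is a placeholder):

```python
import time

import requests
from serpapi import YoutubeSearch

params = {
    'api_key': '...',
    'engine': 'youtube',
    'device': 'desktop',
    'search_query': 'burly',
    'no_cache': 'true'
}

# One direct request to the JSON endpoint
start = time.time()
requests.get('https://serpapi.com/search.json', params=params).json()
print(f'direct:  {time.time() - start:.2f}s')

# One request through the wrapper
start = time.time()
YoutubeSearch(params).get_json()
print(f'wrapper: {time.time() - start:.2f}s')
```

If a single wrapper call is comparable to a single direct call, the gap in the table is more likely about how the 50 calls are scheduled than about per-request overhead.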
Making a direct request to serpapi.com/search.json:
```python
import aiohttp
import asyncio
import json
import time


async def fetch_results(session, query):
    params = {
        'api_key': '...',
        'engine': 'youtube',
        'device': 'desktop',
        'search_query': query,
        'no_cache': 'true'
    }
    url = 'https://serpapi.com/search.json'
    async with session.get(url, params=params) as response:
        results = await response.json()

    data = []
    if 'error' in results:
        print(results['error'])
    else:
        for result in results.get('video_results', []):
            data.append({
                'title': result.get('title'),
                'link': result.get('link'),
                'channel': result.get('channel', {}).get('name'),
            })
    return data


async def main():
    # 50 queries
    queries = [
        'burly', 'creator', 'doubtful', 'chance', 'capable', 'window',
        'dynamic', 'train', 'worry', 'useless', 'steady', 'thoughtful',
        'matter', 'rotten', 'overflow', 'object', 'far-flung', 'gabby',
        'tiresome', 'scatter', 'exclusive', 'wealth', 'yummy', 'play',
        'saw', 'spiteful', 'perform', 'busy', 'hypnotic', 'sniff',
        'early', 'mindless', 'airplane', 'distribution', 'ahead', 'good',
        'squeeze', 'ship', 'excuse', 'chubby', 'smiling', 'wide',
        'structure', 'wrap', 'point', 'file', 'sack', 'slope',
        'therapeutic', 'disturbed'
    ]

    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.ensure_future(fetch_results(session, query)) for query in queries]
        start_time = time.time()
        results = await asyncio.gather(*tasks)
        end_time = time.time()

    data = [item for sublist in results for item in sublist]
    print(json.dumps(data, indent=2, ensure_ascii=False))
    print(f'Script execution time: {end_time - start_time} seconds')  # ~7.192448616027832 seconds


asyncio.run(main())
```
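As a side note, this script fires all 50 requests at once. If rate limits ever become a factor in comparisons like this, capping concurrency with a semaphore keeps the benchmark controlled; a small sketch reusing `fetch_results` from the script above (and assuming the same `queries` list is in scope):

```python
async def main_limited():
    semaphore = asyncio.Semaphore(10)  # at most 10 requests in flight (arbitrary cap)

    async def limited_fetch(session, query):
        async with semaphore:
            return await fetch_results(session, query)

    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(limited_fetch(session, q) for q in queries))
```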
Same code but using the wrapper YoutubeSearch (not 100% sure if it's a valid comparison):
```python
import asyncio
import json
import time

import aiohttp
from serpapi import YoutubeSearch


async def fetch_results(session, query):
    params = {
        'api_key': '...',
        'engine': 'youtube',
        'device': 'desktop',
        'search_query': query,
        'no_cache': 'true'
    }
    # Note: the wrapper makes its own synchronous HTTP request here, so
    # `session` goes unused and this coroutine blocks the event loop.
    search = YoutubeSearch(params)
    results = search.get_json()

    data = []
    if 'error' in results:
        print(results['error'])
    else:
        for result in results.get('video_results', []):
            data.append({
                'title': result.get('title'),
                'link': result.get('link'),
                'channel': result.get('channel', {}).get('name'),
            })
    return data


async def main():
    # 50 queries (same list as above)
    queries = [
        'burly', 'creator', 'doubtful', 'chance', 'capable', 'window',
        'dynamic', 'train', 'worry', 'useless', 'steady', 'thoughtful',
        'matter', 'rotten', 'overflow', 'object', 'far-flung', 'gabby',
        'tiresome', 'scatter', 'exclusive', 'wealth', 'yummy', 'play',
        'saw', 'spiteful', 'perform', 'busy', 'hypnotic', 'sniff',
        'early', 'mindless', 'airplane', 'distribution', 'ahead', 'good',
        'squeeze', 'ship', 'excuse', 'chubby', 'smiling', 'wide',
        'structure', 'wrap', 'point', 'file', 'sack', 'slope',
        'therapeutic', 'disturbed'
    ]

    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.ensure_future(fetch_results(session, query)) for query in queries]
        start_time = time.time()
        results = await asyncio.gather(*tasks)
        end_time = time.time()

    data = [item for sublist in results for item in sublist]
    print(json.dumps(data, indent=2, ensure_ascii=False))
    print(f'Script execution time: {end_time - start_time} seconds')  # ~135.2969319820404 seconds


asyncio.run(main())
```
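A likely reason for the ~135 s: `YoutubeSearch.get_json()` makes a synchronous HTTP request, so each task blocks the event loop until its request finishes, and the 50 searches effectively run one after another. If that's the cause, offloading the wrapper call to a worker thread should restore the concurrency; a minimal sketch, assuming Python 3.9+ for `asyncio.to_thread`:

```python
import asyncio

from serpapi import YoutubeSearch


async def fetch_results_threaded(query):
    params = {
        'api_key': '...',
        'engine': 'youtube',
        'device': 'desktop',
        'search_query': query,
        'no_cache': 'true'
    }
    # Run the blocking wrapper call in a worker thread so the event loop
    # can keep the other searches in flight.
    results = await asyncio.to_thread(lambda: YoutubeSearch(params).get_json())
    return results.get('video_results', [])
```

This isn't a fix in the wrapper itself, just a way to check whether the blocking call explains the difference.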
Using async batch requests with Queue:
```python
import json
import re
import time
from queue import Queue

from serpapi import YoutubeSearch

# 50 queries
queries = [
    'burly', 'creator', 'doubtful', 'chance', 'capable', 'window',
    'dynamic', 'train', 'worry', 'useless', 'steady', 'thoughtful',
    'matter', 'rotten', 'overflow', 'object', 'far-flung', 'gabby',
    'tiresome', 'scatter', 'exclusive', 'wealth', 'yummy', 'play',
    'saw', 'spiteful', 'perform', 'busy', 'hypnotic', 'sniff',
    'early', 'mindless', 'airplane', 'distribution', 'ahead', 'good',
    'squeeze', 'ship', 'excuse', 'chubby', 'smiling', 'wide',
    'structure', 'wrap', 'point', 'file', 'sack', 'slope',
    'therapeutic', 'disturbed'
]

search_queue = Queue()
for query in queries:
    params = {
        'api_key': '...',
        'engine': 'youtube',
        'device': 'desktop',
        'search_query': query,
        'async': True,        # submit the search without waiting for it to finish
        'no_cache': 'true'
    }
    search = YoutubeSearch(params)
    results = search.get_dict()  # JSON -> Python dict
    if 'error' in results:
        print(results['error'])
        break
    print(f"Add search to the queue with ID: {results['search_metadata']['id']}")
    search_queue.put(results)

data = []
start_time = time.time()  # note: only the retrieval loop is timed, not submission
while not search_queue.empty():
    result = search_queue.get()
    search_id = result['search_metadata']['id']
    print(f'Get search from archive: {search_id}')
    search_archived = search.get_search_archive(search_id)
    print(f"Search ID: {search_id}, Status: {search_archived['search_metadata']['status']}")
    if re.search(r'Cached|Success', search_archived['search_metadata']['status']):
        for video_result in search_archived.get('video_results', []):
            data.append({
                'title': video_result.get('title'),
                'link': video_result.get('link'),
                'channel': video_result.get('channel', {}).get('name'),
            })
    else:
        # Search is still processing; put it back and poll again later
        print(f'Requeue search: {search_id}')
        search_queue.put(result)

print(json.dumps(data, indent=2))
print('All searches completed')
execution_time = time.time() - start_time
print(f'Script execution time: {execution_time} seconds')  # ~24.80349826812744 seconds
```
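One detail worth noting in this version: when a search isn't ready, it is re-queued and re-fetched immediately, so the loop can hammer the archive endpoint while searches finish. Adding a short pause before retrying might be worth testing; a minimal variation of the `else` branch (the 0.5 s delay is arbitrary):

```python
    else:
        # Search is still processing; wait briefly before polling again
        # to avoid tight-looping on the archive endpoint
        print(f'Requeue search: {search_id}')
        search_queue.put(result)
        time.sleep(0.5)  # arbitrary backoff; tune as needed
```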