Issue
I want to scrape the user-review section of this page: https://www.zappos.com/p/lamade-mozza-halter-pullover-black/product/9796103/color/3. However, my spider returns an empty result even though the selector path is correct.
import scrapy
from scrapy import Selector,Request
class LaptopSpider(scrapy.Spider):
    """Spider from the question: request one product page and print a CSS match."""

    name = 'cs'

    def start_requests(self):
        """Yield the single request for the product page, handled by ``parse``."""
        # The URL literal is reproduced exactly as posted, leading space included.
        product_page = ' https://www.zappos.com/p/lamade-mozza-halter-pullover-black/product/9796103/color/3'
        yield Request(product_page, callback=self.parse)

    def parse(self, response):
        """Print whatever ``.get()`` returns for the review text selector."""
        review_css = '#productRecap > div.p--z > div:nth-child(3) > div > div > div > div > div.Oi-z > div::text'
        first_match = response.css(review_css).get()
        print(first_match)
Solution
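The selector most likely comes back empty because the reviews are not part of the HTML that Scrapy downloads; they appear to be loaded separately from a reviews API. Try requesting that API directly to get the reviews for the link in your post: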
import scrapy
class ZapposSpider(scrapy.Spider):
    """Fetch the reviews of one Zappos product from the reviews API endpoint.

    The product id is parsed out of ``link`` and sent to ``base_url`` as the
    ``productId`` query parameter. Each entry of the ``reviews`` list in the
    JSON reply is emitted as a ``{"reviewer": ..., "review": ...}`` item.
    """

    name = 'zappos'
    link = 'https://www.zappos.com/p/lamade-mozza-halter-pullover-black/product/9796103/color/3'
    base_url = 'https://api.prod.cassiopeia.ugc.zappos.com/display/v2/reviews'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
    }
    # Query-string template; ``productId`` is filled in per request.
    params = {
        'offset': '0',
        'page': '1',
        'productId': '',
        'sort': 'upVotes:desc,overallRating:desc,reviewDate:desc'
    }

    def start_requests(self):
        """Yield one GET request to the reviews endpoint for the product in ``link``.

        Raises:
            IndexError: if ``link`` does not contain a ``product/`` segment.
        """
        # The id is the path segment right after "product/".
        product_id = self.link.split("product/")[1].split("/")[0]
        # Build a per-request copy so the class-level template, which is
        # shared by every instance, is never mutated.
        query = dict(self.params, productId=product_id)
        yield scrapy.FormRequest(
            url=self.base_url,
            headers=self.headers,
            callback=self.parse,
            method="GET",
            formdata=query,
        )

    def parse(self, response):
        """Yield one item per review found in the JSON response.

        A missing or empty ``reviews`` key yields nothing, and a review
        lacking ``name`` or ``summary`` yields ``None`` for that field rather
        than aborting the remaining reviews.
        """
        # NOTE(review): assumes the JSON payload is an object (dict) -- confirm.
        reviews = response.json().get('reviews') or []
        for entry in reviews:
            yield {
                "reviewer": entry.get('name'),
                "review": entry.get('summary'),
            }
Answered By - SIM
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.