Question: I have written the code for extracting the data but the data is not scrapping into the csv file or excel sheet import scrapy import
I have written the code for extracting the data but the data is not scrapping into the csv file or excel sheet
import scrapy
import pandas as pd
import time
import csv
starttime=time.time()
class ArtisanDataSpider(scrapy.Spider):
name = "artisan_data"
start_urls = ['http://www.handicrafts.nic.in/ArtisanData.aspx?MID=SZmOd%2fCrxTo9CHD2XKF+pA%3d%3d']
def parse(self, response):
# Select the form and fill in the form data
form = response.xpath('//form[@id="form1"]')
form.xpath('.//select[@name="ddlState"]/option[text()="Uttar Pradesh"]/@value').extract_first()
form.xpath('.//select[@name="ddlDistrict"]/option[text()="Sant Ravidas Nagar"]/@value').extract_first()
form.xpath('.//select[@name="ddlDistrict"]/option[text()="Agra"]/@value').extract_first()
form.xpath('.//select[@name="ddlDistrict"]/option[text()="Varanasi"]/@value').extract_first()
yield scrapy.FormRequest.from_response(response, formdata={'ddlState': 'Uttar Pradesh', 'ddlDistrict': ['Sant Ravidas Nagar', 'Agra', 'Varanasi'],'btnSubmit': 'Submit'},
callback=self.parse_result)
def parse_result(self, response):
rows = response.xpath('//table[@id="gvArtisanData"]/tr')
for row in rows:
PEHCHAN_CARD_NO = row.xpath('./td[1]/text()').extract_first()
ARTISIAN_NAME = row.xpath('./td[2]/text()').extract_first()
Father_spouse = row.xpath('./td[3]/text()').extract_first()
Category = row.xpath('./td[4]/text()').extract_first()
AADHARNO = row.xpath('./td[5]/text()').extract_first()
NAME_OF_CRAFT = row.xpath('./td[6]/text()').extract_first()
MOBILENO= row.xpath('./td[7]/text()').extract_first()
VILLAGE= row.xpath('./td[8]/text()').extract_first()
TOWN= row.xpath('./td[9]/text()').extract_first()
CITY = row.xpath('./td[10]/text()').extract_first()
DISTRICT = row.xpath('./td[11]/text()').extract_first()
STATE = row.xpath('./td[12]/text()').extract_first()
yield {'PEHCHAN_CARD_NO':PEHCHAN_CARD_NO, 'ARTISIAN_NAME':ARTISIAN_NAME, 'Father_spouse':Father_spouse, 'Category': Category, 'AADHAR_NO': AADHAR_NO, 'NAME_OF_CRAFT':NAME_OF_CRAFT,'MOBILENO':MOBILENO,'VILLAGE':VILLAGE,'TOWN':TOWN,'CITY':CITY, 'DISTRICT':DISTRICT, 'STATE':STATE}
next_page = response.xpath('//a[text()="Next"]/@href').extract_first()
if next_page:
stripped = (line.strip() for line in scrapy.Request(response.urljoin(next_page), callback=self.parse_result))
lines = (line.split(",") for line in stripped if line)
with open('log.csv', 'w') as out_file:
writer = csv.writer(out_file)
writer.writerow(('title', 'intro'))
writer.writerows(lines)
endtime=time.time()
result=endtime-starttime
print("the time taken is:", result)
I am getting 0 items were scrapped
STATE = row.xpath('./td[12]/text()').extract_first()
yield {'PEHCHAN_CARD_NO':PEHCHAN_CARD_NO, 'ARTISIAN_NAME':ARTISIAN_NAME, 'Father_spouse':Father_spouse, 'Category': Category, 'AADHAR_NO': AADHAR_NO, 'NAME_OF_CRAFT':NAME_OF_CRAFT,'MOBILENO':MOBILENO,'VILLAGE':VILLAGE,'TOWN':TOWN,'CITY':CITY, 'DISTRICT':DISTRICT, 'STATE':STATE}
next_page = response.xpath('//a[text()="Next"]/@href').extract_first()
stripped = (line.strip() for line in scrapy.Request(response.urljoin(next_page), callback=self.parse_result))
lines = (line.split(",") for line in stripped if line)
with open('log.csv', 'w') as out_file:
writer = csv.writer(out_file)
writer.writerow(('title', 'intro'))
writer.writerows(lines)
endtime=time.time()
result=endtime-starttime
print("the time taken is:", result)
Step by Step Solution
There are 3 Steps involved in it
Get step-by-step solutions from verified subject matter experts
