AliExpress Parts Python Scraper


Mar 29, 2015
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementClickInterceptedException: Message: Element <button class="next-btn next-medium next-btn-normal next-pagination-item next-next" type="button"> is not clickable at point (699,649) because another element <div class="next-overlay-backdrop"> obscures it

Does anyone know how to fix this? The script only scrapes a single page: when it tries to click the "next" pagination button, the `next-overlay-backdrop` element obscures it (also asked here).

## Reddit r/indianripoff
# AliExpress parts scraper: types a part name into the AliExpress search box,
# walks the result pages and writes price / shipping / lot size / shop / title
# of every listing into an HTML table.
#
# NOTE(review): the posted source was collapsed onto a few lines and several
# tokens were lost (three selenium import paths, the body of convert(), the
# search URL).  Those pieces are reconstructed below and flagged individually.
import re
import sys
import time

from selenium import webdriver
from selenium.common.exceptions import ElementClickInterceptedException
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import TimeoutException
# NOTE(review): the next three module paths were stripped from the paste; these
# are the standard selenium locations of By, WebDriverWait and
# expected_conditions.
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

xchg_rate = 72.6        # multiplier applied to every scraped dollar amount ($ -> rupee)
browser_timeout = 100   # timeout handed to every WebDriverWait
# NOTE(review): the URL literal was empty in the paste.  ali_search() checks
# that the page title contains 'AliExpress', so the site home page is assumed
# here -- confirm.
url = 'https://www.aliexpress.com/'
#url = 'file:///tmp/1N4001%20-%20Buy%201N4001%20with%20free%20shipping%20on%20AliExpress.html'
# Copy the capabilities so selenium's shared FIREFOX dict is not mutated.
caps = DesiredCapabilities.FIREFOX.copy()
caps["pageLoadStrategy"] = 'eager'
ignored_exceptions = (NoSuchElementException, StaleElementReferenceException,)
page_hits = 1           # number of result pages; updated by total_page_hits()

# A dollar amount with an optional fractional part; one capture group, which
# convert() relies on.
_MONEY_RE = re.compile(r'\s*([0-9]+(?:\.[0-9]+)?)')


def _wait():
    """Return a WebDriverWait on the global driver with the script's settings."""
    return WebDriverWait(driver, browser_timeout,
                         ignored_exceptions=ignored_exceptions)


def _escape_html(text):
    """Escape &, < and > so scraped text cannot break the HTML log table."""
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def load_all_srch_listings():
    """Wait for the result container, then page down so all listings load.

    Raises:
        selenium TimeoutException: the product container never appeared.
        SystemExit: the page has no <body> element to send keys to.
    """
    _wait().until(EC.presence_of_element_located(
        (By.XPATH, '//div[@class="product-container"]')))
    body = driver.find_element(By.XPATH, '//body')
    if 'body' not in body.tag_name:
        raise SystemExit('no srch_result <body>')
    body.send_keys(Keys.HOME)
    for _ in range(1, 10):
        time.sleep(1)
        body.send_keys(Keys.PAGE_DOWN)  # scroll to the page bottom so all listings load


def process_lots(e_l):
    """Return the lot size text of a listing.

    Args:
        e_l: list of matched "packaging-sale" row elements; exactly one is
            expected.

    Returns:
        The innerHTML of the row's <span> with 'pieces / lot' removed.

    Raises:
        SystemExit: zero or several rows matched, or the row is empty.
    """
    if len(e_l) != 1:
        raise SystemExit('ambiguous lots' + str(e_l))
    lot_e = e_l[0]
    if lot_e is None or lot_e == 0:
        raise SystemExit('error in lot value' + str(e_l))
    span_e = lot_e.find_element(By.TAG_NAME, 'span')
    txt = span_e.get_attribute('innerHTML') or ''
    return txt.replace('pieces / lot', '')


def convert(m_obj):
    """re.sub callback: turn one matched dollar amount into rupees.

    NOTE(review): part of this function was lost in the paste; it is
    reconstructed from the surviving fragments (a length check, ``money``,
    the conversion expression and the error message).

    Args:
        m_obj: match object with exactly one group holding the amount.

    Returns:
        The amount multiplied by ``xchg_rate``, rounded to 3 places, as str.

    Raises:
        SystemExit: the match does not have exactly one group.
    """
    groups = m_obj.groups()
    if len(groups) != 1:
        raise SystemExit('ambiguous money ' + str(len(groups)) + str(groups))
    money = groups[0]
    return str(round(float(money) * xchg_rate, 3))


def process_money(e_l):
    """Return a price or shipping cost converted to rupees, as text.

    Args:
        e_l: list of matched price elements (zero or one expected).

    Returns:
        '0' when nothing matched, the element has no text, or shipping is
        included; otherwise the element text with currency decoration removed
        and every amount converted by convert().

    Raises:
        SystemExit: several elements matched, or the text contains no '$'.
    """
    if len(e_l) > 1:
        raise SystemExit('ambiguous price' + str(e_l))
    if not e_l:
        return str(0)
    txt = e_l[0].text
    if txt is None:
        return str(0)
    if 'incl' in txt:
        return str(0)  # Shipping incl.
    if '$' not in txt:
        raise SystemExit('unknown value' + txt)
    # clean up our txt
    for token in ('+', '$', 'US', 'Shipping:'):
        txt = txt.replace(token, '')
    return _MONEY_RE.sub(convert, txt)  # convert $ to rupee


def process_shop(e_l):
    """Return the shop name text ('0' if absent, 'error' if it has no text).

    Raises:
        SystemExit: more than one shop element matched.
    """
    if len(e_l) > 1:
        raise SystemExit('ambiguous shop' + str(e_l))
    if not e_l:
        return str(0)
    txt = e_l[0].text
    return 'error' if txt is None else txt


def process_title(e_l):
    """Return the listing's ``title`` attribute ('0' if absent, 'error' if unset).

    Raises:
        SystemExit: more than one title element matched.
    """
    if len(e_l) > 1:
        raise SystemExit('ambiguous title' + str(e_l))
    if not e_l:
        return str(0)
    txt = e_l[0].get_attribute('title')
    return 'error' if txt is None else txt


def process_container(listings):
    """Write one HTML table row per listing to the global log file.

    Args:
        listings: iterable of listing elements, as returned by
            locate_page_listings_container().
    """
    count = 0
    for count, div in enumerate(listings, 1):
        price = process_money(div.find_elements(By.CLASS_NAME, 'price-current'))
        ship_cost = process_money(div.find_elements(By.CLASS_NAME, 'shipping-value'))
        lot_count = process_lots(div.find_elements(
            By.XPATH, './/div[@class="item-price-row packaging-sale"]'))
        shop_name = process_shop(div.find_elements(By.CLASS_NAME, 'store-name'))
        title = process_title(div.find_elements(By.CLASS_NAME, 'item-title'))
        row = [price, ship_cost, lot_count, shop_name, title]

        # All lookups succeeded; only now open the row so that a failed lookup
        # cannot leave a dangling <tr> in the log.
        log_fh.write('\n<tr>\n')
        for item in row:
            try:
                log_fh.write('<td>' + _escape_html(item) + '</td>')
            except Exception:
                # Show the offending row before propagating the error.
                print(repr(row))
                print(repr(item))
                raise
        log_fh.write('\n</tr>')
    print('count of items ' + str(count))


def locate_page_listings_container():
    """Return the listing <li> elements of the current result page.

    Raises:
        selenium TimeoutException: the product container never appeared.
        SystemExit: the container is not a <div>, or it holds no listings.
    """
    element = _wait().until(EC.presence_of_element_located(
        (By.XPATH, '//div[@class="product-container"]')))
    if 'div' not in element.tag_name:
        raise SystemExit('no product-container')
    divs = element.find_elements(
        By.XPATH, './/li[@class="list-item packaging_sale"]')
    if not divs:
        raise SystemExit('no listings found')
    return divs


def total_page_hits():
    """Read the number of result pages into the global ``page_hits``.

    Returns:
        The first number found in the 'total-page' element as an int, or 1
        when the element is missing or holds no number.
    """
    global page_hits
    page_hits = 1
    try:
        tmp = driver.find_element(By.CLASS_NAME, 'total-page')
    except NoSuchElementException:
        return page_hits
    numbers = re.findall(r'([0-9]+)', tmp.text or '')
    if numbers:
        page_hits = int(numbers[0])
    return page_hits


def page_next():
    """Click the pagination "next" button.

    A ``next-overlay-backdrop`` div can cover the button, in which case a
    native click raises ElementClickInterceptedException.  The button is first
    scrolled into view; if the click is still intercepted it is dispatched
    through JavaScript, which is not subject to the obscuring check.

    Raises:
        SystemExit: no clickable next button showed up within the timeout.
    """
    try:
        button = _wait().until(EC.element_to_be_clickable(
            (By.XPATH, '//button[contains(@class, " next-next")]')))
    except TimeoutException:
        raise SystemExit('no next')
    driver.execute_script(
        'arguments[0].scrollIntoView({block: "center"});', button)
    try:
        button.click()
    except ElementClickInterceptedException:
        driver.execute_script('arguments[0].click();', button)


def _scrape_current_page():
    """Load, locate and log every listing of the page currently displayed."""
    load_all_srch_listings()
    process_container(locate_page_listings_container())


def ali_search(url, txt):
    """Search AliExpress for ``txt`` and log the listings of every result page.

    Args:
        url: page to open; its title must contain 'AliExpress'.
        txt: search text, typed one character per second.

    Raises:
        SystemExit: the wrong page loaded or the search box is missing.
    """
    driver.get(url)
    if 'AliExpress' not in driver.title:
        raise SystemExit('unexpected page title: ' + driver.title)
    _wait().until(EC.presence_of_element_located(
        (By.XPATH, '//div[@class="search-key-box"]')))
    box = driver.find_element(By.ID, 'search-key')
    if 'input' not in box.tag_name:
        raise SystemExit('failed to locate search element')
    for c in txt:
        time.sleep(1)
        box.send_keys(c)
    box.send_keys(Keys.RETURN)  # issue the search

    _scrape_current_page()
    # The first page is already done; advance once per remaining page.  The
    # one-second pauses in load_all_srch_listings() give the next page time to
    # replace the previous results before they are read.
    for _ in range(total_page_hits() - 1):
        page_next()
        _scrape_current_page()


def close_log(linger=3600):
    """Write the HTML footer, close the log and pause.

    Args:
        linger: seconds to sleep after closing the file (default 3600, the
            value the script always used).
    """
    log_fh.write('\n</table></body>')
    log_fh.close()
    if linger:
        time.sleep(linger)


def create_log(fname='/tmp/log.html'):
    """Open the HTML log as the global ``log_fh`` and write the table header.

    Args:
        fname: path of the log file.  The parameter used to be ignored and
            '/tmp/log.html' was always written; that path is now the default,
            so calling without arguments writes the same file as before.
    """
    log_hdr = ''' <!doctype html> <title>parts</title> <body> <table> '''
    global log_fh
    log_fh = open(fname, 'w')
    log_fh.write(log_hdr)


def getpart_and_init():
    """Entry point: start Firefox and scrape the part named in sys.argv[1].

    Prints a usage line and returns, without starting the browser or creating
    the log, unless exactly one argument was given.
    """
    global driver
    if len(sys.argv) != 2:
        print('Usage: ' + sys.argv[0] + ' part')
        return
    gecko = '/mnt/sdb1/root/geckodriver'
    ffox = '/mnt/sdb1/firefox/firefox-bin'
    with webdriver.Firefox(executable_path=gecko, firefox_binary=ffox,
                           capabilities=caps) as driver:
        create_log()
        try:
            ali_search(url, sys.argv[1])
        except BaseException:
            # Still terminate the HTML and release the file, but do not linger.
            close_log(linger=0)
            raise
        close_log()


#-------------------------------
if __name__ == '__main__':
    getpart_and_init()
Order your Rega Turntables & Amplifiers from - India's reputed online dealer.