Multi-threading

Sometimes multiple threads are needed to speed up your scrape.

Example code

from scrapex import Scraper
from scrapex import common

# Single module-level Scraper instance; scrape_github_by_keyword() below uses
# it for every keyword it is called with.
s = Scraper()

def scrape_github_by_keyword(keyword):
    """Search GitHub for ``keyword`` and print the matching results heading.

    The search page is requested through the module-level scraper ``s``,
    with ``keyword`` sent as the ``q`` query parameter.
    """
    query = {'q': keyword}
    page = s.load(url='https://github.com/search', params=query)
    # Pull out the <h3> selected by the XPath and trim surrounding whitespace.
    heading = page.extract("//h3[contains(text(),'results')]")
    print(heading.strip())
    # ... (the rest of the per-keyword scraping logic is elided in this example)

def scrape():
    """Hand a fixed list of keywords to ``common.start_threads``.

    Each keyword is processed by ``scrape_github_by_keyword``.
    """
    search_terms = [
        'scraping tool',
        'python',
        'nodejs',
        'image processing',
        'many more',
    ]
    print('start 3 threads')
    # cc=3 lines up with the message above; presumably the number of
    # concurrent threads -- confirm against the scrapex documentation.
    common.start_threads(search_terms, scrape_github_by_keyword, cc=3)

# Only start scraping when this file is run directly as a script, not on import.
if __name__ == '__main__':
        scrape()