italianstream.py --> item "peliculas"
Código: Seleccionar todo
def peliculas(item):
logger.info("pelisalacarta.italianstream peliculas")
itemlist = []
# Descarga la pagina
data = scrapertools.cache_page(item.url)
# Extrae las entradas (carpetas)
patron = '<div class="arch-thumb">[^<]+<a href="(.*?)" title="(.*?)"><img[^src]+src="(.*?)"[^<]+</a>[^<]+</div>'
matches = re.compile(patron,re.DOTALL).findall(data)
scrapertools.printMatches(matches)
for scrapedurl,scrapedtitle,scrapedthumbnail in matches:
response = urllib2.urlopen(scrapedurl)
html = response.read()
start = html.find("Trama:")
end = html.find("</div>", start)
scrapedplot = html[start:end]
if scrapedplot.startswith(""):
scrapedplot = scrapedplot[23:]
if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
itemlist.append( Item(channel=__channel__, action="findvideos", title=scrapedtitle , url=scrapedurl , thumbnail=scrapedthumbnail , plot=scrapedplot , folder=True, fanart=scrapedthumbnail) )
# Extrae el paginador
patronvideos = '<div class="wp-pagenavi">.*?<a href="([^"]+)" >›</a></div>'
matches = re.compile(patronvideos,re.DOTALL).findall(data)
scrapertools.printMatches(matches)
if len(matches)>0:
scrapedurl = urlparse.urljoin(item.url,matches[0])
itemlist.append( Item(channel=__channel__, action="peliculas", title="[COLOR orange]Successivo >>[/COLOR]" , url=scrapedurl , folder=True, thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png") )
return itemlist

Need a repack?