italianstream.py — "peliculas" item handler
Code:
def peliculas(item):
    """Build the movie listing for the channel.

    Scrapes the index page at item.url for movie entries (url, title,
    thumbnail), fetches each movie's detail page to extract the plot,
    and appends a paginator entry when a "next page" link is present.

    Parameters:
        item: channel Item whose .url points at the listing page.

    Returns:
        list of Item objects (action="findvideos" for movies, plus an
        optional action="peliculas" pager entry).
    """
    logger.info("pelisalacarta.italianstream peliculas")
    itemlist = []

    # Download the index page
    data = scrapertools.cache_page(item.url)

    # Extract the entries (folders): (url, title, thumbnail) per movie
    patron = '<div class="arch-thumb">[^<]+<a href="(.*?)" title="(.*?)"><img[^src]+src="(.*?)"[^<]+</a>[^<]+</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle, scrapedthumbnail in matches:
        # Fetch the detail page through the channel's cached fetcher,
        # consistent with the index fetch above (the original used a raw
        # urllib2.urlopen here, bypassing the cache).
        html = scrapertools.cache_page(scrapedurl)

        # Plot is the text between the "Trama:" marker and the closing </div>.
        start = html.find("Trama:")
        if start > -1:
            end = html.find("</div>", start)
            scrapedplot = html[start:end]
            # Strip the fixed-length markup prefix. NOTE(review): the
            # original guard was `startswith("")`, which is always true,
            # so this 23-char slice was effectively unconditional; the
            # exact prefix length matches the site's markup — confirm.
            scrapedplot = scrapedplot[23:]
        else:
            # BUGFIX: when the marker is absent the original computed
            # html[-1:end], yielding garbage; fall back to an empty plot.
            scrapedplot = ""

        if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
        itemlist.append( Item(channel=__channel__, action="findvideos", title=scrapedtitle , url=scrapedurl , thumbnail=scrapedthumbnail , plot=scrapedplot , folder=True, fanart=scrapedthumbnail) )

    # Extract the paginator ("next page" link inside wp-pagenavi)
    patronvideos  = '<div class="wp-pagenavi">.*?<a href="([^"]+)" >›</a></div>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        # Pager hrefs may be relative; resolve against the current page URL.
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append( Item(channel=__channel__, action="peliculas", title="[COLOR orange]Successivo >>[/COLOR]" , url=scrapedurl , folder=True, thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png") )

    return itemlist

