Página 1 de 1

Casa-cinema headers - Italian channel

Publicado: 05 Ago 2015, 07:16
por zanzibar1982
Hola todos,

I am trying to edit this channel with Italian contents http://casa-cinema.net

I understand it needs headers, and I found them with a Firefox plugin, but I get either a blank page or an error.

So I am asking: what am I doing wrong? The rest should be easy as even if they have embed videos

the video urls are all speedvideo and nowvideo.

Thanks in advance.

Código: Seleccionar todo

# -*- coding: utf-8 -*-
#------------------------------------------------------------
# pelisalacarta - XBMC Plugin
# Canal para casacinema
# http://blog.tvalacarta.info/plugin-xbmc/pelisalacarta/
#------------------------------------------------------------
import urlparse,urllib2,urllib,re
import os, sys

from core import logger
from core import config
from core import scrapertools
from core.item import Item
from servers import servertools

# Channel metadata constants (presumably read by the plugin framework; the
# consumer is not visible in this listing).
__channel__ = "casacinema"
__category__ = "F,S,A"
__type__ = "generic"
__title__ = "casacinema"
__language__ = "IT"

# Extra request headers handed to scrapertools.cache_page() by peliculas().
# NOTE(review): an HTTP Host header normally carries only the host name
# ("casa-cinema.net"), not a full URL with scheme and path -- this is a likely
# cause of the empty response; confirm.
# NOTE(review): the Cookie value is a fixed literal (presumably captured from
# one browser session) -- verify it is still accepted by the site.
headers = [
    ['Host','http://casa-cinema.net/'],
    ['User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'],
    ['Accept-Language','it-IT,it;q=0.8,en-US;q=0.5,en;q=0.3'],
    ['Accept-Encoding','gzip, deflate'],
    ['Cookie',' __cfduid=d840bd3e785f8920764c5e4a826a58a531438745692']
]

# Base URL of the site; used as the start page by mainlist().
sito="http://casa-cinema.net/"

# Debug flag read from the plugin settings; gates the verbose logging below.
DEBUG = config.get_setting("debug")

def isGeneric():
    """Always return True."""
    return True

def mainlist(item):
    """Build the channel's root menu: the latest-films listing and the search entry."""
    logger.info("pelisalacarta.casacinema mainlist")

    latest_films = Item(
        channel=__channel__,
        title="[COLOR azure]Ultimi film inseriti[/COLOR]",
        action="peliculas",
        url=sito,
        thumbnail="http://dc584.4shared.com/img/XImgcB94/s7/13feaf0b538/saquinho_de_pipoca_01")
    # Menu entries that are currently disabled:
    #itemlist.append( Item(channel=__channel__, title="[COLOR azure]Categorie film[/COLOR]", action="categorias", url=sito, thumbnail="http://xbmc-repo-ackbarr.googlecode.com/svn/trunk/dev/skin.cirrus%20extended%20v2/extras/moviegenres/All%20Movies%20by%20Genre.png"))
    #itemlist.append( Item(channel=__channel__, title="[COLOR azure]Serie TV[/COLOR]" , action="peliculas", url=sito, thumbnail="http://xbmc-repo-ackbarr.googlecode.com/svn/trunk/dev/skin.cirrus%20extended%20v2/extras/moviegenres/New%20TV%20Shows.png"))
    #itemlist.append( Item(channel=__channel__, title="[COLOR azure]Anime Cartoon Italiani[/COLOR]", action="peliculas", url=sito, thumbnail="http://orig09.deviantart.net/df5a/f/2014/169/2/a/fist_of_the_north_star_folder_icon_by_minacsky_saya-d7mq8c8.png"))
    search_entry = Item(
        channel=__channel__,
        title="[COLOR yellow]Cerca...[/COLOR]",
        action="search",
        thumbnail="http://dc467.4shared.com/img/fEbJqOum/s7/13feaf0c8c0/Search")

    return [latest_films, search_entry]

def peliculas(item):
    """Scrape the listing page at ``item.url`` into a list of film items.

    Downloads the page with the module-level ``headers``, extracts
    (url, title, thumbnail) triples with a regex, appends one "findvideos"
    Item per triple and, when a rel="next" link is found, a final
    "peliculas" Item pointing at the next page.
    """
    logger.info("pelisalacarta.casacinema peliculas")
    itemlist = []

    # Download the page
    
    data = scrapertools.cache_page(item.url, headers=headers)

    # Extract the entries (folders)
    patron  = '<div class="box-single-movies">\s*'
    patron += '<a href="([^>"]+)".*?title="([^>"]+)" >.*?<img class.*?<img.*?src="([^>"]+)"'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl,scrapedtitle,scrapedthumbnail in matches:
        # Plot scraping below is disabled (left commented out).
        #response = urllib2.urlopen(scrapedurl)
        #html = response.read()
        #start = html.find("<div class=\"aciklama\">")
        #end = html.find(">Fonte Trama</a>", start)
        #scrapedplot = html[start:end]
        #scrapedplot = re.sub(r'<.*?>', '', scrapedplot)
        #scrapedtitle=scrapertools.decodeHtmlentities(scrapedtitle.replace("Streaming",""))
        #if scrapedplot.startswith("<div class"):
           #scrapedplot = scrapedplot[30:]
        if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
        # NOTE(review): scrapedplot is never assigned in this function (every
        # assignment above is commented out), so the next line raises
        # NameError as soon as there is at least one match.
        itemlist.append( Item(channel=__channel__, action="findvideos", title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl , viewmode="movie_with_plot", thumbnail=scrapedthumbnail , plot=scrapedplot , folder=True) )

    # Extract the pager
    patronvideos  = 'rel="next" href="([^"]+)">»</a>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches)>0:
        # Resolve the next-page link against the current page URL
        scrapedurl = urlparse.urljoin(item.url,matches[0])
        itemlist.append( Item(channel=__channel__, action="peliculas", title="[COLOR orange]Successivo >>[/COLOR]" , url=scrapedurl , thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png", folder=True) )

    return itemlist

def categorias(item):
    """Return one "peliculas" Item per link found inside the page's ``kategori_list`` block."""
    logger.info("pelisalacarta.casacinema categorias")

    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Restrict the scan to the category list markup
    bloque = scrapertools.get_match(data,'<ul class="kategori_list">(.*?)</ul>')

    # Every <li> link inside that block is one category
    matches = re.findall('<li><a href="([^"]+)">([^<]+)</a></li>', bloque, re.DOTALL)
    scrapertools.printMatches(matches)

    itemlist = []
    no_thumbnail = ""
    no_plot = ""
    for href, label in matches:
        target = urlparse.urljoin(item.url, href)
        if DEBUG:
            logger.info("title=["+label+"], url=["+target+"], thumbnail=["+no_thumbnail+"]")
        entry = Item(channel=__channel__, action="peliculas" , title="[COLOR azure]" + label + "[/COLOR]", url=target, thumbnail=no_thumbnail, plot=no_plot)
        itemlist.append(entry)

    return itemlist

def search(item,texto):
    """Run a site search for ``texto`` and return the resulting item list.

    Points ``item.url`` at the site's ``?s=`` query and delegates to
    peliculas(). Any error is logged and an empty list is returned, so a
    global search across channels is not interrupted when this channel fails.
    """
    logger.info("[casacinema.py] "+item.url+" search "+texto)
    item.url = "http://casa-cinema.net/?s="+texto
    try:
        return peliculas(item)
    # Catch Exception rather than everything, so SystemExit/KeyboardInterrupt
    # still propagate, and log a readable traceback instead of the raw
    # exc_info() members.
    except Exception:
        import traceback
        logger.error(traceback.format_exc())
        return []

def test():
    """Channel self-check.

    Returns True as soon as one entry from the first menu section yields at
    least one video mirror, and False if none does.
    """
    from servers import servertools

    # The first menu entry is the listing of new films
    menu = mainlist(Item())
    candidates = peliculas(menu[0])
    return any(
        len(servertools.find_video_items( item=candidate )) > 0
        for candidate in candidates
    )

Re: Casa-cinema headers - Italian channel

Publicado: 05 Ago 2015, 08:35
por zanzibar1982
Tried this way, but no luck.

Código: Seleccionar todo

# -*- coding: utf-8 -*-
#------------------------------------------------------------
# pelisalacarta - XBMC Plugin
# Canal para casacinema
# http://blog.tvalacarta.info/plugin-xbmc/pelisalacarta/
#------------------------------------------------------------
import urlparse,urllib2,urllib,re
import os, sys

from core import logger
from core import config
from core import scrapertools
from core.item import Item
from servers import servertools

# Channel metadata constants (presumably read by the plugin framework; the
# consumer is not visible in this listing).
__channel__ = "casacinema"
__category__ = "F,S,A"
__type__ = "generic"
__title__ = "casacinema"
__language__ = "IT"

# Extra request headers handed to scrapertools.cache_page() by peliculas().
# NOTE(review): an HTTP Host header normally carries only the host name
# ("casa-cinema.net"), not a full URL with scheme and path -- this is a likely
# cause of the empty response; confirm.
# NOTE(review): the Cookie value is a fixed literal (presumably captured from
# one browser session) -- verify it is still accepted by the site.
headers = [
    ['Host','http://casa-cinema.net/'],
    ['User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'],
    ['Accept-Language','it-IT,it;q=0.8,en-US;q=0.5,en;q=0.3'],
    ['Accept-Encoding','gzip, deflate'],
    ['Cookie',' __cfduid=d840bd3e785f8920764c5e4a826a58a531438745692']
]

# Base URL of the site; used as the start page by mainlist().
sito="http://casa-cinema.net/"

# Debug flag read from the plugin settings; gates the verbose logging below.
DEBUG = config.get_setting("debug")

def isGeneric():
    """Always return True."""
    return True

def mainlist(item):
    """Build the channel's root menu: the latest-films listing and the search entry."""
    logger.info("pelisalacarta.casacinema mainlist")

    latest_films = Item(
        channel=__channel__,
        title="[COLOR azure]Ultimi film inseriti[/COLOR]",
        action="peliculas",
        url=sito,
        thumbnail="http://dc584.4shared.com/img/XImgcB94/s7/13feaf0b538/saquinho_de_pipoca_01")
    # Menu entries that are currently disabled:
    #itemlist.append( Item(channel=__channel__, title="[COLOR azure]Categorie film[/COLOR]", action="categorias", url=sito, thumbnail="http://xbmc-repo-ackbarr.googlecode.com/svn/trunk/dev/skin.cirrus%20extended%20v2/extras/moviegenres/All%20Movies%20by%20Genre.png"))
    #itemlist.append( Item(channel=__channel__, title="[COLOR azure]Serie TV[/COLOR]" , action="peliculas", url=sito, thumbnail="http://xbmc-repo-ackbarr.googlecode.com/svn/trunk/dev/skin.cirrus%20extended%20v2/extras/moviegenres/New%20TV%20Shows.png"))
    #itemlist.append( Item(channel=__channel__, title="[COLOR azure]Anime Cartoon Italiani[/COLOR]", action="peliculas", url=sito, thumbnail="http://orig09.deviantart.net/df5a/f/2014/169/2/a/fist_of_the_north_star_folder_icon_by_minacsky_saya-d7mq8c8.png"))
    search_entry = Item(
        channel=__channel__,
        title="[COLOR yellow]Cerca...[/COLOR]",
        action="search",
        thumbnail="http://dc467.4shared.com/img/fEbJqOum/s7/13feaf0c8c0/Search")

    return [latest_films, search_entry]

def peliculas(item):
    """Scrape the listing page at ``item.url`` into a list of film items.

    Downloads the page with the module-level ``headers``, extracts
    (url, title, thumbnail) triples with a regex and, for each one, fetches
    the film's own page to cut a plot text out of it. Appends one
    "findvideos" Item per film and, when a rel="next" link is found, a final
    "peliculas" Item pointing at the next page.
    """
    logger.info("pelisalacarta.casacinema peliculas")
    itemlist = []

    # Download the page
    
    data = scrapertools.cache_page(item.url, headers=headers)

    # Extract the entries (folders)
    patron  = '<div class="box-single-movies">\s*'
    patron += '<a href="([^>"]+)".*?title="([^>"]+)" >.*?'
    patron += '<img.*?src="([^>"]+)"'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl,scrapedtitle,scrapedthumbnail in matches:
        # NOTE(review): one extra HTTP request per listed film, made with
        # urllib2 directly (so without the custom headers used above) and
        # with no error handling; the response object is never closed.
        response = urllib2.urlopen(scrapedurl)
        html = response.read()
        # NOTE(review): str.find() returns -1 when a marker is missing; the
        # slice below then silently yields unrelated text or an empty string.
        start = html.find("class=\"alignleft img-responsive wp-post-image\"")
        end = html.find("rel=\"nofollow\">Fonte Trama</a></p>", start)
        scrapedplot = html[start:end]
        # Strip anything that looks like an HTML tag from the plot text
        scrapedplot = re.sub(r'<.*?>', '', scrapedplot)
        scrapedtitle=scrapertools.decodeHtmlentities(scrapedtitle.replace("Streaming",""))
        #if scrapedplot.startswith("<div class"):
           #scrapedplot = scrapedplot[30:]
        if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
        itemlist.append( Item(channel=__channel__, action="findvideos", title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl , viewmode="movie_with_plot", thumbnail=scrapedthumbnail , plot=scrapedplot , folder=True) )

    # Extract the pager
    patronvideos  = 'rel="next" href="([^"]+)">»</a>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches)>0:
        # Resolve the next-page link against the current page URL
        scrapedurl = urlparse.urljoin(item.url,matches[0])
        itemlist.append( Item(channel=__channel__, action="peliculas", title="[COLOR orange]Successivo >>[/COLOR]" , url=scrapedurl , thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png", folder=True) )

    return itemlist

def categorias(item):
    """Return one "peliculas" Item per link found inside the page's ``kategori_list`` block."""
    logger.info("pelisalacarta.casacinema categorias")

    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Restrict the scan to the category list markup
    bloque = scrapertools.get_match(data,'<ul class="kategori_list">(.*?)</ul>')

    # Every <li> link inside that block is one category
    matches = re.findall('<li><a href="([^"]+)">([^<]+)</a></li>', bloque, re.DOTALL)
    scrapertools.printMatches(matches)

    itemlist = []
    no_thumbnail = ""
    no_plot = ""
    for href, label in matches:
        target = urlparse.urljoin(item.url, href)
        if DEBUG:
            logger.info("title=["+label+"], url=["+target+"], thumbnail=["+no_thumbnail+"]")
        entry = Item(channel=__channel__, action="peliculas" , title="[COLOR azure]" + label + "[/COLOR]", url=target, thumbnail=no_thumbnail, plot=no_plot)
        itemlist.append(entry)

    return itemlist

def search(item,texto):
    """Run a site search for ``texto`` and return the resulting item list.

    Points ``item.url`` at the site's ``?s=`` query and delegates to
    peliculas(). Any error is logged and an empty list is returned, so a
    global search across channels is not interrupted when this channel fails.
    """
    logger.info("[casacinema.py] "+item.url+" search "+texto)
    item.url = "http://casa-cinema.net/?s="+texto
    try:
        return peliculas(item)
    # Catch Exception rather than everything, so SystemExit/KeyboardInterrupt
    # still propagate, and log a readable traceback instead of the raw
    # exc_info() members.
    except Exception:
        import traceback
        logger.error(traceback.format_exc())
        return []

def test():
    """Channel self-check.

    Returns True as soon as one entry from the first menu section yields at
    least one video mirror, and False if none does.
    """
    from servers import servertools

    # The first menu entry is the listing of new films
    menu = mainlist(Item())
    candidates = peliculas(menu[0])
    return any(
        len(servertools.find_video_items( item=candidate )) > 0
        for candidate in candidates
    )

Re: Casa-cinema headers - Italian channel

Publicado: 05 Ago 2015, 11:21
por robalo
Mi recomendación para crear un canal es empezar de la forma más simple y observar el log de kodi y si hace falta, comprobar que se escriben las cookies en 'cookies.dat'.

Los errores de script son por scrapedplot y el 'data = ""' es por la cabecera.

Te paso algo funcional con lo que podrás trabajar y personalizar. He quitado el código que no se usa para una mejor lectura

Código: Seleccionar todo

# -*- coding: utf-8 -*-
#------------------------------------------------------------
# pelisalacarta - XBMC Plugin
# Canal para casacinema
# http://blog.tvalacarta.info/plugin-xbmc/pelisalacarta/
#------------------------------------------------------------
import urlparse,urllib2,urllib,re
import os, sys

from core import logger
from core import config
from core import scrapertools
from core.item import Item
from servers import servertools

# Channel metadata constants (presumably read by the plugin framework; the
# consumer is not visible in this listing).
__channel__ = "casacinema"
__category__ = "F,S,A"
__type__ = "generic"
__title__ = "casacinema"
__language__ = "IT"

# Base URL of the site; start page for mainlist() and prefix for search().
sito="http://casa-cinema.net/"

def isGeneric():
    """Always return True."""
    return True

def mainlist( item ):
    """Build the channel's root menu: the latest-films listing and the search entry."""
    logger.info( "pelisalacarta.casacinema mainlist" )

    latest_films = Item(
        channel=__channel__,
        title="[COLOR azure]Ultimi film inseriti[/COLOR]",
        action="peliculas",
        url=sito,
        thumbnail="http://dc584.4shared.com/img/XImgcB94/s7/13feaf0b538/saquinho_de_pipoca_01" )
    search_entry = Item(
        channel=__channel__,
        title="[COLOR yellow]Cerca...[/COLOR]",
        action="search",
        thumbnail="http://dc467.4shared.com/img/fEbJqOum/s7/13feaf0c8c0/Search" )

    return [ latest_films, search_entry ]

def search( item, texto ):
    """Run a site search for ``texto`` and return the resulting item list.

    Points ``item.url`` at the site's ``?s=`` query and delegates to
    peliculas(). Any error is logged and an empty list is returned, so a
    global search across channels is not interrupted when this channel fails.
    """
    logger.info( "[casacinema.py] " + item.url + " search " + texto )

    item.url = sito + "?s=" + texto

    try:
        return peliculas( item )

    # Catch Exception rather than everything, so SystemExit/KeyboardInterrupt
    # still propagate, and log a readable traceback instead of the raw
    # exc_info() members.
    except Exception:
        import traceback
        logger.error( traceback.format_exc() )
        return []

def peliculas( item ):
    """Scrape the listing page at ``item.url``.

    Returns one "findvideos" Item per film box found on the page, followed by
    a "peliculas" Item for the next page when a rel="next" link is present.
    """
    logger.info( "pelisalacarta.casacinema peliculas" )

    ## Download the page
    data = scrapertools.cache_page( item.url )

    ## One match per film box: (link, title, thumbnail)
    patron = ( '<div class="box-single-movies">\s*'
               '<a href="([^>"]+)".*?title="([^>"]+)" >.*?<img class.*?<img.*?src="([^>"]+)"' )

    itemlist = []
    for found in re.finditer( patron, data, re.DOTALL ):
        link, raw_title, thumb = found.groups()
        title = scrapertools.decodeHtmlentities( raw_title )
        itemlist.append( Item( channel=__channel__, action="findvideos", title="[COLOR azure]" + title + "[/COLOR]", url=link, thumbnail=thumb, fulltitle=title, show=title ) )

    ## Pagination
    next_page = scrapertools.find_single_match( data, 'rel="next" href="([^"]+)"' )
    if next_page == "":
        return itemlist

    itemlist.append( Item( channel=__channel__, action="peliculas", title="[COLOR orange]Successivo >>[/COLOR]", url=next_page, thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png" ) )
    return itemlist

Re: Casa-cinema headers - Italian channel

Publicado: 05 Ago 2015, 15:20
por zanzibar1982
Hola robalo

Thank you for the useful info, as usual.

I'll pay more attention to these details

in the log from now on. :roll:

Re: Casa-cinema headers - Italian channel

Publicado: 06 Ago 2015, 16:53
por zanzibar1982
Hola,

Could you please check what's wrong with my "categorias" item?

I get an empty page

Código: Seleccionar todo

# -*- coding: utf-8 -*-
#------------------------------------------------------------
# pelisalacarta - XBMC Plugin
# Canal para casacinema
# http://blog.tvalacarta.info/plugin-xbmc/pelisalacarta/
#------------------------------------------------------------
import urlparse,urllib2,urllib,re
import os, sys

from core import logger
from core import config
from core import scrapertools
from core.item import Item
from servers import servertools

# Channel metadata constants (presumably read by the plugin framework; the
# consumer is not visible in this listing).
__channel__ = "casacinema"
__category__ = "F,S,A"
__type__ = "generic"
__title__ = "casacinema"
__language__ = "IT"

# Base URL of the site; start page for mainlist() and prefix for search().
sito="http://casa-cinema.net/"

def isGeneric():
    """Always return True."""
    return True

def mainlist( item ):
    """Build the channel's root menu: new films, categories and the search entry."""
    logger.info( "pelisalacarta.casacinema mainlist" )

    new_films = Item(
        channel=__channel__,
        title="[COLOR azure]Film - Novita'[/COLOR]",
        action="peliculas",
        url=sito,
        thumbnail="http://dc584.4shared.com/img/XImgcB94/s7/13feaf0b538/saquinho_de_pipoca_01" )
    categories = Item(
        channel=__channel__,
        title="[COLOR azure]Categorie[/COLOR]",
        action="categorias",
        url=sito )
    search_entry = Item(
        channel=__channel__,
        title="[COLOR yellow]Cerca...[/COLOR]",
        action="search",
        thumbnail="http://dc467.4shared.com/img/fEbJqOum/s7/13feaf0c8c0/Search" )

    return [ new_films, categories, search_entry ]

def search( item, texto ):
    """Run a site search for ``texto`` and return the resulting item list.

    Points ``item.url`` at the site's ``?s=`` query and delegates to
    peliculas(). Any error is logged and an empty list is returned, so a
    global search across channels is not interrupted when this channel fails.
    """
    logger.info( "[casacinema.py] " + item.url + " search " + texto )

    item.url = sito + "?s=" + texto

    try:
        return peliculas( item )

    # Catch Exception rather than everything, so SystemExit/KeyboardInterrupt
    # still propagate, and log a readable traceback instead of the raw
    # exc_info() members.
    except Exception:
        import traceback
        logger.error( traceback.format_exc() )
        return []

def peliculas( item ):
    """Scrape the listing page at ``item.url``.

    Returns one "findvideos" Item per film box found on the page, followed by
    a "peliculas" Item for the next page when a rel="next" link is present.
    """
    logger.info( "pelisalacarta.casacinema peliculas" )

    ## Download the page
    data = scrapertools.cache_page( item.url )

    ## One match per film box: (link, title, thumbnail)
    patron = ( '<div class="box-single-movies">\s*'
               '<a href="([^>"]+)".*?title="([^>"]+)" >.*?<img class.*?<img.*?src="([^>"]+)"' )

    itemlist = []
    for found in re.finditer( patron, data, re.DOTALL ):
        link, raw_title, thumb = found.groups()
        title = scrapertools.decodeHtmlentities( raw_title )
        itemlist.append( Item( channel=__channel__, action="findvideos", title="[COLOR azure]" + title + "[/COLOR]", url=link, thumbnail=thumb, fulltitle=title, show=title ) )

    ## Pagination
    next_page = scrapertools.find_single_match( data, 'rel="next" href="([^"]+)"' )
    if next_page == "":
        return itemlist

    itemlist.append( Item( channel=__channel__, action="peliculas", title="[COLOR orange]Successivo >>[/COLOR]", url=next_page, thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png" ) )
    return itemlist

def categorias(item):
    """Download ``item.url`` and build one "peliculas" Item per regex match.

    NOTE(review): as written this produces an empty list or an error; see the
    notes on the pattern and on DEBUG below.
    """
    logger.info("pelisalacarta.casacinema categorias")
    
    itemlist = []
    
    data = scrapertools.cache_page( item.url )
    
    # NOTE(review): every match must begin with the table's opening markup and
    # be followed, after optional whitespace only, by an <a> tag -- so this can
    # match at most once per table, and not at all if anything else sits
    # between <table> and the first link.
    patron = '<div class="table-responsive"><table class="table">\s*'
    patron += '<a href="(.*?)">.*?>(.*?)</a>'
    matches = re.compile(patron,re.DOTALL).findall(data)

    for scrapedurl,scrapedtitle in matches:
        # NOTE(review): no DEBUG assignment is visible in this version of the
        # file, so this line would raise NameError on the first match.
        if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"]")
        # NOTE(review): scrapedurl is used as-is; if the site's links are
        # relative they presumably need joining with the base URL -- confirm.
        itemlist.append( Item(channel=__channel__, action="peliculas" , title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    return itemlist

Re: Casa-cinema headers - Italian channel

Publicado: 06 Ago 2015, 17:55
por robalo
El problema principal que tienes en 'categorias' es el patrón.

Empiezas con '<div class="table-responsive"><table class="table">\s*' que te limita las coincidencias a una si la siguiente línea se ajustara bien para alcanzar el item 'AL CINEMA'.

Para crear el patrón sin errores y sin que se cuelen items extraños tienes que crearlo a partir de estos datos:

Código: Seleccionar todo

<td width="50%">
                <a href="/news_film/">
                    <img class="alignnone size-full wp-image-5014" src="/wp-content/uploads/2013/05/frecciablu.png" width="14" height="14" />AL CINEMA</a>
            </td>
Si lo haces bien obtendrás los 26 items.

Observa la url. Tendrás que añadirle 'sito'. La forma normal y sin complicaciones sería 'urlparse.urljoin( sito, scrapedurl )'

Re: Casa-cinema headers - Italian channel

Publicado: 06 Ago 2015, 18:30
por zanzibar1982
I tried these 2 ways with no luck, robalo.

Código: Seleccionar todo

def categorias(item):
    """Download ``item.url`` and build one "peliculas" Item per regex match,
    with each scraped URL resolved against ``sito``."""
    logger.info("pelisalacarta.casacinema categorias")
    
    itemlist = []
    
    data = scrapertools.cache_page( item.url )
    
    # NOTE(review): the first part spells out one specific cell ("AL CINEMA")
    # literally, and the second part requires an <a> tag right after that
    # cell's </td> (whitespace only in between). If each link sits inside its
    # own <td>, as in the sample markup quoted in this thread, nothing can
    # match -- confirm against the page source.
    patron = '<td width="50%">.*?<a href="/news_film/">.*?<img class="alignnone size-full wp-image-5014" src="/wp-content/uploads/2013/05/frecciablu.png" width="14" height="14" />AL CINEMA</a>.*?</td>\s*'
    patron += '<a href="(.*?)">.*?>(.*?)</a>'
    matches = re.compile(patron,re.DOTALL).findall(data)

    for scrapedurl,scrapedtitle in matches:
        # Turn the site-relative link into an absolute URL
        scrapedurl = urlparse.urljoin( sito, scrapedurl )
        # NOTE(review): DEBUG is not defined in this snippet -- verify the
        # module defines it, otherwise this raises NameError.
        if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"]")
        itemlist.append( Item(channel=__channel__, action="peliculas" , title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    return itemlist
and

Código: Seleccionar todo

def categorias(item):
    """Download ``item.url``, cut out a block of the page and build one
    "peliculas" Item per link found in that block, with each scraped URL
    resolved against ``sito``."""
    logger.info("pelisalacarta.casacinema categorias")
    
    itemlist = []
    
    data = scrapertools.cache_page( item.url )

    # Narrow search by selecting only the combo
    # NOTE(review): the captured block starts AFTER the literally spelled-out
    # "AL CINEMA" cell, so that entry itself is excluded; it ends at the
    # "BEGIN TAG" HTML comment -- verify that marker exists in the page.
    bloque = scrapertools.get_match(data,'<td width="50%">.*?<a href="/news_film/">.*?<img class="alignnone size-full wp-image-5014" src="/wp-content/uploads/2013/05/frecciablu.png" width="14" height="14" />AL CINEMA</a>.*?</td>(.*?)<!-- BEGIN TAG - DO NOT MODIFY -->')

    
    # The categories are the options for the combo  
    #patron = '<td width="50%">.*?<a href="/news_film/">.*?<img class="alignnone size-full wp-image-5014" src="/wp-content/uploads/2013/05/frecciablu.png" width="14" height="14" />AL CINEMA</a>.*?</td>\s*'
    patron = '<a href="(.*?)">.*?>(.*?)</a>'
    matches = re.compile(patron,re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)
    
    for scrapedurl,scrapedtitle in matches:
        # Turn the site-relative link into an absolute URL
        scrapedurl = urlparse.urljoin( sito, scrapedurl )
        # NOTE(review): DEBUG is not defined in this snippet -- verify the
        # module defines it, otherwise this raises NameError.
        if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"]")
        itemlist.append( Item(channel=__channel__, action="peliculas" , title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    return itemlist

Re: Casa-cinema headers - Italian channel

Publicado: 06 Ago 2015, 18:43
por robalo
Te estás complicando mucho

Código: Seleccionar todo

def categorias(item):
    """Return one "peliculas" Item per table cell that holds a single link.

    Each scraped link is resolved against ``sito`` to obtain the Item's URL.
    """
    logger.info("pelisalacarta.casacinema categorias")

    data = scrapertools.cache_page( item.url )

    # One match per <td> cell: (link, label)
    patron = '<td[^<]+<a href="([^"]+)">[^>]+>([^<]+)</a>[^/]+/td>'

    return [
        Item( channel=__channel__, action="peliculas" , title="[COLOR azure]" + label + "[/COLOR]", url=urlparse.urljoin( sito, href ) )
        for href, label in re.findall( patron, data, re.DOTALL )
    ]

Re: Casa-cinema headers - Italian channel

Publicado: 06 Ago 2015, 22:52
por zanzibar1982
I see I got at least close to it. Yet again the simplest solution

is the answer. Thanks for support robalo, this works.

I'm putting this on github soon :)