Página 1 de 2

Altadefinizione.click (IT)

Publicado: 27 Ago 2015, 06:48
por fenice82
I'm trying to get a connector for Altadefinizione.click... this site use Cloudflare.
I was able to create the tree of the folders but I don't have any movie inside.

could you please explain me where I'm wrong?

Código: Seleccionar todo

# -*- coding: utf-8 -*-
# ------------------------------------------------------------
# pelisalacarta - XBMC Plugin

# Canal para altadefinizioneclick
# http://blog.tvalacarta.info/plugin-xbmc/pelisalacarta/
# ------------------------------------------------------------
import urlparse, urllib2, urllib, re
import os, sys
import urllib2
import urlparse
import re
import sys
import binascii
import time

from core import logger
from core import config
from core import scrapertools
from core.item import Item
from servers import servertools

__channel__ = "altadefinizioneclick"
__category__ = "F,S,A"
__type__ = "generic"
__title__ = "AltaDefinizioneclick"
__language__ = "IT"

sito = "http://www.altadefinizione.click/"

headers = [
    ['User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0'],
    ['Accept-Encoding', 'gzip, deflate'],
    ['Referer', 'http://altadefinizione.click/'],
    ['Connection', 'keep-alive']
]

DEBUG = config.get_setting("debug")


def isGeneric():
    return True


def mainlist(item):
    logger.info("pelisalacarta.altadefinizioneclick mainlist")

    itemlist = [Item(channel=__channel__,
                     title="[COLOR azure]Al Cinema[/COLOR]",
                     action="peliculas",

                     url=sito+"al-cinema/",
                     thumbnail="http://dc584.4shared.com/img/XImgcB94/s7/13feaf0b538/saquinho_de_pipoca_01"),
				Item(channel=__channel__,
                     title="[COLOR azure]Nuove Uscite[/COLOR]",
                     action="peliculas",
                     url=sito+"nuove-uscite/",
                     thumbnail="http://i.imgur.com/Jsw7Abj.png"),
                Item(channel=__channel__,
                     title="[COLOR azure]Film Sub-Ita[/COLOR]",
                     action="peliculas",
                     url=sito+"sub-ita/",
                     thumbnail="http://i.imgur.com/qUENzxl.png"),
                Item(channel=__channel__,
                     title="[COLOR azure]Categorie film[/COLOR]",
                     action="categorias1",
                     url=sito,
                     thumbnail="http://xbmc-repo-ackbarr.googlecode.com/svn/trunk/dev/skin.cirrus%20extended%20v2/extras/moviegenres/All%20Movies%20by%20Genre.png"),
				Item(channel=__channel__,
                     title="[COLOR azure]Anno[/COLOR]",
                     action="categorias2",
                     url=sito,
                     thumbnail="http://xbmc-repo-ackbarr.googlecode.com/svn/trunk/dev/skin.cirrus%20extended%20v2/extras/moviegenres/All%20Movies%20by%20Genre.png"),	 
				Item(channel=__channel__,
                     title="[COLOR azure]Qualità[/COLOR]",
                     action="categorias3",
                     url=sito,
                     thumbnail="http://xbmc-repo-ackbarr.googlecode.com/svn/trunk/dev/skin.cirrus%20extended%20v2/extras/moviegenres/All%20Movies%20by%20Genre.png"),	 
				Item(channel=__channel__,
                     title="[COLOR yellow]Cerca...[/COLOR]",
                     action="search",
                     thumbnail="http://dc467.4shared.com/img/fEbJqOum/s7/13feaf0c8c0/Search")]

    return itemlist


def peliculas(item):
    logger.info("pelisalacarta.altadefinizioneclick peliculas")
    itemlist = []

    # Descarga la pagina
    data = anti_cloudflare(item.url)
	
	    ## ------------------------------------------------
    cookies = ""
    matches = re.compile( '(.altadefinizione.click.*?)\n', re.DOTALL ).findall( config.get_cookie_data() )
    for cookie in matches:
        name = cookie.split( '\t' )[5]
        value = cookie.split( '\t' )[6]
        cookies+= name + "=" + value + ";"
    headers.append( ['Cookie',cookies[:-1]] )
    import urllib
    _headers = urllib.urlencode( dict( headers ) )
    ## ------------------------------------------------

    # Extrae las entradas (carpetas)
    patron = '<a\s+href="([^"]+)">\s+<img\s+width="[^"]*"\s+height="[^"]*"\s+src="([^"]+)"\s+class="[^"]*"\s+alt="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedthumbnail, scrapedtitle in matches:
        html = scrapertools.cache_page(scrapedurl)
        start = html.find("<div class=\"aciklama\">")
        end = html.find("<div class=\'bMavi\'>Titolo originale:", start)
        scrapedplot = html[start:end]
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle.replace("Streaming", ""))
        scrapedplot = re.sub(r'<[^>]*>', '', scrapedplot)
        scrapedplot = scrapertools.decodeHtmlentities(scrapedplot)
        if DEBUG: logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
		
		 ## ------------------------------------------------
        scrapedthumbnail+= "|" + _headers
        ## ------------------------------------------------
		
        itemlist.append(
            Item(channel=__channel__,
                 action="findvid",
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                 url=scrapedurl,
                 viewmode="movie_with_plot",
                 thumbnail=scrapedthumbnail,
                 plot=scrapedplot,
                 folder=True))

    # Extrae el paginador
    patronvideos = 'class="nextpostslink" rel="next" href="([^"]+)">&raquo;'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(
            Item(channel=__channel__,
                 action="peliculas",
                 title="[COLOR orange]Successivo >>[/COLOR]",
                 url=scrapedurl,
                 thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png",
                 folder=True))

    return itemlist


def categorias1(item):
    logger.info("pelisalacarta.altadefinizioneclick categorias")
    itemlist = []

    data = anti_cloudflare(item.url)
    logger.info(data)

    # Narrow search by selecting only the combo
    bloque = scrapertools.get_match(data, '<ul class="listSubCat" id="Film">(.*?)</ul>')

    # The categories are the options for the combo  
    patron = '<li><a href="([^"]+)">([^<]+)</a></li>'
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)

    for url, titulo in matches:
        scrapedtitle = titulo
        scrapedurl = urlparse.urljoin(item.url, url)
        scrapedthumbnail = ""
        scrapedplot = ""
        if DEBUG: logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__,
                 action="peliculas",
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                 url=scrapedurl,
                 thumbnail=scrapedthumbnail,
                 plot=scrapedplot))

    return itemlist

def categorias2(item):
    logger.info("pelisalacarta.altadefinizioneclick categorias")
    itemlist = []

    data = anti_cloudflare(item.url)
    logger.info(data)

    # Narrow search by selecting only the combo
    bloque = scrapertools.get_match(data, '<ul class="listSubCat" id="Anno">(.*?)</ul>')

    # The categories are the options for the combo  
    patron = '<li><a href="([^"]+)">([^<]+)</a></li>'
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)

    for url, titulo in matches:
        scrapedtitle = titulo
        scrapedurl = urlparse.urljoin(item.url, url)
        scrapedthumbnail = ""
        scrapedplot = ""
        if DEBUG: logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__,
                 action="peliculas",
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                 url=scrapedurl,
                 thumbnail=scrapedthumbnail,
                 plot=scrapedplot))

    return itemlist

def categorias3(item):
    logger.info("pelisalacarta.altadefinizioneclick categorias")
    itemlist = []

    data = anti_cloudflare(item.url)
    logger.info(data)

    # Narrow search by selecting only the combo
    bloque = scrapertools.get_match(data, '<ul class="listSubCat" id="Qualita">(.*?)</ul>')

    # The categories are the options for the combo  
    patron = '<li><a href="([^"]+)">([^<]+)</a></li>'
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)

    for url, titulo in matches:
        scrapedtitle = titulo
        scrapedurl = urlparse.urljoin(item.url, url)
        scrapedthumbnail = ""
        scrapedplot = ""
        if DEBUG: logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__,
                 action="peliculas",
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                 url=scrapedurl,
                 thumbnail=scrapedthumbnail,
                 plot=scrapedplot))

    return itemlist
	
def search(item, texto):
    logger.info("[altadefinizioneclick.py] " + item.url + " search " + texto)
    item.url = "%s?s=%s" % (sito, texto)
    try:
        return peliculas(item)
    # Se captura la excepción, para no interrumpir al buscador global si un canal falla
    except:
        import sys
        for line in sys.exc_info():
            logger.error("%s" % line)
        return []


def findvid(item):
    logger.info("[altadefinizioneclick.py] findvideos")

    ## Descarga la página
    data = scrapertools.cache_page(item.url)
    data = scrapertools.find_single_match(data, "(eval.function.p,a,c,k,e,.*?)\s*</script>")
    if data != "":
        from lib.jsbeautifier.unpackers import packer
        data = packer.unpack(data).replace(r'\\/', '/')
        itemlist = servertools.find_video_items(data=data)

        for videoitem in itemlist:
            videoitem.title = "".join([item.title, videoitem.title])
            videoitem.fulltitle = item.fulltitle
            videoitem.thumbnail = item.thumbnail
            videoitem.channel = __channel__
    else:
        itemlist = servertools.find_video_items(item=item)

    return itemlist
	
def anti_cloudflare(url):
    # global headers

    try:
        resp_headers = scrapertools.get_headers_from_response(url, headers=headers)
        resp_headers = {v[0]: v[1] for v in resp_headers}
    except urllib2.HTTPError, e:
        resp_headers = e.headers

    if 'refresh' in resp_headers:
        time.sleep(int(resp_headers['refresh'][:1]))

        # dict_headers = {v[0]: v[1] for v in headers}
        # dict_headers['cookie'] = resp_headers['set-cookie'].split(';')[0]

        # resp_headers = scrapertools.get_headers_from_response(sito + resp_headers['refresh'][7:], headers=[[k, v] for k, v in dict_headers.iteritems()])
        scrapertools.get_headers_from_response(sito + resp_headers['refresh'][7:], headers=headers)
        # resp_headers = {v[0]: v[1] for v in resp_headers}

        # dict_headers['cookie'] = dict_headers['cookie'] + resp_headers['set-cookie'].split(';')[0]
        # headers = [[k, v] for k, v in dict_headers.iteritems()]

    return scrapertools.cache_page(url, headers=headers)

Re: Altadefinizione.click (IT)

Publicado: 27 Ago 2015, 08:50
por robalo
Hola fenice82.

Para empezar a crear caneles debes leer un poco de sintasis de python, mirar sin miedo el código e intentar comprender que hace cada línea. Deberías empezar con canales más sencillos. Copiar y pegar está bien pero se tiene que hacer con criterio. Intenta primero hacer una o dos partes del canal funcionen e ir añadiendo poco a poco opciones al menú.

Te voy a pegar algo funcional con lo que intentes entender porqué este código funciona y el que has posteado no.

Código: Seleccionar todo

# -*- coding: utf-8 -*-
# ------------------------------------------------------------
# pelisalacarta - XBMC Plugin
# Canal para altadefinizioneclick
# http://blog.tvalacarta.info/plugin-xbmc/pelisalacarta/
# ------------------------------------------------------------
import urllib2, re
import time

from core import logger
from core import config
from core import scrapertools
from core.item import Item

__channel__ = "altadefinizioneclick"
__category__ = "F,S,A"
__type__ = "generic"
__title__ = "AltaDefinizioneclick"
__language__ = "IT"

host = "http://www.altadefinizione.click"

headers = [
    ['User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0'],
    ['Accept-Encoding', 'gzip, deflate'],
    ['Referer', 'http://altadefinizione.click/'],
    ['Connection', 'keep-alive']
]

def isGeneric():
    return True


def mainlist( item ):
    logger.info( "[altadefinizioneclick.py] mainlist" )

    itemlist = []

    itemlist.append( Item( channel=__channel__, title="Al Cinema", action="fichas", url=host + "/al-cinema/" ) )
    itemlist.append( Item( channel=__channel__, title="Buscar...", action="search", url=host ) )

    return itemlist

def search( item, texto ):
    logger.info( "[altadefinizioneclick.py] " + item.url + " search " + texto )

    item.url+= "/?s=" + texto

    try:
        return fichas( item )

    ## Se captura la excepción, para no interrumpir al buscador global si un canal falla
    except:
        import sys
        for line in sys.exc_info():
            logger.error("%s" % line)
        return []

def fichas( item ):
    logger.info( "[altadefinizioneclick.py] fichas" )

    itemlist = []

    # Descarga la pagina
    data = anti_cloudflare( item.url )

    ## ------------------------------------------------
    cookies = ""
    matches = re.compile( '(.altadefinizione.click.*?)\n', re.DOTALL ).findall( config.get_cookie_data() )
    for cookie in matches:
        name = cookie.split( '\t' )[5]
        value = cookie.split( '\t' )[6]
        cookies+= name + "=" + value + ";"
    headers.append( ['Cookie',cookies[:-1]] )
    import urllib
    _headers = urllib.urlencode( dict( headers ) )
    ## ------------------------------------------------

    if "/?s=" in item.url:
        patron = '<div class="col-lg-3 col-md-3 col-xs-3">.*?'
        patron+= 'href="([^"]+)".*?'
        patron+= '<div class="wrapperImage"[^<]+'
        patron+= '<[^>]+>([^<]+)<.*?'
        patron+= 'src="([^"]+)".*?'
        patron+= 'class="titleFilm">([^<]+)<.*?'
        patron+= 'IMDB: ([^<]+)<'
    else:
        patron = '<div class="wrapperImage"[^<]+'
        patron+= '<[^>]+>([^<]+)<.*?'
        patron+= 'href="([^"]+)".*?'
        patron+= 'src="([^"]+)".*?'
        patron+= 'href[^>]+>([^<]+)</a>.*?'
        patron+= 'IMDB: ([^<]+)<'

    matches = re.compile( patron, re.DOTALL ).findall( data )

    for scraped_1, scraped_2, scrapedthumbnail, scrapedtitle, scrapedpuntuacion in matches:

        scrapedurl = scraped_2
        scrapedcalidad = scraped_1
        if "/?s=" in item.url:
            scrapedurl = scraped_1
            scrapedcalidad = scraped_2

        title = scrapertools.decodeHtmlentities( scrapedtitle )
        title+= " (" + scrapedcalidad + ") (" + scrapedpuntuacion + ")"

        ## ------------------------------------------------
        scrapedthumbnail+= "|" + _headers
        ## ------------------------------------------------

        itemlist.append( Item( channel=__channel__, action="findvideos", title=title, url=scrapedurl, thumbnail=scrapedthumbnail, fulltitle=title, show=scrapedtitle ) )

    ## Paginación
    next_page = scrapertools.find_single_match( data, '<a class="next page-numbers" href="([^"]+)">' )
    if next_page != "":
        itemlist.append( Item( channel=__channel__, action="fichas" , title=">> Página siguiente" , url=next_page ) )

    return itemlist

def findvideos( item ):
    logger.info( "[altadefinizioneclick.py] findvideos" )

    itemlist = []

    ## Descarga la página
    data = anti_cloudflare( item.url )

    locate = scrapertools.get_match( data, '<iframe width="100%" height="500px" src="([^"]+)" allowfullscreen></iframe>' )

    try:
        data = anti_cloudflare( locate )
        scrapedurl = scrapertools.get_match( data, '<source src="([^"]+)"' )
    except: scrapedurl = locate

    server = scrapedurl.split( '/' )[2].split( '.' )
    if len(server) == 3: server = server[1]
    else: server = server[0]

    title = "[" + server + "] " + item.fulltitle

    itemlist.append( Item( channel=__channel__, action="play" , title=title, url=scrapedurl, thumbnail=item.thumbnail, fulltitle=item.fulltitle, show=item.show, folder=False ) )

    return itemlist

def anti_cloudflare(url):
    # global headers

    try:
        resp_headers = scrapertools.get_headers_from_response(url, headers=headers)
        resp_headers = {v[0]: v[1] for v in resp_headers}
    except urllib2.HTTPError, e:
        resp_headers = e.headers

    if 'refresh' in resp_headers:
        time.sleep(int(resp_headers['refresh'][:1]))

        # dict_headers = {v[0]: v[1] for v in headers}
        # dict_headers['cookie'] = resp_headers['set-cookie'].split(';')[0]

        # resp_headers = scrapertools.get_headers_from_response(sito + resp_headers['refresh'][7:], headers=[[k, v] for k, v in dict_headers.iteritems()])
        scrapertools.get_headers_from_response(host + "/" + resp_headers['refresh'][7:], headers=headers)
        # resp_headers = {v[0]: v[1] for v in resp_headers}

        # dict_headers['cookie'] = dict_headers['cookie'] + resp_headers['set-cookie'].split(';')[0]
        # headers = [[k, v] for k, v in dict_headers.iteritems()]

    return scrapertools.cache_page(url, headers=headers)

Re: Altadefinizione.click (IT)

Publicado: 27 Ago 2015, 12:42
por fenice82
thank you robalo, when I started I was thinking that this channel would be easy because is the copy of Altadefinizione01.

I studied the "Guía de desarrollo de pelisalacarta" wrote by Jesus and studied the other connector.

I will work to improve my skill.

ciao

Fenice82

Re: Altadefinizione.click (IT)

Publicado: 28 Ago 2015, 04:31
por zanzibar1982
This is my sheet for the channel, in the other thread I explained what I did, fenice82.

Problem is with "sub-ita" page, where not all of the objects become extracted.

Some movies have a "different shape", but I could not invent an "if" cause for them :|

Código: Seleccionar todo

# -*- coding: utf-8 -*-
# ------------------------------------------------------------
# pelisalacarta - XBMC Plugin
# Canal para altadefinizioneclick
# http://blog.tvalacarta.info/plugin-xbmc/pelisalacarta/
# ------------------------------------------------------------
import urllib2, re
import time

from core import logger
from core import config
from core import scrapertools
from core.item import Item
from servers import servertools

__channel__ = "altadefinizioneclick"
__category__ = "F,S,A"
__type__ = "generic"
__title__ = "AltaDefinizioneclick"
__language__ = "IT"

host = "http://www.altadefinizione.click"

headers = [
    ['User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0'],
    ['Accept-Encoding', 'gzip, deflate'],
    ['Referer', 'http://altadefinizione.click/'],
    ['Connection', 'keep-alive']
]

def isGeneric():
    return True


def mainlist( item ):
    logger.info( "[altadefinizioneclick.py] mainlist" )

    itemlist = []

    itemlist.append( Item( channel=__channel__, title="[COLOR azure]Al Cinema[/COLOR]", action="fichas", url=host + "/al-cinema/" ) )
    itemlist.append( Item( channel=__channel__, title="[COLOR azure]Film per Genere[/COLOR]", action="genere", url=host ) )
    itemlist.append( Item( channel=__channel__, title="[COLOR azure]Film per Anno[/COLOR]", action="anno", url=host ) )
    itemlist.append( Item( channel=__channel__, title="[COLOR azure]Film Sub-Ita[/COLOR]", action="fichas", url=host + "/sub-ita/", extra="sub" ) )
    itemlist.append( Item( channel=__channel__, title="[COLOR orange]Cerca...[/COLOR]", action="search", url=host ) )

    return itemlist

def search( item, texto ):
    logger.info( "[altadefinizioneclick.py] " + item.url + " search " + texto )

    item.url+= "/?s=" + texto

    try:
        return fichas( item )

    ## Se captura la excepción, para no interrumpir al buscador global si un canal falla
    except:
        import sys
        for line in sys.exc_info():
            logger.error("%s" % line)
        return []

def genere(item):
    logger.info("[altadefinizioneclick.py] genere")
    itemlist = []

    data = anti_cloudflare( item.url )

    ## ------------------------------------------------
    cookies = ""
    matches = re.compile( '(.altadefinizione.click.*?)\n', re.DOTALL ).findall( config.get_cookie_data() )
    for cookie in matches:
        name = cookie.split( '\t' )[5]
        value = cookie.split( '\t' )[6]
        cookies+= name + "=" + value + ";"
    headers.append( ['Cookie',cookies[:-1]] )
    import urllib
    _headers = urllib.urlencode( dict( headers ) )
    ## ------------------------------------------------

    data = scrapertools.find_single_match(data,'<option value="http://altadefinizione.click">Seleziona Categoria Film</option>(.*?)</form>')

    patron  = '<option value="(.*?)">(.*?)</option>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl,scrapedtitle in matches:
        itemlist.append( Item(channel=__channel__, action="fichas", title=scrapedtitle, url=scrapedurl, folder=True))

    return itemlist

def anno(item):
    logger.info("[altadefinizioneclick.py] genere")
    itemlist = []

    data = anti_cloudflare( item.url )

    ## ------------------------------------------------
    cookies = ""
    matches = re.compile( '(.altadefinizione.click.*?)\n', re.DOTALL ).findall( config.get_cookie_data() )
    for cookie in matches:
        name = cookie.split( '\t' )[5]
        value = cookie.split( '\t' )[6]
        cookies+= name + "=" + value + ";"
    headers.append( ['Cookie',cookies[:-1]] )
    import urllib
    _headers = urllib.urlencode( dict( headers ) )
    ## ------------------------------------------------

    data = scrapertools.find_single_match(data,'<ul class="listSubCat" id="Anno">(.*?)</div>')

    patron  = '<li><a href="(.*?)">(.*?)</a></li>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl,scrapedtitle in matches:
        itemlist.append( Item(channel=__channel__, action="fichas", title=scrapedtitle, url=scrapedurl, folder=True))

    return itemlist


def fichas( item ):
    logger.info( "[altadefinizioneclick.py] fichas" )

    itemlist = []

    # Descarga la pagina
    data = anti_cloudflare( item.url )

    ## ------------------------------------------------
    cookies = ""
    matches = re.compile( '(.altadefinizione.click.*?)\n', re.DOTALL ).findall( config.get_cookie_data() )
    for cookie in matches:
        name = cookie.split( '\t' )[5]
        value = cookie.split( '\t' )[6]
        cookies+= name + "=" + value + ";"
    headers.append( ['Cookie',cookies[:-1]] )
    import urllib
    _headers = urllib.urlencode( dict( headers ) )
    ## ------------------------------------------------

    if "/?s=" in item.url:
        patron = '<div class="col-lg-3 col-md-3 col-xs-3">.*?'
        patron+= 'href="([^"]+)".*?'
        patron+= '<div class="wrapperImage"[^<]+'
        patron+= '<[^>]+>([^<]+)<.*?'
        patron+= 'src="([^"]+)".*?'
        patron+= 'class="titleFilm">([^<]+)<.*?'
        patron+= 'IMDB: ([^<]+)<'
    else:
        patron = '<div class="wrapperImage"[^<]+'
        patron+= '<[^>]+>([^<]+)<.*?'
        patron+= 'href="([^"]+)".*?'
        patron+= 'src="([^"]+)".*?'
        patron+= 'href[^>]+>([^<]+)</a>.*?'
        patron+= 'IMDB: ([^<]+)<'

    matches = re.compile( patron, re.DOTALL ).findall( data )

    for scraped_1, scraped_2, scrapedthumbnail, scrapedtitle, scrapedpuntuacion in matches:

        scrapedurl = scraped_2
        scrapedcalidad = scraped_1
        if "/?s=" in item.url:
            scrapedurl = scraped_1
            scrapedcalidad = scraped_2

        title = scrapertools.decodeHtmlentities( scrapedtitle )
        title+= " (" + scrapedcalidad + ") (" + scrapedpuntuacion + ")"

        ## ------------------------------------------------
        scrapedthumbnail+= "|" + _headers
        ## ------------------------------------------------

        itemlist.append( Item( channel=__channel__, action="findvideos", title=title, url=scrapedurl, thumbnail=scrapedthumbnail, fulltitle=title, show=scrapedtitle ) )

    ## Paginación
    next_page = scrapertools.find_single_match( data, '<a class="next page-numbers" href="([^"]+)">' )
    if next_page != "":
        itemlist.append( Item( channel=__channel__, action="fichas" , title="[COLOR orange]Successivo >>[/COLOR]" , url=next_page ) )

    return itemlist

# def findvideos( item ):
#     logger.info( "[altadefinizioneclick.py] findvideos" )
#
#     itemlist = []
#
#     ## Descarga la página
#     data = anti_cloudflare( item.url )
#
#     locate = scrapertools.get_match( data, '<iframe width="100%" height="500px" src="([^"]+)" allowfullscreen></iframe>' )
#
#     try:
#         data = anti_cloudflare( locate )
#         scrapedurl = scrapertools.get_match( data, '<source src="([^"]+)"' )
#     except: scrapedurl = locate
#
#     server = scrapedurl.split( '/' )[2].split( '.' )
#     if len(server) == 3: server = server[1]
#     else: server = server[0]
#
#     title = "[" + server + "] " + item.fulltitle
#
#     itemlist.append( Item( channel=__channel__, action="play" , title=title, url=scrapedurl, thumbnail=item.thumbnail, fulltitle=item.fulltitle, show=item.show, folder=False ) )
#
#     return itemlist


def findvideos( item ):
    logger.info( "[altadefinizioneclick.py] findvideos" )

    ## Descarga la página
    data = anti_cloudflare( item.url )

    itemlist = servertools.find_video_items(data=data)

    for videoitem in itemlist:
        videoitem.title = "".join([item.title, videoitem.title])
        videoitem.fulltitle = item.fulltitle
        videoitem.thumbnail = item.thumbnail
        videoitem.show = item.show
        videoitem.channel = __channel__

    return itemlist


def anti_cloudflare(url):
    # global headers

    try:
        resp_headers = scrapertools.get_headers_from_response(url, headers=headers)
        resp_headers = {v[0]: v[1] for v in resp_headers}
    except urllib2.HTTPError, e:
        resp_headers = e.headers

    if 'refresh' in resp_headers:
        time.sleep(int(resp_headers['refresh'][:1]))

        # dict_headers = {v[0]: v[1] for v in headers}
        # dict_headers['cookie'] = resp_headers['set-cookie'].split(';')[0]

        # resp_headers = scrapertools.get_headers_from_response(sito + resp_headers['refresh'][7:], headers=[[k, v] for k, v in dict_headers.iteritems()])
        scrapertools.get_headers_from_response(host + "/" + resp_headers['refresh'][7:], headers=headers)
        # resp_headers = {v[0]: v[1] for v in resp_headers}

        # dict_headers['cookie'] = dict_headers['cookie'] + resp_headers['set-cookie'].split(';')[0]
        # headers = [[k, v] for k, v in dict_headers.iteritems()]

    return scrapertools.cache_page(url, headers=headers)

Re: Altadefinizione.click (IT)

Publicado: 29 Ago 2015, 12:35
por robalo
Es cierto zanzibar, cuando no contiene calidad ignora el item. Tambien ocurre que si no tiene puntuación se salta los item posteriores hasta encontrar uno con puntuación ignorando éste último también.

Probrar añadir este tramo de código y a ver que tal os va:

Código: Seleccionar todo

    ## fix - calidad
    data = re.sub(
        r'<div class="wrapperImage"[^<]+<a',
        '<div class="wrapperImage"><fix>SD</fix><a',
        data
    )
    ## fix - IMDB
    data = re.sub(
        r'</h\d> </div>',
        '<fix>IMDB: 0.0</fix>',
        data
    )

Re: Altadefinizione.click (IT)

Publicado: 30 Ago 2015, 04:02
por zanzibar1982
robalo escribió:Es cierto zanzibar, cuando no contiene calidad ignora el item. Tambien ocurre que si no tiene puntuación se salta los item posteriores hasta encontrar uno con puntuación ignorando éste último también.

Probrar añadir este tramo de código y a ver que tal os va:

Código: Seleccionar todo

    ## fix - calidad
    data = re.sub(
        r'<div class="wrapperImage"[^<]+<a',
        '<div class="wrapperImage"><fix>SD</fix><a',
        data
    )
    ## fix - IMDB
    data = re.sub(
        r'</h\d> </div>',
        '<fix>IMDB: 0.0</fix>',
        data
    )
Yep, this

Código: Seleccionar todo

def fichas( item ):
    logger.info( "[altadefinizioneclick.py] fichas" )

    itemlist = []

    # Descarga la pagina
    data = anti_cloudflare( item.url )
    ## fix - calidad
    data = re.sub(
        r'<div class="wrapperImage"[^<]+<a',
        '<div class="wrapperImage"><fix>SD</fix><a',
        data
    )

    ## ------------------------------------------------
    cookies = ""
    matches = re.compile( '(.altadefinizione.click.*?)\n', re.DOTALL ).findall( config.get_cookie_data() )
    for cookie in matches:
        name = cookie.split( '\t' )[5]
        value = cookie.split( '\t' )[6]
        cookies+= name + "=" + value + ";"
    headers.append( ['Cookie',cookies[:-1]] )
    import urllib
    _headers = urllib.urlencode( dict( headers ) )
    ## ------------------------------------------------

    if "/?s=" in item.url:
        patron = '<div class="col-lg-3 col-md-3 col-xs-3">.*?'
        patron+= 'href="([^"]+)".*?'
        patron+= '<div class="wrapperImage"[^<]+'
        patron+= '<[^>]+>([^<]+)<.*?'
        patron+= 'src="([^"]+)".*?'
        patron+= 'class="titleFilm">([^<]+)<.*?'
        patron+= 'IMDB: ([^<]+)<'
    else:
        patron = '<div class="wrapperImage"[^<]+'
        patron+= '<[^>]+>([^<]+)<.*?'
        patron+= 'href="([^"]+)".*?'
        patron+= 'src="([^"]+)".*?'
        patron+= 'href[^>]+>([^<]+)</a>.*?'
        patron+= 'IMDB: ([^<]+)<'

    matches = re.compile( patron, re.DOTALL ).findall( data )

    for scraped_1, scraped_2, scrapedthumbnail, scrapedtitle, scrapedpuntuacion in matches:

        scrapedurl = scraped_2
        scrapedcalidad = scraped_1
        if "/?s=" in item.url:
            scrapedurl = scraped_1
            scrapedcalidad = scraped_2

        title = scrapertools.decodeHtmlentities( scrapedtitle )
        title+= " (" + scrapedcalidad + ") (" + scrapedpuntuacion + ")"

        ## ------------------------------------------------
        scrapedthumbnail+= "|" + _headers
        ## ------------------------------------------------

        itemlist.append( Item( channel=__channel__, action="findvideos", title=title, url=scrapedurl, thumbnail=scrapedthumbnail, fulltitle=title, show=scrapedtitle ) )

    ## Paginación
    next_page = scrapertools.find_single_match( data, '<a class="next page-numbers" href="([^"]+)">' )
    if next_page != "":
        itemlist.append( Item( channel=__channel__, action="fichas" , title="[COLOR orange]Successivo >>[/COLOR]" , url=next_page ) )

    return itemlist
gets the job done

Re: Altadefinizione.click (IT)

Publicado: 30 Ago 2015, 07:17
por robalo
zanzibar1982 escribió:Yep, this
[....]
gets the job done
Sólo hace una parte, te falta el sugundo fix.
Si haces una búsqueda con la palabra 'scan' sólo te saldrá 10 items cuando deberían de ser 12

Re: Altadefinizione.click (IT)

Publicado: 30 Ago 2015, 12:30
por zanzibar1982
Applying both of them I get two times the same results in the list of movies.

The entries list comes out doubled that's why I only put one.

Il'' check better, maybe I'm doing something wrong.

EDIT: let me explain better, the first 6 entries "repeating" issue
if I put

Código: Seleccionar todo

def fichas( item ):
    logger.info( "[altadefinizioneclick.py] fichas" )

    itemlist = []

    # Descarga la pagina
    data = anti_cloudflare( item.url )
    ## fix - calidad
    data = re.sub(
        r'<div class="wrapperImage"[^<]+<a',
        '<div class="wrapperImage"><fix>SD</fix><a',
        data
    )
    ## fix - IMDB
    data = re.sub(
        r'</h\d> </div>',
        '<fix>IMDB: 0.0</fix>',
        data
    )

    ## ------------------------------------------------
    cookies = ""
    matches = re.compile( '(.altadefinizione.click.*?)\n', re.DOTALL ).findall( config.get_cookie_data() )
    for cookie in matches:
        name = cookie.split( '\t' )[5]
        value = cookie.split( '\t' )[6]
        cookies+= name + "=" + value + ";"
    headers.append( ['Cookie',cookies[:-1]] )
    import urllib
    _headers = urllib.urlencode( dict( headers ) )
    ## ------------------------------------------------

    if "/?s=" in item.url:
        patron = '<div class="col-lg-3 col-md-3 col-xs-3">.*?'
        patron+= 'href="([^"]+)".*?'
        patron+= '<div class="wrapperImage"[^<]+'
        patron+= '<[^>]+>([^<]+)<.*?'
        patron+= 'src="([^"]+)".*?'
        patron+= 'class="titleFilm">([^<]+)<.*?'
        patron+= 'IMDB: ([^<]+)<'
    else:
        patron = '<div class="wrapperImage"[^<]+'
        patron+= '<[^>]+>([^<]+)<.*?'
        patron+= 'href="([^"]+)".*?'
        patron+= 'src="([^"]+)".*?'
        patron+= 'href[^>]+>([^<]+)</a>.*?'
        patron+= 'IMDB: ([^<]+)<'

    matches = re.compile( patron, re.DOTALL ).findall( data )

    for scraped_1, scraped_2, scrapedthumbnail, scrapedtitle, scrapedpuntuacion in matches:

        scrapedurl = scraped_2
        scrapedcalidad = scraped_1
        if "/?s=" in item.url:
            scrapedurl = scraped_1
            scrapedcalidad = scraped_2

        title = scrapertools.decodeHtmlentities( scrapedtitle )
        title+= " (" + scrapedcalidad + ") (" + scrapedpuntuacion + ")"

        ## ------------------------------------------------
        scrapedthumbnail+= "|" + _headers
        ## ------------------------------------------------

        itemlist.append( Item( channel=__channel__, action="findvideos", title=title, url=scrapedurl, thumbnail=scrapedthumbnail, fulltitle=title, show=scrapedtitle ) )

    ## Paginación
    next_page = scrapertools.find_single_match( data, '<a class="next page-numbers" href="([^"]+)">' )
    if next_page != "":
        itemlist.append( Item( channel=__channel__, action="fichas" , title="[COLOR orange]Successivo >>[/COLOR]" , url=next_page ) )

    return itemlist
Imagen

Re: Altadefinizione.click (IT)

Publicado: 30 Ago 2015, 20:22
por robalo
Ok, como no ha habido feedback pensé que era una cuestión de gusto.

Se tiene que cambiar <h\d> por <h5>

Re: Altadefinizione.click (IT)

Publicado: 31 Ago 2015, 00:26
por zanzibar1982
TY robalo

so it should be

Código: Seleccionar todo

    ## fix - calidad
    data = re.sub(
        r'<div class="wrapperImage"[^<]+<a',
        '<div class="wrapperImage"><fix>SD</fix><a',
        data
    )
    ## fix - IMDB
    data = re.sub(
        r'<h5> </div>',
        '<fix>IMDB: 0.0</fix>',
        data
    )