Página 1 de 1

problema con cloudflare y headers

Publicado: 23 Dic 2016, 16:43
por josuecocr
Este es el error que me está dando la web http://cineasiaenlinea.com/ y no puedo leer el contenido porque tiene Cloudflare; necesito que me orienten sobre qué debo hacer.

Código: Seleccionar todo

09:35:58 T:6916  NOTICE: pelisalacarta.default init...
09:35:58 T:6916  NOTICE: pelisalacarta.core.jsontools Probando json incluido en el interprete
09:35:59 T:6916  NOTICE: pelisalacarta.platformcode.launcher run
09:35:59 T:6916  NOTICE: pelisalacarta.platformcode.launcher action= 'ultimos', category= 'cineasiaenlinea.', channel= 'cineasiaenlinea', contentType= 'list', fanart= '', infoLabels= {'mediatype': 'list'}, title= 'Ultimos Proyectos', totalItems= 0, url= 'http://cineasiaenlinea.com/archivos/estrenos/'
09:35:59 T:6916  NOTICE: pelisalacarta.platformcode.launcher channel_file=C:\Users\josue\AppData\Roaming\Kodi\addons\plugin.video.pelisalacarta\channels\cineasiaenlinea.py
09:35:59 T:6916  NOTICE: pelisalacarta.platformcode.launcher running channel channels.cineasiaenlinea C:\Users\josue\AppData\Roaming\Kodi\addons\plugin.video.pelisalacarta\channels\cineasiaenlinea.py
09:35:59 T:6916  NOTICE: pelisalacarta.platformcode.launcher executing channel 'ultimos' method
09:35:59 T:6916  NOTICE: pelisalacarta.channels.cineasiaenlinea ESTRENOS
09:35:59 T:6916  NOTICE: pelisalacarta.core.scrapertools anti_cloudflare url=http://cineasiaenlinea.com/archivos/estrenos/, host=http://cineasiaenlinea.com/, headers=[['User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'], ['Accept-Encoding:gzip, deflate, sdch'], ['Referer: http://cineasiaenlinea.com']], post=None, location=False
09:35:59 T:6916  NOTICE: pelisalacarta.core.scrapertools get_headers_from_response url=http://cineasiaenlinea.com/archivos/estrenos/
09:35:59 T:6916  NOTICE: pelisalacarta.core.scrapertools post=None
09:35:59 T:6916  NOTICE: pelisalacarta.core.scrapertools ficherocookies=C:\Users\josue\AppData\Roaming\Kodi\userdata\addon_data\plugin.video.pelisalacarta\cookies\cineasiaenlinea.com.dat
09:35:59 T:6916  NOTICE: pelisalacarta.core.scrapertools petición GET
09:35:59 T:6916  NOTICE: ######## ERROR #########
09:35:59 T:6916  NOTICE:     [pelisalacarta.platformcode.launcher.run] pelisalacarta.platformcode.launcher Traceback (most recent call last):
                                              File "C:\Users\josue\AppData\Roaming\Kodi\addons\plugin.video.pelisalacarta\platformcode\launcher.py", line 268, in run
                                                itemlist = getattr(channel, item.action)(item)
                                              File "C:\Users\josue\AppData\Roaming\Kodi\addons\plugin.video.pelisalacarta\channels\cineasiaenlinea.py", line 55, in ultimos
                                                data = scrapertools.anti_cloudflare(item.url, headers=headers, host=host)
                                              File "C:\Users\josue\AppData\Roaming\Kodi\addons\plugin.video.pelisalacarta\core\scrapertools.py", line 1598, in anti_cloudflare
                                                resp_headers = get_headers_from_response(url, headers=headers)
                                              File "C:\Users\josue\AppData\Roaming\Kodi\addons\plugin.video.pelisalacarta\core\scrapertools.py", line 1260, in get_headers_from_response
                                                txheaders[header[0]]=header[1]
                                            IndexError: list index out of range
09:36:01 T:5688   ERROR: XFILE::CDirectory::GetDirectory - Error getting plugin://plugin.video.pelisalacarta/?ewogICAgImFjdGlvbiI6ICJ1bHRpbW9zIiwgCiAgICAiY2F0ZWdvcnkiOiAiY2luZWFzaWFlbmxpbmVhLiIsIAogICAgImNoYW5uZWwiOiAiY2luZWFzaWFlbmxpbmVhIiwgCiAgICAiZmFuYXJ0IjogIiIsIAogICAgImluZm9MYWJlbHMiOiB7fSwgCiAgICAidGl0bGUiOiAiVWx0aW1vcyBQcm95ZWN0b3MiLCAKICAgICJ0b3RhbEl0ZW1zIjogMCwgCiAgICAidXJsIjogImh0dHA6Ly9jaW5lYXNpYWVubGluZWEuY29tL2FyY2hpdm9zL2VzdHJlbm9zLyIKfQ%3d%3d
09:36:01 T:5688   ERROR: CGUIMediaWindow::GetDirectory(plugin://plugin.video.pelisalacarta/?ewogICAgImFjdGlvbiI6ICJ1bHRpbW9zIiwgCiAgICAiY2F0ZWdvcnkiOiAiY2luZWFzaWFlbmxpbmVhLiIsIAogICAgImNoYW5uZWwiOiAiY2luZWFzaWFlbmxpbmVhIiwgCiAgICAiZmFuYXJ0IjogIiIsIAogICAgImluZm9MYWJlbHMiOiB7fSwgCiAgICAidGl0bGUiOiAiVWx0aW1vcyBQcm95ZWN0b3MiLCAKICAgICJ0b3RhbEl0ZW1zIjogMCwgCiAgICAidXJsIjogImh0dHA6Ly9jaW5lYXNpYWVubGluZWEuY29tL2FyY2hpdm9zL2VzdHJlbm9zLyIKfQ%3d%3d) failed



Esta es parte del .py que tengo para la web:

Código: Seleccionar todo

# -*- coding: utf-8 -*-
#------------------------------------------------------------
# pelisalacarta - XBMC Plugin
# Canal para cineasiaenlinea
# http://blog.tvalacarta.info/plugin-xbmc/pelisalacarta/
#------------------------------------------------------------

import urlparse,urllib2,urllib,re
import os, sys

from core import logger
from core import config
from core import scrapertools
from core import jsontools
from core.item import Item
from core import servertools

# Value of the add-on's "debug" setting, read once at import time.
DEBUG = config.get_setting("debug")

# Channel metadata.
# NOTE(review): the meaning of the "S" category code and the "generic" type is
# not visible in this file -- confirm against the pelisalacarta channel docs.
__category__ = "S"
__type__ = "generic"
__title__ = "cineasiaenlinea"
__channel__ = "cineasiaenlinea"
__language__ = "ES"
__creationdate__ = "20160216"

# Base URL of the site; relative links are resolved against it with urljoin.
host = "http://cineasiaenlinea.com/"

# Request headers passed to scrapertools.anti_cloudflare, as [name, value] pairs.
# Every entry must have exactly two elements: scrapertools.get_headers_from_response
# does `txheaders[header[0]] = header[1]`, so a single "Name: value" string per
# entry raises "IndexError: list index out of range".
DEFAULT_HEADERS = [
    ["User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"],
    ["Accept-Encoding", "gzip, deflate, sdch"],
    ["Referer", "http://cineasiaenlinea.com"],
]


def isGeneric():
    """Always return True.

    NOTE(review): presumably queried by the plugin framework to tell generic
    channels apart -- the caller is not visible here.
    """
    generic = True
    return generic

def mainlist(item):
    """Build the channel's root menu.

    The `item` argument is not read. Returns a list of Item objects, one per
    menu section, each carrying the action name to run, its title and the URL
    handed to that action.
    """
    logger.info("pelisalacarta.channels.cineasiaenlinea mainlist")

    # (action, title, url) for every menu row, in display order.
    sections = (
        ("ultimos", "Ultimos Proyectos", "http://cineasiaenlinea.com/archivos/estrenos/"),
        ("pais", "Por País", host),
        ("ano", "Por Año", host),
        ("generos", " Por Genero", host),
        ("search", "Buscar", urlparse.urljoin(host, "search?cx=&ie=ISO-8859-1&q=")),
    )

    menu = []
    for action, title, url in sections:
        menu.append(Item(channel=__channel__, action=action, title=title, url=url))
    return menu

def ultimos(item):
    """List the entries found on the page at item.url.

    Downloads the page through scrapertools.anti_cloudflare (sending a copy of
    DEFAULT_HEADERS) and returns a list of Item objects with action
    "findvideos", one per regex match. Each Item carries the unescaped title,
    the entry URL and thumbnail URL resolved against `host`, and the text of
    the entry's rel="tag" link as its plot.
    """
    logger.info("pelisalacarta.channels.cineasiaenlinea ESTRENOS")
    headers = DEFAULT_HEADERS[:]
    data = scrapertools.anti_cloudflare(item.url, headers=headers, host=host)

    # The pattern is line-oriented: it spells out the "\n" between fragments
    # and uses greedy ".*" inside each line. It must therefore be compiled
    # WITHOUT re.DOTALL; with DOTALL the greedy wildcards run across the whole
    # page and the groups capture unrelated fragments.
    patron = r'href="(.*?)".*title="(.*?)">\n.*img.*src="(.*?)".*\n.*rel="tag">(.*?)<'
    matches = re.compile(patron).findall(data)

    itemlist = []
    for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedplot in matches:
        title = scrapertools.entityunescape(scrapedtitle)
        url = urlparse.urljoin(host, scrapedurl)
        thumbnail = urlparse.urljoin(host, scrapedthumbnail)
        # The fourth group is the tag's text, not a link, so it is used as-is;
        # joining it with `host` would turn it into a bogus URL.
        plot = scrapedplot
        itemlist.append(Item(channel=__channel__, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot=plot))

    return itemlist


gracias por su ayuda :D

Re: problema con cloudflare y headers

Publicado: 28 Dic 2016, 13:11
por SeiTaN
A mí no me ha hecho falta anti_cloudflare para obtener los datos de la página; lo he cambiado por la siguiente línea y me descarga la página.

Código: Seleccionar todo

# Replacement for the scrapertools.anti_cloudflare call: download item.url
# with scrapertools.cache_page instead.
# NOTE(review): cache_page's header/cookie handling is not visible here -- confirm.
data = scrapertools.cache_page(item.url)
P.D.: revisa la expresión regular, que no devuelve bien los datos.