tvbox/libs/py_kt30.py


								#coding=utf-8

								#!/usr/bin/python

								import sys

								sys.path.append('..')

								from base.spider import Spider

								import re

								from urllib import request, parse

								import urllib

								import urllib.request

								import json

								class Spider(Spider):  # 元类 默认的元类 type

									def getName(self):
										"""Return the display name of this spider."""
										name = "卡通站(kt30)"
										return name

									def init(self,extend=""):
										"""Initialisation hook; this spider has nothing to set up.

										``extend`` is accepted but ignored. Always returns ``None``.
										"""
										return None

									def isVideoFormat(self,url):
										"""Unimplemented hook: ``url`` is ignored and ``None`` is returned."""
										return None

									def manualVideoCheck(self):
										"""Unimplemented hook: does nothing and returns ``None``."""
										return None

									def homeContent(self,filter):

										result = {}

										cateManual = {

											"日本动漫": "r",

											"国产动漫": "g",

											"港台动漫": "gm",

											"动画电影": "v",

											"欧美动漫": "o"

										}

										classes = []

										for k in cateManual:

											classes.append({

												'type_name': k,

												'type_id': cateManual[k]

											})


										result['class'] = classes

										if (filter):

											result['filters'] = self.config['filter']

										return result

									def homeVideoContent(self):

										htmlTxt = self.webReadFile(urlStr="http://kt30.com/",header=self.header)

										videos = self.get_list(html=htmlTxt,patternTxt=r'a class="stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?"><span class="play hidden-xs"></span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')

										result = {

											'list': videos

										}

										return result


									def categoryContent(self,tid,pg,filter,extend):

										result = {}

										year='0'#年份

										types='0'#类型

										area='all'#地区

										url = 'http://kt30.com/{0}/index_{1}.html'.format(tid,pg)

										htmlTxt=self.webReadFile(urlStr=url,header=self.header)

										videos=[]

										videos = self.get_list(html=htmlTxt,patternTxt=r'<a class="stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?"><span class="play hidden-xs"></span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')

										numvL = len(videos)

										result['list'] = videos

										result['page'] = pg

										result['pagecount'] = pg if numvL<17 else 9999

										result['limit'] = numvL

										result['total'] = numvL

										return result


									def detailContent(self,array):

										aid = array[0].split('###')

										idUrl=aid[1]

										title=aid[0]

										pic=aid[2]

										playFrom = []

										vodItems = []

										videoList=[]

										htmlTxt = self.webReadFile(urlStr=idUrl,header=self.header)

										if len(htmlTxt)<5:

											return {'list': []}

										line=self.get_RegexGetTextLine(Text=htmlTxt,RegexText=r'</span><h3 class="title">(.+?)</h3></div>',Index=1)

										playFrom=[self.removeHtml(txt=vod) for vod in line]


										if len(line)<1:

											return {'list': []}

										circuit=self.get_lineList(Txt=htmlTxt,mark='<ul class="stui-content__playlist',after='</ul>')

										# print(circuit[0])

										# return

										for vod in circuit:

											vodItems = self.get_EpisodesList(html=vod,RegexText=r'<a href="(?P<url>.+?)">(?P<title>.+?)</a>')

											joinStr = "#".join(vodItems)

											videoList.append(joinStr)


										temporary=self.get_RegexGetTextLine(Text=htmlTxt,RegexText=r'<a href="/vodsearch/----%|\w+?---------.html" target="_blank">(.+?)</a>',Index=1)

										typeName="/".join(temporary)

										year=self.get_RegexGetText(Text=htmlTxt,RegexText=r'<a href="/vodsearch/-------------\d{4}.html" target="_blank">(\d{4})</a>',Index=1)

										temporary=self.get_RegexGetTextLine(Text=htmlTxt,RegexText=r'<a href="/vodsearch/-.+?------------.html" target="_blank">(.+?)</a>',Index=1)

										act="/".join(temporary)

										temporary=self.get_RegexGetTextLine(Text=htmlTxt,RegexText=r'<a href="/vodsearch/-----%+?|\w+?--------.html" target="_blank">(.+?)</a>',Index=1)

										dir="/".join(temporary)

										area=self.get_RegexGetText(Text=htmlTxt,RegexText=r'地区：</b>(.*?)<b>',Index=1)


										#area=self.get_RegexGetText(Text=htmlTxt,RegexText=r'>语言：\s{0,4}(.*?)</p>',Index=1)

										cont=self.get_RegexGetText(Text=htmlTxt,RegexText=r'简介：(.+?)<a href="#desc">详情',Index=1)


										vod = {

											"vod_id": array[0],

											"vod_name": title,

											"vod_pic": pic,

											"type_name": self.removeHtml(txt=typeName),

											"vod_year": year,

											"vod_area": self.removeHtml(txt=area),

											"vod_remarks": "",

											"vod_actor":  self.removeHtml(txt=act),

											"vod_director": self.removeHtml(txt=dir),

											"vod_content": self.removeHtml(txt=cont)

										}

										vod['vod_play_from'] = '$$$'.join(playFrom)

										vod['vod_play_url'] =  "$$$".join(videoList)


										result = {

											'list': [

												vod

											]

										}

										return result


									def verifyCode(self):
										"""Unimplemented hook: does nothing and returns ``None``."""
										return None


									def searchContent(self,key,quick):

										Url='http://kt30.com/vodsearch/-------------.html?wd={0}'.format(urllib.parse.quote(key))

										htmlTxt = self.webReadFile(urlStr=Url,header=self.header)

										videos = self.get_list(html=htmlTxt,patternTxt=r'<a class="v-thumb stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?</span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')

										result = {

												'list': videos

											}

										return result


									def playerContent(self,flag,id,vipFlags):

										result = {}

										parse=1

										jx=0

										url=id

										htmlTxt=self.webReadFile(urlStr=url,header=self.header)

										temporary=self.get_lineList(Txt=htmlTxt,mark=r'var player_aaaa=',after='</script>')


										if len(temporary)>0:

											jRoot=json.loads(temporary[0][16:])

											url=jRoot['url']

											if len(url)<5:

												url=id

											else:

												parse=0

										result["parse"] = parse#1=嗅探,0=播放

										result["playUrl"] = ''

										result["url"] = url

										result['jx'] = jx#1=VIP解析,0=不解析

										result["header"] = ''

										return result

									# Static configuration; homeContent reads the 'filter' entry.
									config = {
										"player": {},
										"filter": {},
									}

									# Request headers; every page fetch in this class passes them to webReadFile.
									header = {
										"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
										"Host": "kt30.com",
										"Referer": "http://kt30.com/",
									}


									def localProxy(self,param):
										"""Answer a local-proxy request with a fixed placeholder response.

										Args:
											param: request parameters; not used by this method.

										Returns:
											``[200, "video/MP2T", action, ""]`` where ``action`` is an empty
											action descriptor.
										"""
										# The original body returned the undefined name ``action`` and so
										# raised NameError on every call.
										# NOTE(review): the keys below are assumed to be the ones the host
										# app's spider template expects - confirm against the base class.
										action = {
											'url': '',
											'header': '',
											'param': '',
											'type': 'string',
											'after': ''
										}
										return [200, "video/MP2T", action, ""]

								#-----------------------------------------------自定义函数-----------------------------------------------

									#访问网页

									def webReadFile(self,urlStr,header,timeout=15):
										"""Fetch ``urlStr`` and return the response body as text.

										Args:
											urlStr: address to request.
											header: dict of request headers to send.
											timeout: seconds to wait on blocking network operations. Without
												it a stalled server would block the caller indefinitely.

										Returns:
											The body decoded as UTF-8. Bytes that are not valid UTF-8 become
											U+FFFD instead of aborting the whole fetch.

										Raises:
											Whatever ``urllib.request.urlopen`` raises for an unreachable or
											failing address (for example ``urllib.error.URLError``).
										"""
										req = urllib.request.Request(url=urlStr,headers=header)
										with urllib.request.urlopen(req, timeout=timeout) as response:
											return response.read().decode('utf-8', errors='replace')

									#正则取文本

									def get_RegexGetText(self,Text,RegexText,Index):
										"""Return group ``Index`` of the first match of ``RegexText`` in ``Text``.

										The search runs with ``re.M|re.S``. An empty string is returned when
										the pattern does not match at all.
										"""
										found = re.search(RegexText, Text, re.M|re.S)
										return "" if found is None else found.group(Index)

									#取集数

									def get_EpisodesList(self,html,RegexText):
										"""Extract episode entries from one playlist fragment.

										Args:
											html: markup of a single playlist section.
											RegexText: pattern with the named groups ``url`` and ``title``.

										Returns:
											List of ``"title$url"`` strings in document order. Site-relative
											links are resolved against ``http://kt30.com``.
										"""
										base = 'http://kt30.com'
										episodes = []
										for item in re.finditer(RegexText, html, re.M|re.S):
											link = item.group('url')
											if not link:
												continue
											# urljoin leaves absolute http/https links untouched; the old
											# "contains 'http:'" test glued the site host in front of
											# https:// links.
											episodes.append(item.group('title') + "$" + urllib.parse.urljoin(base, link))
										return episodes

									#取剧集区

									def get_lineList(self,Txt,mark,after):
										"""Cut out every section of ``Txt`` that starts at ``mark``.

										Each section begins at an occurrence of ``mark`` and ends just before
										the next occurrence of ``after`` (the terminator is not included).

										Args:
											Txt: text to scan.
											mark: start marker; an empty marker yields no sections.
											after: terminator searched for after the marker.

										Returns:
											The sections in document order. If the last marker has no
											terminator, that section runs to the end of ``Txt``.
										"""
										sections = []
										if not mark:
											return sections
										start = Txt.find(mark)
										# str.find signals "not found" with -1; the old ``> 8`` test also
										# skipped markers located in the first nine characters.
										while start != -1:
											# Look for the terminator after the marker itself so that every
											# round is guaranteed to move forward.
											stop = Txt.find(after, start + len(mark))
											if stop == -1:
												sections.append(Txt[start:])
												break
											sections.append(Txt[start:stop])
											start = Txt.find(mark, stop)
										return sections

									#正则取文本,返回数组

									def get_RegexGetTextLine(self,Text,RegexText,Index):
										"""Return group ``Index`` of every match of ``RegexText`` in ``Text``.

										Matching runs with ``re.M|re.S``. Matches in which the requested
										group did not take part (its value is ``None``) are left out.
										"""
										captured = (hit.group(Index) for hit in re.finditer(RegexText, Text, re.M|re.S))
										return [piece for piece in captured if piece is not None]

									#分类取结果

									def get_list(self,html,patternTxt):

										ListRe=re.finditer(patternTxt, html, re.M|re.S)

										videos = []

										head="http://kt30.com"

										for vod in ListRe:

											url = vod.group('url')

											title =self.removeHtml(txt=vod.group('title'))

											img =vod.group('img')

											renew=vod.group('renew')

											if len(url) == 0:

												continue

											if len(img)<5:

												img='https://agit.ai/lanhaidixingren/Tvbox/raw/branch/master/CoverError.png'

											if self.get_RegexGetText(Text=img,RegexText='(https{0,1}:)',Index=1)=='':

												img=head+img

											# print(title)

											videos.append({

												"vod_id":"{0}###{1}###{2}".format(title,head+url,img),

												"vod_name":title,

												"vod_pic":img,

												"vod_remarks":renew

											})

										return videos

									#删除html标签

									def removeHtml(self,txt):

										soup = re.compile(r'<[^>]+>',re.S)

										txt =soup.sub('', txt)

										return txt.replace("&nbsp;"," ")

									#番剧

									def get_list_fanju(self,html):

										ListRe=re.finditer('class="jtxqj"><a href="(?P<url>.+?)" title="(?P<title>.+?)" target="_self">(?P<renew>.+?)</a>', html, re.M|re.S)

										videos = []

										head="http://ktkkt8.com"

										img='https://agit.ai/lanhaidixingren/Tvbox/raw/branch/master/%E5%B0%81%E9%9D%A2.jpeg'

										for vod in ListRe:

											url = vod.group('url')

											title =self.removeHtml(txt=vod.group('title'))

											renew=vod.group('renew')

											if len(url) == 0:

												continue

											videos.append({

												"vod_id":"{0}###{1}###{2}".format(title,head+url,img),

												"vod_name":title,

												"vod_pic":img,

												"vod_remarks":renew

											})

										return videos


								# T=Spider()

								# l=T.homeVideoContent()

								# l=T.searchContent(key='柯南',quick='')

								# l=T.categoryContent(tid='r',pg='1',filter=False,extend={})

								# for x in l['list']:

								# 	print(x['vod_id'])

								# mubiao= l['list'][1]['vod_id']

								# playTabulation=T.detailContent(array=[mubiao,])

								# # print(playTabulation)

								# vod_play_from=playTabulation['list'][0]['vod_play_from']

								# vod_play_url=playTabulation['list'][0]['vod_play_url']

								# url=vod_play_url.split('$$$')

								# vod_play_from=vod_play_from.split('$$$')[0]

								# url=url[0].split('$')

								# url=url[1].split('#')[0]

								# print(url)

								# m3u8=T.playerContent(flag=vod_play_from,id=url,vipFlags=True)

								# print(m3u8)