tvbox/libs/py_kt30.py

#coding=utf-8
#!/usr/bin/python
import sys
sys.path.append('..')
from base.spider import Spider
import re
from urllib import request, parse
import urllib
import urllib.request
import json
class Spider(Spider):  # 元类 默认的元类 type
	def getName(self):
		return "卡通站(kt30)"
	def init(self,extend=""):
		pass
	def isVideoFormat(self,url):
		pass
	def manualVideoCheck(self):
		pass
	def homeContent(self,filter):
		result = {}
		cateManual = {
			"日本动漫": "r",
			"国产动漫": "g",
			"港台动漫": "gm",
			"动画电影": "v",
			"欧美动漫": "o"
		}
		classes = []
		for k in cateManual:
			classes.append({
				'type_name': k,
				'type_id': cateManual[k]
			})

		result['class'] = classes
		if (filter):
			result['filters'] = self.config['filter']
		return result
	def homeVideoContent(self):
		"""Fetch the site front page and return its video cards.

		Returns:
			``{'list': videos}`` where ``videos`` is whatever ``get_list``
			extracts from the front page HTML with the card pattern below.
		"""
		cardPattern = r'a class="stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?"><span class="play hidden-xs"></span><span class="pic-text text-right">(?P<renew>.+?)</span></a>'
		page = self.webReadFile(urlStr="http://kt30.com/",header=self.header)
		return {'list': self.get_list(html=page,patternTxt=cardPattern)}

	def categoryContent(self,tid,pg,filter,extend):
		"""Fetch one page of a category listing.

		Args:
			tid: Category id; formatted into the listing URL.
			pg: Page number; formatted into the listing URL and echoed back.
			filter: Unused by this implementation.
			extend: Unused by this implementation.

		Returns:
			dict with ``'list'``, ``'page'``, ``'pagecount'``, ``'limit'``
			and ``'total'``.
		"""
		listUrl = 'http://kt30.com/{0}/index_{1}.html'.format(tid,pg)
		page = self.webReadFile(urlStr=listUrl,header=self.header)
		items = self.get_list(html=page,patternTxt=r'<a class="stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?"><span class="play hidden-xs"></span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')
		count = len(items)
		# Fewer than 17 cards makes the current page the last one; otherwise
		# 9999 is reported as the page count.
		if count<17:
			pageCount = pg
		else:
			pageCount = 9999
		return {
			'list': items,
			'page': pg,
			'pagecount': pageCount,
			'limit': count,
			'total': count
		}

	def detailContent(self,array):
		"""Fetch a detail page and build the vod record with its play lists.

		Args:
			array: Sequence whose first element is a vod id of the form
				``title###detailUrl###coverUrl`` (the format ``get_list``
				produces).

		Returns:
			``{'list': [vod]}`` on success, or ``{'list': []}`` when the page
			is shorter than 5 characters or no play-source title is found.

		Raises:
			IndexError: if the vod id has fewer than three ``###`` parts.
		"""
		vodId = array[0]
		parts = vodId.split('###')
		detailUrl = parts[1]
		name = parts[0]
		cover = parts[2]

		page = self.webReadFile(urlStr=detailUrl,header=self.header)
		if len(page)<5:
			return {'list': []}

		# One <h3 class="title"> heading per play source.
		sourceTitles = self.get_RegexGetTextLine(Text=page,RegexText=r'</span><h3 class="title">(.+?)</h3></div>',Index=1)
		if len(sourceTitles)<1:
			return {'list': []}
		playFrom = [self.removeHtml(txt=heading) for heading in sourceTitles]

		# One playlist <ul> per source; episodes inside a source are joined with '#'.
		playlistBlocks = self.get_lineList(Txt=page,mark='<ul class="stui-content__playlist',after='</ul>')
		playUrls = [
			"#".join(self.get_EpisodesList(html=block,RegexText=r'<a href="(?P<url>.+?)">(?P<title>.+?)</a>'))
			for block in playlistBlocks
		]

		def joinedLinks(pattern):
			# All group-1 captures of `pattern`, joined with '/'.
			return "/".join(self.get_RegexGetTextLine(Text=page,RegexText=pattern,Index=1))

		def firstCapture(pattern):
			# Group 1 of the first match of `pattern`, or '' when there is none.
			return self.get_RegexGetText(Text=page,RegexText=pattern,Index=1)

		# NOTE(review): the '|' in the genre and director patterns is a top-level
		# alternation, so only the right-hand alternative carries group 1;
		# matches of the left-hand one yield None and are dropped by
		# get_RegexGetTextLine. Looks unintended - confirm against the live markup.
		typeName = joinedLinks(r'<a href="/vodsearch/----%|\w+?---------.html" target="_blank">(.+?)</a>')
		year = firstCapture(r'<a href="/vodsearch/-------------\d{4}.html" target="_blank">(\d{4})</a>')
		actors = joinedLinks(r'<a href="/vodsearch/-.+?------------.html" target="_blank">(.+?)</a>')
		director = joinedLinks(r'<a href="/vodsearch/-----%+?|\w+?--------.html" target="_blank">(.+?)</a>')
		area = firstCapture(r'地区：</b>(.*?)<b>')
		summary = firstCapture(r'简介：(.+?)<a href="#desc">详情')

		vod = {
			"vod_id": vodId,
			"vod_name": name,
			"vod_pic": cover,
			"type_name": self.removeHtml(txt=typeName),
			"vod_year": year,
			"vod_area": self.removeHtml(txt=area),
			"vod_remarks": "",
			"vod_actor":  self.removeHtml(txt=actors),
			"vod_director": self.removeHtml(txt=director),
			"vod_content": self.removeHtml(txt=summary),
			# Sources and their episode lists are each joined with '$$$'.
			"vod_play_from": '$$$'.join(playFrom),
			"vod_play_url": "$$$".join(playUrls)
		}
		return {'list': [vod]}

	def verifyCode(self):
		pass

	def searchContent(self,key,quick):
		"""Search the site for ``key`` and return the matching video cards.

		Args:
			key: Search keyword; percent-encoded before it is put in the URL.
			quick: Unused by this implementation.

		Returns:
			``{'list': videos}`` as extracted by ``get_list``.
		"""
		keyword = urllib.parse.quote(key)
		searchUrl = 'http://kt30.com/vodsearch/-------------.html?wd={0}'.format(keyword)
		page = self.webReadFile(urlStr=searchUrl,header=self.header)
		cardPattern = r'<a class="v-thumb stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?</span><span class="pic-text text-right">(?P<renew>.+?)</span></a>'
		return {'list': self.get_list(html=page,patternTxt=cardPattern)}

	def playerContent(self,flag,id,vipFlags):
		"""Resolve an episode page to a playable URL.

		The page is searched for the ``var player_aaaa=`` script assignment;
		the JSON after it supplies the media URL.

		Args:
			flag: Unused by this implementation.
			id: URL of the episode page.
			vipFlags: Unused by this implementation.

		Returns:
			dict with ``parse`` (1 = sniff the page, 0 = play ``url``
			directly), ``playUrl``, ``url``, ``jx`` (1 = VIP parsing,
			0 = none; always 0 here) and ``header``.
		"""
		marker = r'var player_aaaa='
		playUrl = id
		sniff = 1
		page = self.webReadFile(urlStr=id,header=self.header)
		scripts = self.get_lineList(Txt=page,mark=marker,after='</script>')

		if len(scripts)>0:
			# Drop the leading marker (16 characters) so only the JSON remains.
			playerCfg = json.loads(scripts[0][len(marker):])
			direct = playerCfg['url']
			# A URL shorter than 5 characters is not used; the page URL is kept for sniffing.
			if len(direct)>=5:
				playUrl = direct
				sniff = 0
		return {
			"parse": sniff,
			"playUrl": '',
			"url": playUrl,
			'jx': 0,
			"header": ''
		}
	# Static configuration; homeContent returns config['filter'] when filters are requested.
	config = {
		"player": {},
		"filter": {}
	}
	# HTTP request headers that the content methods pass to webReadFile.
	header = {
		"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
		'Host': 'kt30.com',
		"Referer": "http://kt30.com/"
		}

	def localProxy(self,param):
		return [200, "video/MP2T", action, ""]
#-----------------------------------------------自定义函数-----------------------------------------------
	#访问网页
	def webReadFile(self,urlStr,header):
		"""Download ``urlStr`` and return the response body as text.

		Args:
			urlStr: URL to request.
			header: dict of HTTP request headers.

		Returns:
			The response body decoded as UTF-8.
		"""
		req = urllib.request.Request(url=urlStr,headers=header)
		with urllib.request.urlopen(req) as response:
			return response.read().decode('utf-8')
	#正则取文本
	def get_RegexGetText(self,Text,RegexText,Index):
		returnTxt=""
		Regex=re.search(RegexText, Text, re.M|re.S)
		if Regex is None:
			returnTxt=""
		else:
			returnTxt=Regex.group(Index)
		return returnTxt
	#取集数
	def get_EpisodesList(self,html,RegexText):
		ListRe=re.finditer(RegexText, html, re.M|re.S)
		videos = []
		for vod in ListRe:
			url = vod.group('url')
			title =vod.group('title')
			if len(url) == 0:
				continue
			if url.find('http:') <0:
				url='http://kt30.com'+url
			videos.append(title+"$"+url)
		return videos
	#取剧集区
	def get_lineList(self,Txt,mark,after):
		circuit=[]
		origin=Txt.find(mark)
		
		while origin>8:
			end=Txt.find(after,origin)
			circuit.append(Txt[origin:end])
			origin=Txt.find(mark,end)
		return circuit	
	#正则取文本,返回数组	
	def get_RegexGetTextLine(self,Text,RegexText,Index):
		returnTxt=[]
		ListRe=istRe=re.finditer(RegexText, Text, re.M|re.S)
		for value in ListRe:
			t=value.group(Index)
			if t==None:
				continue
			returnTxt.append(t)	
		return returnTxt
	#分类取结果
	def get_list(self,html,patternTxt):
		ListRe=re.finditer(patternTxt, html, re.M|re.S)
		videos = []
		head="http://kt30.com"
		for vod in ListRe:
			url = vod.group('url')
			title =self.removeHtml(txt=vod.group('title'))
			img =vod.group('img')
			renew=vod.group('renew')
			if len(url) == 0:
				continue
			if len(img)<5:
				img='https://agit.ai/lanhaidixingren/Tvbox/raw/branch/master/CoverError.png'
			if self.get_RegexGetText(Text=img,RegexText='(https{0,1}:)',Index=1)=='':
				img=head+img
			# print(title)
			videos.append({
				"vod_id":"{0}###{1}###{2}".format(title,head+url,img),
				"vod_name":title,
				"vod_pic":img,
				"vod_remarks":renew
			})
		return videos
	#删除html标签
	def removeHtml(self,txt):
		soup = re.compile(r'<[^>]+>',re.S)
		txt =soup.sub('', txt)
		return txt.replace("&nbsp;"," ")
	#番剧
	def get_list_fanju(self,html):
		ListRe=re.finditer('class="jtxqj"><a href="(?P<url>.+?)" title="(?P<title>.+?)" target="_self">(?P<renew>.+?)</a>', html, re.M|re.S)
		videos = []
		head="http://ktkkt8.com"
		img='https://agit.ai/lanhaidixingren/Tvbox/raw/branch/master/%E5%B0%81%E9%9D%A2.jpeg'
		for vod in ListRe:
			url = vod.group('url')
			title =self.removeHtml(txt=vod.group('title'))
			renew=vod.group('renew')
			if len(url) == 0:
				continue
			videos.append({
				"vod_id":"{0}###{1}###{2}".format(title,head+url,img),
				"vod_name":title,
				"vod_pic":img,
				"vod_remarks":renew
			})
		return videos

# T=Spider()
# l=T.homeVideoContent()
# l=T.searchContent(key='柯南',quick='')
# l=T.categoryContent(tid='r',pg='1',filter=False,extend={})
# for x in l['list']:
# 	print(x['vod_id'])
# mubiao= l['list'][1]['vod_id']
# playTabulation=T.detailContent(array=[mubiao,])
# # print(playTabulation)
# vod_play_from=playTabulation['list'][0]['vod_play_from']
# vod_play_url=playTabulation['list'][0]['vod_play_url']
# url=vod_play_url.split('$$$')
# vod_play_from=vod_play_from.split('$$$')[0]
# url=url[0].split('$')
# url=url[1].split('#')[0]
# print(url)
# m3u8=T.playerContent(flag=vod_play_from,id=url,vipFlags=True)
# print(m3u8)