#!/usr/bin/python
# coding=utf-8
import sys
sys.path.append('..')
from base.spider import Spider
import re
from urllib import request, parse
import urllib
import urllib.request
import json

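# TVBox/CatVod-style spider for kt30.com ("卡通站"): categories, video lists and
# detail pages are scraped from the site's HTML with regular expressions, and
# playerContent reads the page's `var player_aaaa=` JSON blob to decide whether
# a play URL can be used directly or needs sniffing.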
class Spider(Spider):  # metaclass: the default metaclass is type

    def getName(self):
        return "卡通站(kt30)"

    def init(self, extend=""):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def homeContent(self, filter):
        result = {}
        cateManual = {
            "日本动漫": "r",
            "国产动漫": "g",
            "港台动漫": "gm",
            "动画电影": "v",
            "欧美动漫": "o"
        }
        classes = []
        for k in cateManual:
            classes.append({
                'type_name': k,
                'type_id': cateManual[k]
            })
        result['class'] = classes
        if filter:
            result['filters'] = self.config['filter']
        return result

    def homeVideoContent(self):
        htmlTxt = self.webReadFile(urlStr="http://kt30.com/", header=self.header)
        videos = self.get_list(html=htmlTxt, patternTxt=r'a class="stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?"><span class="play hidden-xs"></span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')
        result = {
            'list': videos
        }
        return result

    def categoryContent(self, tid, pg, filter, extend):
        result = {}
        year = '0'    # year
        types = '0'   # type
        area = 'all'  # area
        url = 'http://kt30.com/{0}/index_{1}.html'.format(tid, pg)
        htmlTxt = self.webReadFile(urlStr=url, header=self.header)
        videos = []
        videos = self.get_list(html=htmlTxt, patternTxt=r'<a class="stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?"><span class="play hidden-xs"></span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')
        numvL = len(videos)
        result['list'] = videos
        result['page'] = pg
        result['pagecount'] = pg if numvL < 17 else 9999
        result['limit'] = numvL
        result['total'] = numvL
        return result

    def detailContent(self, array):
        # vod_id is built by get_list() as "title###url###img"
        aid = array[0].split('###')
        idUrl = aid[1]
        title = aid[0]
        pic = aid[2]
        playFrom = []
        vodItems = []
        videoList = []
        htmlTxt = self.webReadFile(urlStr=idUrl, header=self.header)
        if len(htmlTxt) < 5:
            return {'list': []}
        line = self.get_RegexGetTextLine(Text=htmlTxt, RegexText=r'</span><h3 class="title">(.+?)</h3></div>', Index=1)
        playFrom = [self.removeHtml(txt=vod) for vod in line]

        if len(line) < 1:
            return {'list': []}
        circuit = self.get_lineList(Txt=htmlTxt, mark='<ul class="stui-content__playlist', after='</ul>')
        # print(circuit[0])
        # return
        for vod in circuit:
            vodItems = self.get_EpisodesList(html=vod, RegexText=r'<a href="(?P<url>.+?)">(?P<title>.+?)</a>')
            joinStr = "#".join(vodItems)
            videoList.append(joinStr)

        temporary = self.get_RegexGetTextLine(Text=htmlTxt, RegexText=r'<a href="/vodsearch/----%|\w+?---------.html" target="_blank">(.+?)</a>', Index=1)
        typeName = "/".join(temporary)
        year = self.get_RegexGetText(Text=htmlTxt, RegexText=r'<a href="/vodsearch/-------------\d{4}.html" target="_blank">(\d{4})</a>', Index=1)
        temporary = self.get_RegexGetTextLine(Text=htmlTxt, RegexText=r'<a href="/vodsearch/-.+?------------.html" target="_blank">(.+?)</a>', Index=1)
        act = "/".join(temporary)
        temporary = self.get_RegexGetTextLine(Text=htmlTxt, RegexText=r'<a href="/vodsearch/-----%+?|\w+?--------.html" target="_blank">(.+?)</a>', Index=1)
        dir = "/".join(temporary)
        area = self.get_RegexGetText(Text=htmlTxt, RegexText=r'地区:</b>(.*?)<b>', Index=1)

        # area = self.get_RegexGetText(Text=htmlTxt, RegexText=r'>语言:\s{0,4}(.*?)</p>', Index=1)
        cont = self.get_RegexGetText(Text=htmlTxt, RegexText=r'简介:(.+?)<a href="#desc">详情', Index=1)

        vod = {
            "vod_id": array[0],
            "vod_name": title,
            "vod_pic": pic,
            "type_name": self.removeHtml(txt=typeName),
            "vod_year": year,
            "vod_area": self.removeHtml(txt=area),
            "vod_remarks": "",
            "vod_actor": self.removeHtml(txt=act),
            "vod_director": self.removeHtml(txt=dir),
            "vod_content": self.removeHtml(txt=cont)
        }
        vod['vod_play_from'] = '$$$'.join(playFrom)
        vod['vod_play_url'] = "$$$".join(videoList)

        result = {
            'list': [
                vod
            ]
        }
        return result

    def verifyCode(self):
        pass

    def searchContent(self, key, quick):
        Url = 'http://kt30.com/vodsearch/-------------.html?wd={0}'.format(urllib.parse.quote(key))
        htmlTxt = self.webReadFile(urlStr=Url, header=self.header)
        videos = self.get_list(html=htmlTxt, patternTxt=r'<a class="v-thumb stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?</span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')
        result = {
            'list': videos
        }
        return result

    def playerContent(self, flag, id, vipFlags):
        result = {}
        parse = 1
        jx = 0
        url = id
        htmlTxt = self.webReadFile(urlStr=url, header=self.header)
        # the play page embeds the real URL in a "var player_aaaa={...}" JSON blob
        temporary = self.get_lineList(Txt=htmlTxt, mark=r'var player_aaaa=', after='</script>')

        if len(temporary) > 0:
            jRoot = json.loads(temporary[0][16:])  # skip the 16-char "var player_aaaa=" prefix
            url = jRoot['url']
            if len(url) < 5:
                url = id
            else:
                parse = 0
        result["parse"] = parse  # 1 = sniff the page, 0 = play the URL directly
        result["playUrl"] = ''
        result["url"] = url
        result['jx'] = jx  # 1 = use a VIP parser, 0 = no parsing
        result["header"] = ''
        return result

    config = {
        "player": {},
        "filter": {}
    }
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
        'Host': 'kt30.com',
        "Referer": "http://kt30.com/"
    }

    def localProxy(self, param):
        # the original returned an undefined name 'action'; return an empty body instead
        return [200, "video/MP2T", "", ""]

    # ----------------------------------------------- helper functions -----------------------------------------------
    # fetch a web page
    def webReadFile(self, urlStr, header):
        html = ''
        req = urllib.request.Request(url=urlStr, headers=header)
        with urllib.request.urlopen(req) as response:
            html = response.read().decode('utf-8')
        return html

    # extract one match group with a regex
    def get_RegexGetText(self, Text, RegexText, Index):
        returnTxt = ""
        Regex = re.search(RegexText, Text, re.M | re.S)
        if Regex is None:
            returnTxt = ""
        else:
            returnTxt = Regex.group(Index)
        return returnTxt

    # extract the episode list
    def get_EpisodesList(self, html, RegexText):
        ListRe = re.finditer(RegexText, html, re.M | re.S)
        videos = []
        for vod in ListRe:
            url = vod.group('url')
            title = vod.group('title')
            if len(url) == 0:
                continue
            if url.find('http:') < 0:
                url = 'http://kt30.com' + url
            videos.append(title + "$" + url)
        return videos

    # cut out the play-list blocks between `mark` and `after`
    def get_lineList(self, Txt, mark, after):
        circuit = []
        origin = Txt.find(mark)

        while origin > 8:
            end = Txt.find(after, origin)
            circuit.append(Txt[origin:end])
            origin = Txt.find(mark, end)
        return circuit

    # extract all matches of one regex group, returned as a list
    def get_RegexGetTextLine(self, Text, RegexText, Index):
        returnTxt = []
        ListRe = re.finditer(RegexText, Text, re.M | re.S)
        for value in ListRe:
            t = value.group(Index)
            if t is None:
                continue
            returnTxt.append(t)
        return returnTxt

    # parse a category/search result list
    def get_list(self, html, patternTxt):
        ListRe = re.finditer(patternTxt, html, re.M | re.S)
        videos = []
        head = "http://kt30.com"
        for vod in ListRe:
            url = vod.group('url')
            title = self.removeHtml(txt=vod.group('title'))
            img = vod.group('img')
            renew = vod.group('renew')
            if len(url) == 0:
                continue
            if len(img) < 5:
                img = 'https://agit.ai/lanhaidixingren/Tvbox/raw/branch/master/CoverError.png'
            if self.get_RegexGetText(Text=img, RegexText='(https{0,1}:)', Index=1) == '':
                img = head + img
            # print(title)
            videos.append({
                "vod_id": "{0}###{1}###{2}".format(title, head + url, img),
                "vod_name": title,
                "vod_pic": img,
                "vod_remarks": renew
            })
        return videos

    # strip HTML tags
    def removeHtml(self, txt):
        soup = re.compile(r'<[^>]+>', re.S)
        txt = soup.sub('', txt)
        return txt.replace(" ", " ")

    # bangumi (anime series) list
    def get_list_fanju(self, html):
        ListRe = re.finditer('class="jtxqj"><a href="(?P<url>.+?)" title="(?P<title>.+?)" target="_self">(?P<renew>.+?)</a>', html, re.M | re.S)
        videos = []
        head = "http://ktkkt8.com"
        img = 'https://agit.ai/lanhaidixingren/Tvbox/raw/branch/master/%E5%B0%81%E9%9D%A2.jpeg'
        for vod in ListRe:
            url = vod.group('url')
            title = self.removeHtml(txt=vod.group('title'))
            renew = vod.group('renew')
            if len(url) == 0:
                continue
            videos.append({
                "vod_id": "{0}###{1}###{2}".format(title, head + url, img),
                "vod_name": title,
                "vod_pic": img,
                "vod_remarks": renew
            })
        return videos

# T = Spider()
# l = T.homeVideoContent()
# l = T.searchContent(key='柯南', quick='')
# l = T.categoryContent(tid='r', pg='1', filter=False, extend={})
# for x in l['list']:
#     print(x['vod_id'])
# mubiao = l['list'][1]['vod_id']
# playTabulation = T.detailContent(array=[mubiao, ])
# # print(playTabulation)
# vod_play_from = playTabulation['list'][0]['vod_play_from']
# vod_play_url = playTabulation['list'][0]['vod_play_url']
# url = vod_play_url.split('$$$')
# vod_play_from = vod_play_from.split('$$$')[0]
# url = url[0].split('$')
# url = url[1].split('#')[0]
# print(url)
# m3u8 = T.playerContent(flag=vod_play_from, id=url, vipFlags=True)
# print(m3u8)
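
# A minimal, guarded smoke test sketched from the disabled checks above. It
# assumes base.spider is importable via sys.path ('..') and that kt30.com is
# reachable; names like `page` and `detail` are illustrative only.
if __name__ == '__main__':
    T = Spider()
    page = T.categoryContent(tid='r', pg='1', filter=False, extend={})
    for x in page['list']:
        print(x['vod_id'])
    if page['list']:
        detail = T.detailContent(array=[page['list'][0]['vod_id']])
        if detail['list']:
            print(detail['list'][0]['vod_play_from'])
            print(detail['list'][0]['vod_play_url'])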