You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

297 lines
16 KiB

  1. #coding=utf-8
  2. #!/usr/bin/python
  3. import sys
  4. sys.path.append('..')
  5. from base.spider import Spider
  6. import json
  7. import time
  8. import base64
  9. import re
  10. from urllib import request, parse
  11. import urllib
  12. import urllib.request
  13. import time
  14. class Spider(Spider): # 元类 默认的元类 type
  def getName(self):
      """Return the display name of this source."""
      # Original author's note: "6" is the genuine site.
      display_name = "樱花动漫6"
      return display_name
  def init(self,extend=""):
      """Print a banner line containing ``extend``; nothing else is set up."""
      banner = "============{0}============".format(extend)
      print(banner)
  def isVideoFormat(self,url):
      """Placeholder: performs no check and always returns ``None``."""
      return None
  def manualVideoCheck(self):
      """Placeholder: performs no check and always returns ``None``."""
      return None
  def homeContent(self,filter):
      """Build the home payload: the category list and, optionally, filters.

      Args:
          filter: When truthy, ``self.config['filter']`` is attached under
              the ``'filters'`` key.

      Returns:
          dict with ``'class'`` (a list of ``{'type_name', 'type_id'}``
          entries) and, if requested, ``'filters'``.
      """
      # (label, category id) pairs, in display order.
      categories = (
          ("日本动漫", "1"),
          ("国产动漫", "4"),
          ("动漫电影", "2"),
          ("欧美动漫", "3"),
      )
      payload = {
          'class': [
              {'type_name': label, 'type_id': type_id}
              for label, type_id in categories
          ]
      }
      if filter:
          payload['filters'] = self.config['filter']
      return payload
  def homeVideoContent(self):
      """Fetch the site's front page and return the videos listed on it.

      Returns:
          ``{'list': [...]}`` holding whatever ``custom_list`` extracts from
          the page with the thumbnail-anchor pattern below.
      """
      thumb_pattern = r'<a class="vodlist_thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?><span class=".*?<span class="pic_text text_right">(?P<renew>.+?)</span></a>'
      front_page = self.custom_webReadFile(urlStr='https://yhdm6.top/')
      return {'list': self.custom_list(html=front_page, patternTxt=thumb_pattern)}
  def categoryContent(self,tid,pg,filter,extend):
      """Return one page of a category listing, optionally filtered.

      Args:
          tid: Category id, placed in the ``id/<tid>/`` URL segment.
          pg: Page number; a str or an int is accepted.
          filter: Not used by this method.
          extend: Mapping of selected filter values. The keys ``types``,
              ``letter``, ``year`` and ``by`` are honoured; ``None`` or an
              empty mapping means "no filters".

      Returns:
          dict with ``list`` (videos), ``page`` (``pg`` as passed in),
          ``pagecount`` (int), ``limit`` and ``total``.
      """
      selected = extend or {}

      def segment(key, template, all_means_none=True):
          # Build one URL path segment, or '' when the filter is not set.
          value = selected.get(key)
          if value is None or value == '':
              return ''
          value = str(value)
          # "全部" ("all") is the UI's no-filter choice for types/letter/year.
          if all_means_none and '全部' in value:
              return ''
          # Percent-encode every value: a non-ASCII character anywhere in
          # the request URL would make the HTTP request fail.
          return template.format(urllib.parse.quote(value))

      by = segment('by', 'by/{0}/', all_means_none=False)
      types = segment('types', 'class/{0}/')
      letter = segment('letter', 'letter/{0}/')
      year = segment('year', '/year/{0}')

      # Example of a fully filtered URL:
      # https://yhdm6.top/index.php/vod/show/by/score/class/%E7%A7%91%E5%B9%BB/id/3/letter/W/year/2022.html
      url = 'https://yhdm6.top/index.php/vod/show/{0}{1}id/{2}/{3}page/{4}{5}.html'.format(
          by, types, tid, letter, pg, year)

      page_html = self.custom_webReadFile(urlStr=url)
      videos = self.custom_list(html=page_html,patternTxt=r'<a class="vodlist_thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)"><span class="play hidden_xs"></span><span class="pic_text text_right">(?P<renew>.+?)</span></a>')

      # Fewer than 60 items is taken to mean this is the last page;
      # otherwise at least one more page is advertised.
      page_no = int(pg)
      return {
          'list': videos,
          'page': pg,
          'pagecount': page_no if len(videos) < 60 else page_no + 1,
          'limit': 90,
          'total': 999999,
      }
  78. def detailContent(self,array):
  79. aid = array[0].split('###')
  80. idUrl=aid[1]
  81. title=aid[0]
  82. pic=aid[2]
  83. url=idUrl
  84. htmlTxt = self.custom_webReadFile(urlStr=url,codeName='utf-8')
  85. line=self.custom_RegexGetTextLine(Text=htmlTxt,RegexText=r'<a href="javascript:void\(0\);" alt="(.+?)">',Index=1)
  86. if len(line)<1:
  87. return {'list': []}
  88. playFrom = []
  89. videoList=[]
  90. vodItems = []
  91. circuit=self.custom_lineList(Txt=htmlTxt,mark=r'<ul class="content_playlist',after='</ul>')
  92. playFrom=line
  93. for v in circuit:
  94. vodItems = self.custom_EpisodesList(html=v,RegexText=r'<li><a href="(?P<url>.+?)">(?P<title>.+?)</a></li>')
  95. joinStr = "#".join(vodItems)
  96. videoList.append(joinStr)
  97. vod_play_from='$$$'.join(playFrom)
  98. vod_play_url = "$$$".join(videoList)
  99. typeName=self.custom_RegexGetText(Text=htmlTxt,RegexText=r'<a href="/index.php/vod/search/class/.+?" target="_blank">(.+?)</a>',Index=1)
  100. year=self.custom_RegexGetText(Text=htmlTxt,RegexText=r'<a href="/index.php/vod/search/year/\d{4}.html" target="_blank">(\d{4})</a>',Index=1)
  101. area=typeName
  102. act=self.custom_RegexGetText(Text=htmlTxt,RegexText=r'<a href="/index.php/vod/search/actor/.+?.html" target="_blank">(.+?)</a>',Index=1)
  103. temporary=self.custom_RegexGetTextLine(Text=htmlTxt,RegexText=r'<a href="/index.php/vod/search/director/.+?.html" target="_blank">(.+?)</a>',Index=1)
  104. dir='/'.join(temporary)
  105. cont=self.custom_RegexGetText(Text=htmlTxt,RegexText=r'剧情介绍</h2>(.+?)</span>',Index=1)
  106. vod = {
  107. "vod_id": array[0],
  108. "vod_name": title,
  109. "vod_pic": pic,
  110. "type_name":self.custom_removeHtml(txt=typeName),
  111. "vod_year": self.custom_removeHtml(txt=year),
  112. "vod_area": area,
  113. "vod_remarks": '',
  114. "vod_actor": self.custom_removeHtml(txt=act),
  115. "vod_director": self.custom_removeHtml(txt=dir),
  116. "vod_content": self.custom_removeHtml(txt=cont)
  117. }
  118. vod['vod_play_from'] = vod_play_from
  119. vod['vod_play_url'] = vod_play_url
  120. result = {
  121. 'list': [
  122. vod
  123. ]
  124. }
  125. return result
  def searchContent(self,key,quick):
      """Search the site for ``key`` and return the matching videos.

      Args:
          key: Search keyword; percent-encoded before it goes into the URL.
          quick: Not used by this method.

      Returns:
          ``{'list': [...]}`` with the entries ``custom_list`` extracts from
          the result page.
      """
      keyword = urllib.parse.quote(key)
      search_url = 'https://yhdm6.top/index.php/vod/search.html?wd={0}&submit='.format(keyword)
      result_page = self.custom_webReadFile(urlStr=search_url)
      hits = self.custom_list(html=result_page,patternTxt=r'<a class="vodlist_thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?><span class=".*?<span class="pic_text text_right">(?P<renew>.+?)</span></a>')
      return {'list': hits}
  def playerContent(self,flag,id,vipFlags):
      """Resolve the playable address for one episode page.

      The page is searched for ``var player_aaaa={...}</script>``. When
      exactly one such block is found, its JSON ``url`` field is
      percent-decoded and used directly if it contains ``.m3u8`` at offset 2
      or later. In every other case (no block or several blocks, unparsable
      JSON, missing ``url`` key, or a non-m3u8 address) the episode page URL
      itself is returned with ``parse=1`` so that the player sniffs it.

      Args:
          flag: Not used by this method.
          id: URL of the episode page.
          vipFlags: Not used by this method.

      Returns:
          dict with ``parse`` (0 = play directly, 1 = sniff), ``playUrl``,
          ``url`` and ``header``.
      """
      marker = 'var player_aaaa='
      page = self.custom_webReadFile(urlStr=id,codeName='utf-8')

      stream_url = ''
      blocks = self.custom_lineList(Txt=page,mark=marker,after=r'}</script>')
      if len(blocks) == 1:
          try:
              # The block stops just before the closing brace, so put it back.
              player_cfg = json.loads(blocks[0][len(marker):] + "}")
              stream_url = urllib.parse.unquote(player_cfg['url'])
          except (ValueError, KeyError, TypeError):
              # Malformed player data: fall through to the sniffing path
              # instead of failing the whole playback request.
              stream_url = ''

      if stream_url.find('.m3u8') < 2:
          play_url, sniff = id, 1
      else:
          play_url, sniff = stream_url, 0

      return {
          "parse": sniff,  # 0 = play directly, 1 = sniff
          "playUrl": '',
          "url": play_url,
          "header": '',
      }
  def _build_filter_group():
      """Build one fresh filter group: types, year, letter and sort order."""
      def options(values):
          # Every option shows and sends the same text.
          return [{"n": value, "v": value} for value in values]

      genres = ("全部", "喜剧", "爱情", "恐怖", "动作", "科幻", "剧情", "战争", "犯罪",
                "奇幻", "冒险", "悬疑", "惊悚", "古装", "历史", "运动", "儿童")
      years = ["全部"] + [str(year) for year in range(2023, 1999, -1)]
      letters = ["全部"] + list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ["0-9"]
      return [
          {"key": "types", "name": "类型:", "value": options(genres)},
          {"key": "year", "name": "年份:", "value": options(years)},
          {"key": "letter", "name": "字母:", "value": options(letters)},
          # The sort values are URL path tokens, so they must be the ASCII
          # keywords. Category "1" used to send the Chinese labels here,
          # unlike the other three categories.
          {"key": "by", "name": "排序:", "value": [
              {"n": "按最新", "v": "time"},
              {"n": "按最热", "v": "hits"},
              {"n": "按评分", "v": "score"},
          ]},
      ]

  # Static configuration. "filter" maps a category id to its filter groups;
  # all four categories offer the same choices, each with its own copy.
  config = {
      "player": {},
      "filter": {
          "1": _build_filter_group(),
          "4": _build_filter_group(),
          "2": _build_filter_group(),
          "3": _build_filter_group(),
      }
  }
  # The builder is only needed while the class body runs.
  del _build_filter_group
  # Browser-like User-Agent header set, kept as a class attribute.
  header = {
      'User-Agent': (
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
          'AppleWebKit/537.36 (KHTML, like Gecko) '
          'Chrome/94.0.4606.54 Safari/537.36'
      ),
  }
  def localProxy(self,param):
      """Return the local-proxy response descriptor.

      The original body referenced an undefined name ``action`` and so
      raised NameError on every call. An empty action descriptor is defined
      here so that the method returns a well-formed four-item list.

      Args:
          param: Not used by this method.

      Returns:
          ``[200, "video/MP2T", action, ""]``.
      """
      # NOTE(review): these keys follow the empty placeholder commonly seen
      # in spiders written for this framework; confirm against what the host
      # application actually expects.
      action = {'url': '', 'header': '', 'param': '', 'type': 'string', 'after': ''}
      return [200, "video/MP2T", action, ""]
  189. #-----------------------------------------------自定义函数-----------------------------------------------
  190. #正则取文本
  def custom_RegexGetText(self,Text,RegexText,Index):
      """Return group ``Index`` of the first match of ``RegexText`` in ``Text``.

      The search uses the MULTILINE and DOTALL flags. An empty string is
      returned when the pattern does not match.
      """
      found = re.search(RegexText, Text, re.M|re.S)
      return found.group(Index) if found is not None else ""
  199. #分类取结果
  def custom_list(self,html,patternTxt):
      """Extract video cards from a listing page.

      Args:
          html: Page markup to scan.
          patternTxt: Regex with the named groups ``url``, ``title``,
              ``img`` and ``renew``; applied with MULTILINE and DOTALL.

      Returns:
          List of dicts with ``vod_id`` (``title###absolute_url###img``),
          ``vod_name``, ``vod_pic`` and ``vod_remarks``. Matches whose
          ``url`` group is empty are skipped.
      """
      site_root = "https://yhdm6.top"
      cards = []
      for match in re.finditer(patternTxt, html, re.M|re.S):
          link = match.group('url')
          name = self.custom_removeHtml(txt=match.group('title'))
          cover = match.group('img')
          remark = match.group('renew')
          if len(link) > 0:
              cards.append({
                  "vod_id": "{0}###{1}###{2}".format(name, site_root + link, cover),
                  "vod_name": name,
                  "vod_pic": cover,
                  "vod_remarks": remark
              })
      return cards
  219. #删除html标签
  def custom_removeHtml(self,txt):
      """Strip ``<...>`` tags from ``txt`` and turn ``&nbsp;`` into a space."""
      without_tags = re.sub(r'<[^>]+>', '', txt, flags=re.S)
      return without_tags.replace("&nbsp;"," ")
  224. #访问网页
  def custom_webReadFile(self,urlStr,header=None,codeName='utf-8',timeout=None):
      """Fetch ``urlStr`` and return the response body decoded as text.

      Args:
          urlStr: Address to request.
          header: Request headers. When ``None``, a default set is built
              with ``Referer`` set to the URL itself, a desktop-browser
              ``User-Agent`` and ``Host`` taken from the URL.
          codeName: Encoding used to decode the response body.
          timeout: Optional timeout in seconds passed to ``urlopen``. When
              ``None`` (the default) no timeout argument is passed, which
              is the previous behaviour.

      Returns:
          The decoded response body.

      Raises:
          Whatever ``urllib.request.urlopen`` or ``bytes.decode`` raise;
          nothing is caught here.
      """
      if header is None:
          header = {
              "Referer": urlStr,
              'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36',
              "Host": self.custom_RegexGetText(Text=urlStr,RegexText='https*://(.*?)(/|$)',Index=1)
          }
      req = urllib.request.Request(url=urlStr,headers=header)
      # Only forward a timeout when the caller asked for one, so the default
      # call is exactly what it was before.
      open_kwargs = {} if timeout is None else {'timeout': timeout}
      with urllib.request.urlopen(req, **open_kwargs) as response:
          return response.read().decode(codeName)
  239. #判断是否要调用vip解析
  def ifJx(self,urlTxt):
      """Tell whether ``urlTxt`` needs VIP parsing.

      The original called ``self.get_RegexGetText``, which this class does
      not define; the regex helper it does define, ``custom_RegexGetText``,
      is used instead. The dots in the host names are escaped so they match
      only a literal ``.``.

      Returns:
          1 when the URL mentions ``youku.com``, ``v.qq``, ``bilibili`` or
          ``iqiyi.com``, otherwise 0.
      """
      vip_hosts = r'(youku\.com|v\.qq|bilibili|iqiyi\.com)'
      matched = self.custom_RegexGetText(Text=urlTxt,RegexText=vip_hosts,Index=1)
      return 1 if matched != '' else 0
  246. #取集数
  def custom_EpisodesList(self,html,RegexText):
      """Extract the episodes of one playlist section.

      Args:
          html: Markup of a single playlist section.
          RegexText: Regex with the named groups ``url`` and ``title``;
              applied with MULTILINE and DOTALL.

      Returns:
          List of ``"title$absolute_url"`` strings. Matches whose ``url``
          group is empty are skipped.
      """
      site_root = "https://yhdm6.top"
      return [
          match.group('title') + "$" + site_root + match.group('url')
          for match in re.finditer(RegexText, html, re.M|re.S)
          if len(match.group('url')) > 0
      ]
  258. #取剧集区
  def custom_lineList(self,Txt,mark,after):
      """Cut ``Txt`` into the sections that start at ``mark`` and end before ``after``.

      Args:
          Txt: Text to scan.
          mark: Literal text that opens a section; it is kept in the result.
          after: Literal text that closes a section; it is not kept. It is
              looked for only past the end of ``mark``.

      Returns:
          List of sections in document order. A final section whose
          terminator is missing runs to the end of ``Txt``. An empty
          ``mark`` yields an empty list.

      Changes from the earlier version: a mark at offsets 0..8 is no longer
      ignored, a missing terminator no longer drops the last character of
      the text, and an empty ``after`` can no longer loop forever.
      """
      sections = []
      if not mark:
          return sections
      start = Txt.find(mark)
      while start != -1:
          end = Txt.find(after, start + len(mark))
          if end == -1:
              # Unterminated section: keep everything that is left and stop.
              sections.append(Txt[start:])
              break
          sections.append(Txt[start:end])
          start = Txt.find(mark, end)
      return sections
  267. #正则取文本,返回数组
  def custom_RegexGetTextLine(self,Text,RegexText,Index):
      """Return every match of ``RegexText`` in ``Text`` as a list.

      Matching uses the MULTILINE and DOTALL flags and the semantics of
      ``findall``: with one capture group each item is that group's text,
      with several groups each item is a tuple. ``Index`` is accepted but
      not used. An empty list is returned when nothing matches.
      """
      return list(re.findall(RegexText, Text, re.M|re.S))
  277. # T=Spider()
  278. # l=T.searchContent(key='柯南',quick='')
  279. # l=T.homeVideoContent()
  280. # extend={'types':'科幻',"by":"score"}
  281. # l=T.categoryContent(tid='1',pg='1',filter=False,extend={})
  282. # for x in l['list']:
  283. # print(x['vod_name'])
  284. # mubiao= '机动警察###https://yhdm6.top/index.php/vod/detail/id/18293.html###https://pic.lzzypic.com/upload/vod/20230825-1/c4b3a6eb89c83879b81e5aa996d5e212.jpg'
  285. # playTabulation=T.detailContent(array=[mubiao,])
  286. # print(playTabulation)
  287. # m3u8=T.playerContent(flag='',id='https://yhdm6.top/index.php/vod/play/id/18293/sid/1/nid/6.html',vipFlags=True)
  288. # print(m3u8)
  289. # print(T.config['filter'])