上传文件至 json

2024-05-09 20:41:13 +08:00 · 2024-05-09 20:41:13 +08:00 · 03c4cf24f1
parent af66b214a0
commit 03c4cf24f1
1 changed files with 285 additions and 0 deletions
--- a/json/py_kt30.json
+++ b/json/py_kt30.json
@ -0,0 +1,285 @@
+#coding=utf-8
+#!/usr/bin/python
+import sys
+sys.path.append('..')
+from base.spider import Spider
+import re
+from urllib import request, parse
+import urllib
+import urllib.request
+import json
+class Spider(Spider):  # 元类 默认的元类 type
+	def getName(self):
+		return "卡通站(kt30)"
+	def init(self,extend=""):
+		pass
+	def isVideoFormat(self,url):
+		pass
+	def manualVideoCheck(self):
+		pass
+	def homeContent(self,filter):
+		result = {}
+		cateManual = {
+			"日本动漫": "r",
+			"国产动漫": "g",
+			"港台动漫": "gm",
+			"动画电影": "v",
+			"欧美动漫": "o"
+		}
+		classes = []
+		for k in cateManual:
+			classes.append({
+				'type_name': k,
+				'type_id': cateManual[k]
+			})
+
+		result['class'] = classes
+		if (filter):
+			result['filters'] = self.config['filter']
+		return result
+	def homeVideoContent(self):
+		htmlTxt = self.webReadFile(urlStr="http://kt30.com/",header=self.header)
+		videos = self.get_list(html=htmlTxt,patternTxt=r'a class="stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?"><span class="play hidden-xs"></span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')
+		result = {
+			'list': videos
+		}
+		return result
+
+	def categoryContent(self,tid,pg,filter,extend):
+		result = {}
+		year='0'#年份
+		types='0'#类型
+		area='all'#地区
+		url = 'http://kt30.com/{0}/index_{1}.html'.format(tid,pg)
+		htmlTxt=self.webReadFile(urlStr=url,header=self.header)
+		videos=[]
+		videos = self.get_list(html=htmlTxt,patternTxt=r'<a class="stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?"><span class="play hidden-xs"></span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')
+		numvL = len(videos)
+		result['list'] = videos
+		result['page'] = pg
+		result['pagecount'] = pg if numvL<17 else 9999
+		result['limit'] = numvL
+		result['total'] = numvL
+		return result
+
+	def detailContent(self,array):
+		aid = array[0].split('###')
+		idUrl=aid[1]
+		title=aid[0]
+		pic=aid[2]
+		playFrom = []
+		vodItems = []
+		videoList=[]
+		htmlTxt = self.webReadFile(urlStr=idUrl,header=self.header)
+		if len(htmlTxt)<5:
+			return {'list': []}
+		line=self.get_RegexGetTextLine(Text=htmlTxt,RegexText=r'</span><h3 class="title">(.+?)</h3></div>',Index=1)
+		playFrom=[self.removeHtml(txt=vod) for vod in line]
+		
+		if len(line)<1:
+			return {'list': []}
+		circuit=self.get_lineList(Txt=htmlTxt,mark='<ul class="stui-content__playlist',after='</ul>')
+		# print(circuit[0])
+		# return
+		for vod in circuit:
+			vodItems = self.get_EpisodesList(html=vod,RegexText=r'<a href="(?P<url>.+?)">(?P<title>.+?)</a>')
+			joinStr = "#".join(vodItems)
+			videoList.append(joinStr)
+		
+		temporary=self.get_RegexGetTextLine(Text=htmlTxt,RegexText=r'<a href="/vodsearch/----%|\w+?---------.html" target="_blank">(.+?)</a>',Index=1)
+		typeName="/".join(temporary)
+		year=self.get_RegexGetText(Text=htmlTxt,RegexText=r'<a href="/vodsearch/-------------\d{4}.html" target="_blank">(\d{4})</a>',Index=1)
+		temporary=self.get_RegexGetTextLine(Text=htmlTxt,RegexText=r'<a href="/vodsearch/-.+?------------.html" target="_blank">(.+?)</a>',Index=1)
+		act="/".join(temporary)
+		temporary=self.get_RegexGetTextLine(Text=htmlTxt,RegexText=r'<a href="/vodsearch/-----%+?|\w+?--------.html" target="_blank">(.+?)</a>',Index=1)
+		dir="/".join(temporary)
+		area=self.get_RegexGetText(Text=htmlTxt,RegexText=r'地区：</b>(.*?)<b>',Index=1)
+		
+		#area=self.get_RegexGetText(Text=htmlTxt,RegexText=r'>语言：\s{0,4}(.*?)</p>',Index=1)
+		cont=self.get_RegexGetText(Text=htmlTxt,RegexText=r'简介：(.+?)<a href="#desc">详情',Index=1)
+		
+
+		vod = {
+			"vod_id": array[0],
+			"vod_name": title,
+			"vod_pic": pic,
+			"type_name": self.removeHtml(txt=typeName),
+			"vod_year": year,
+			"vod_area": self.removeHtml(txt=area),
+			"vod_remarks": "",
+			"vod_actor":  self.removeHtml(txt=act),
+			"vod_director": self.removeHtml(txt=dir),
+			"vod_content": self.removeHtml(txt=cont)
+		}
+		vod['vod_play_from'] = '$$$'.join(playFrom)
+		vod['vod_play_url'] =  "$$$".join(videoList)
+
+		result = {
+			'list': [
+				vod
+			]
+		}
+		return result
+
+	def verifyCode(self):
+		pass
+
+	def searchContent(self,key,quick):
+		Url='http://kt30.com/vodsearch/-------------.html?wd={0}'.format(urllib.parse.quote(key))
+		htmlTxt = self.webReadFile(urlStr=Url,header=self.header)
+		videos = self.get_list(html=htmlTxt,patternTxt=r'<a class="v-thumb stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?</span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')
+		result = {
+				'list': videos
+			}
+		return result
+
+	def playerContent(self,flag,id,vipFlags):
+		result = {}
+		parse=1
+		jx=0
+		url=id
+		htmlTxt=self.webReadFile(urlStr=url,header=self.header)
+		temporary=self.get_lineList(Txt=htmlTxt,mark=r'var player_aaaa=',after='</script>')
+		
+		if len(temporary)>0:
+			jRoot=json.loads(temporary[0][16:])
+			url=jRoot['url']
+			if len(url)<5:
+				url=id		
+			else:	
+				parse=0
+		result["parse"] = parse#1=嗅探,0=播放
+		result["playUrl"] = ''
+		result["url"] = url
+		result['jx'] = jx#1=VIP解析,0=不解析
+		result["header"] = ''	
+		return result
+	config = {
+		"player": {},
+		"filter": {}
+	}
+	header = {
+		"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
+		'Host': 'kt30.com',
+		"Referer": "http://kt30.com/"
+		}
+
+	def localProxy(self,param):
+		return [200, "video/MP2T", action, ""]
+#-----------------------------------------------自定义函数-----------------------------------------------
+	#访问网页
+	def webReadFile(self,urlStr,header):
+		html=''
+		req=urllib.request.Request(url=urlStr,headers=header)#,headers=header
+		with  urllib.request.urlopen(req)  as response:
+			html = response.read().decode('utf-8')
+		return html
+	#正则取文本
+	def get_RegexGetText(self,Text,RegexText,Index):
+		returnTxt=""
+		Regex=re.search(RegexText, Text, re.M|re.S)
+		if Regex is None:
+			returnTxt=""
+		else:
+			returnTxt=Regex.group(Index)
+		return returnTxt
+	#取集数
+	def get_EpisodesList(self,html,RegexText):
+		ListRe=re.finditer(RegexText, html, re.M|re.S)
+		videos = []
+		for vod in ListRe:
+			url = vod.group('url')
+			title =vod.group('title')
+			if len(url) == 0:
+				continue
+			if url.find('http:') <0:
+				url='http://kt30.com'+url
+			videos.append(title+"$"+url)
+		return videos
+	#取剧集区
+	def get_lineList(self,Txt,mark,after):
+		circuit=[]
+		origin=Txt.find(mark)
+		
+		while origin>8:
+			end=Txt.find(after,origin)
+			circuit.append(Txt[origin:end])
+			origin=Txt.find(mark,end)
+		return circuit	
+	#正则取文本,返回数组	
+	def get_RegexGetTextLine(self,Text,RegexText,Index):
+		returnTxt=[]
+		ListRe=istRe=re.finditer(RegexText, Text, re.M|re.S)
+		for value in ListRe:
+			t=value.group(Index)
+			if t==None:
+				continue
+			returnTxt.append(t)	
+		return returnTxt
+	#分类取结果
+	def get_list(self,html,patternTxt):
+		ListRe=re.finditer(patternTxt, html, re.M|re.S)
+		videos = []
+		head="http://kt30.com"
+		for vod in ListRe:
+			url = vod.group('url')
+			title =self.removeHtml(txt=vod.group('title'))
+			img =vod.group('img')
+			renew=vod.group('renew')
+			if len(url) == 0:
+				continue
+			if len(img)<5:
+				img='https://agit.ai/lanhaidixingren/Tvbox/raw/branch/master/CoverError.png'
+			if self.get_RegexGetText(Text=img,RegexText='(https{0,1}:)',Index=1)=='':
+				img=head+img
+			# print(title)
+			videos.append({
+				"vod_id":"{0}###{1}###{2}".format(title,head+url,img),
+				"vod_name":title,
+				"vod_pic":img,
+				"vod_remarks":renew
+			})
+		return videos
+	#删除html标签
+	def removeHtml(self,txt):
+		soup = re.compile(r'<[^>]+>',re.S)
+		txt =soup.sub('', txt)
+		return txt.replace("&nbsp;"," ")
+	#番剧
+	def get_list_fanju(self,html):
+		ListRe=re.finditer('class="jtxqj"><a href="(?P<url>.+?)" title="(?P<title>.+?)" target="_self">(?P<renew>.+?)</a>', html, re.M|re.S)
+		videos = []
+		head="http://ktkkt8.com"
+		img='https://agit.ai/lanhaidixingren/Tvbox/raw/branch/master/%E5%B0%81%E9%9D%A2.jpeg'
+		for vod in ListRe:
+			url = vod.group('url')
+			title =self.removeHtml(txt=vod.group('title'))
+			renew=vod.group('renew')
+			if len(url) == 0:
+				continue
+			videos.append({
+				"vod_id":"{0}###{1}###{2}".format(title,head+url,img),
+				"vod_name":title,
+				"vod_pic":img,
+				"vod_remarks":renew
+			})
+		return videos
+
+# T=Spider()
+# l=T.homeVideoContent()
+# l=T.searchContent(key='柯南',quick='')
+# l=T.categoryContent(tid='r',pg='1',filter=False,extend={})
+# for x in l['list']:
+# 	print(x['vod_id'])
+# mubiao= l['list'][1]['vod_id']
+# playTabulation=T.detailContent(array=[mubiao,])
+# # print(playTabulation)
+# vod_play_from=playTabulation['list'][0]['vod_play_from']
+# vod_play_url=playTabulation['list'][0]['vod_play_url']
+# url=vod_play_url.split('$$$')
+# vod_play_from=vod_play_from.split('$$$')[0]
+# url=url[0].split('$')
+# url=url[1].split('#')[0]
+# print(url)
+# m3u8=T.playerContent(flag=vod_play_from,id=url,vipFlags=True)
+# print(m3u8)