上传文件至 json
This commit is contained in:
parent
af66b214a0
commit
03c4cf24f1
|
@ -0,0 +1,285 @@
|
|||
#coding=utf-8
|
||||
#!/usr/bin/python
|
||||
import sys
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
import re
|
||||
from urllib import request, parse
|
||||
import urllib
|
||||
import urllib.request
|
||||
import json
|
||||
class Spider(Spider): # 元类 默认的元类 type
|
||||
def getName(self):
    """Return the fixed name string of this spider."""
    spider_name = "卡通站(kt30)"
    return spider_name
|
||||
def init(self, extend=""):
    """No-op hook: accepts an optional ``extend`` argument and does nothing."""
    return None
|
||||
def isVideoFormat(self, url):
    """No-op hook: ``url`` is not inspected and None is returned."""
    return None
|
||||
def manualVideoCheck(self):
    """No-op hook: performs no check and returns None."""
    return None
|
||||
def homeContent(self, filter):
    """Build the category list shown on the home screen.

    Returns a dict whose ``'class'`` entry is a list of
    ``{'type_name': ..., 'type_id': ...}`` dicts, one per category.  When
    ``filter`` is truthy, ``self.config['filter']`` is added under
    ``'filters'``.
    """
    # (display name, id) pairs; the id is the path segment that
    # categoryContent puts into the listing URL.
    categories = (
        ("日本动漫", "r"),
        ("国产动漫", "g"),
        ("港台动漫", "gm"),
        ("动画电影", "v"),
        ("欧美动漫", "o"),
    )
    result = {
        'class': [
            {'type_name': label, 'type_id': code}
            for label, code in categories
        ],
    }
    if filter:
        result['filters'] = self.config['filter']
    return result
|
||||
def homeVideoContent(self):
    """Fetch the site front page and return the video cards found on it.

    Returns ``{'list': [...]}`` where the list is whatever ``get_list``
    extracts from the downloaded HTML with the card pattern below.
    """
    card_pattern = r'a class="stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?"><span class="play hidden-xs"></span><span class="pic-text text-right">(?P<renew>.+?)</span></a>'
    front_page = self.webReadFile(urlStr="http://kt30.com/", header=self.header)
    cards = self.get_list(html=front_page, patternTxt=card_pattern)
    return {'list': cards}
|
||||
|
||||
def categoryContent(self, tid, pg, filter, extend):
    """Fetch one page of a category listing.

    ``tid`` is the category id and ``pg`` the page number; both are
    substituted into the listing URL.  ``filter`` and ``extend`` are accepted
    for interface compatibility but are not used.

    Returns a dict with ``list``, ``page``, ``pagecount``, ``limit`` and
    ``total`` keys.
    """
    url = 'http://kt30.com/{0}/index_{1}.html'.format(tid, pg)
    htmlTxt = self.webReadFile(urlStr=url, header=self.header)
    videos = self.get_list(html=htmlTxt, patternTxt=r'<a class="stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?"><span class="play hidden-xs"></span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')
    count = len(videos)
    return {
        'list': videos,
        'page': pg,
        # Fewer than 17 items is treated as "this is the last page";
        # otherwise the real page count is unknown and a large sentinel is
        # reported so the client keeps paging.
        'pagecount': pg if count < 17 else 9999,
        'limit': count,
        'total': count,
    }
|
||||
|
||||
def detailContent(self, array):
    """Download a detail page and build the vod record for it.

    ``array[0]`` is an id of the form ``title###url###cover`` (the format
    ``get_list`` writes into ``vod_id``).

    Returns ``{'list': [vod]}`` on success, or ``{'list': []}`` when the id
    is missing or malformed, the downloaded page is shorter than 5
    characters, or no play-source heading is found in it.
    """
    if not array:
        return {'list': []}
    # Split from the right so that a title which itself contains '###'
    # cannot shift the URL and cover fields.
    parts = array[0].rsplit('###', 2)
    if len(parts) != 3:
        return {'list': []}
    title, idUrl, pic = parts

    htmlTxt = self.webReadFile(urlStr=idUrl, header=self.header)
    if len(htmlTxt) < 5:
        return {'list': []}

    # One heading per play source.
    line = self.get_RegexGetTextLine(Text=htmlTxt, RegexText=r'</span><h3 class="title">(.+?)</h3></div>', Index=1)
    if len(line) < 1:
        return {'list': []}
    playFrom = [self.removeHtml(txt=heading) for heading in line]

    # One <ul> playlist per play source; episodes inside a source are
    # joined with '#', sources with '$$$'.
    circuit = self.get_lineList(Txt=htmlTxt, mark='<ul class="stui-content__playlist', after='</ul>')
    videoList = [
        "#".join(self.get_EpisodesList(html=playlist, RegexText=r'<a href="(?P<url>.+?)">(?P<title>.+?)</a>'))
        for playlist in circuit
    ]

    # NOTE(review): the unescaped '|' splits this pattern (and the director
    # pattern below) into two alternatives, and only the second one has a
    # capture group; matches of the first alternative yield None and are
    # dropped by get_RegexGetTextLine.  Confirm this is intended.
    temporary = self.get_RegexGetTextLine(Text=htmlTxt, RegexText=r'<a href="/vodsearch/----%|\w+?---------.html" target="_blank">(.+?)</a>', Index=1)
    typeName = "/".join(temporary)
    year = self.get_RegexGetText(Text=htmlTxt, RegexText=r'<a href="/vodsearch/-------------\d{4}.html" target="_blank">(\d{4})</a>', Index=1)
    temporary = self.get_RegexGetTextLine(Text=htmlTxt, RegexText=r'<a href="/vodsearch/-.+?------------.html" target="_blank">(.+?)</a>', Index=1)
    act = "/".join(temporary)
    temporary = self.get_RegexGetTextLine(Text=htmlTxt, RegexText=r'<a href="/vodsearch/-----%+?|\w+?--------.html" target="_blank">(.+?)</a>', Index=1)
    director = "/".join(temporary)
    area = self.get_RegexGetText(Text=htmlTxt, RegexText=r'地区:</b>(.*?)<b>', Index=1)
    cont = self.get_RegexGetText(Text=htmlTxt, RegexText=r'简介:(.+?)<a href="#desc">详情', Index=1)

    vod = {
        "vod_id": array[0],
        "vod_name": title,
        "vod_pic": pic,
        "type_name": self.removeHtml(txt=typeName),
        "vod_year": year,
        "vod_area": self.removeHtml(txt=area),
        "vod_remarks": "",
        "vod_actor": self.removeHtml(txt=act),
        "vod_director": self.removeHtml(txt=director),
        "vod_content": self.removeHtml(txt=cont),
    }
    vod['vod_play_from'] = '$$$'.join(playFrom)
    vod['vod_play_url'] = "$$$".join(videoList)
    return {'list': [vod]}
|
||||
|
||||
def verifyCode(self):
    """No-op hook: does nothing and returns None."""
    return None
|
||||
|
||||
def searchContent(self, key, quick):
    """Search the site for ``key`` and return the matching video cards.

    ``key`` is percent-encoded into the ``wd`` query parameter.  ``quick`` is
    accepted but not used.  Returns ``{'list': [...]}`` with whatever
    ``get_list`` extracts from the result page.
    """
    encoded_key = urllib.parse.quote(key)
    search_url = 'http://kt30.com/vodsearch/-------------.html?wd={0}'.format(encoded_key)
    result_page = self.webReadFile(urlStr=search_url, header=self.header)
    hits = self.get_list(html=result_page, patternTxt=r'<a class="v-thumb stui-vodlist__thumb lazyload" href="(?P<url>.+?)" title="(?P<title>.+?)" data-original="(?P<img>.+?)".+?</span><span class="pic-text text-right">(?P<renew>.+?)</span></a>')
    return {'list': hits}
|
||||
|
||||
def playerContent(self, flag, id, vipFlags):
    """Resolve the playable URL for an episode page.

    ``id`` is the episode page URL.  The page is downloaded and the JSON
    object assigned to ``var player_aaaa`` is read; when its ``url`` field is
    a string of at least 5 characters, that URL is returned for direct
    playback (``parse`` = 0).  In every other case (marker missing, JSON
    unreadable, ``url`` absent or too short) the page URL itself is returned
    with ``parse`` = 1.  ``flag`` and ``vipFlags`` are accepted but not used.
    """
    marker = 'var player_aaaa='
    play_url = id
    sniff = 1  # 1 = sniff, 0 = play directly

    htmlTxt = self.webReadFile(urlStr=id, header=self.header)
    segments = self.get_lineList(Txt=htmlTxt, mark=marker, after='</script>')
    if segments:
        payload = segments[0][len(marker):].lstrip()
        try:
            # raw_decode stops at the end of the first JSON value, so a
            # trailing ';' or whitespace after the object does not break
            # parsing the way json.loads would.
            player, _ = json.JSONDecoder().raw_decode(payload)
        except ValueError:
            player = None
        candidate = player.get('url') if isinstance(player, dict) else None
        if isinstance(candidate, str) and len(candidate) >= 5:
            play_url = candidate
            sniff = 0

    return {
        "parse": sniff,
        "playUrl": '',
        "url": play_url,
        'jx': 0,  # 1 = VIP parsing, 0 = no parsing
        "header": '',
    }
|
||||
# Static configuration; homeContent reads config['filter'] when filters are
# requested.
config = dict(player={}, filter={})

# Request headers; the page-fetching methods pass this dict to webReadFile.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
    'Host': 'kt30.com',
    "Referer": "http://kt30.com/",
}
|
||||
|
||||
def localProxy(self, param):
    """Return the local-proxy response descriptor.

    The result is a 4-element list: status code, content type, an action
    dict and a trailing string.  ``param`` is accepted but not used.
    """
    # The original body returned an undefined name ``action``, so every call
    # raised NameError.  An empty descriptor is defined here instead.
    # NOTE(review): the field names follow the descriptor commonly used by
    # spiders built on this base class -- confirm against base.spider.
    action = {
        'url': '',
        'header': '',
        'param': '',
        'type': 'string',
        'after': '',
    }
    return [200, "video/MP2T", action, ""]
|
||||
#-----------------------------------------------自定义函数-----------------------------------------------
|
||||
#访问网页
|
||||
def webReadFile(self, urlStr, header, timeout=10):
    """Download ``urlStr`` and return the response body as text.

    ``header`` is a dict of request headers (``None`` is treated as empty).
    ``timeout`` is the socket timeout in seconds passed to ``urlopen``.

    Returns the body decoded as UTF-8, with undecodable bytes replaced by
    U+FFFD.  Returns ``''`` when the URL is malformed or the request fails;
    detailContent already treats a near-empty string as "page unavailable".
    """
    try:
        # Request() itself raises ValueError for a URL without a scheme, so
        # it has to sit inside the try block as well.
        req = urllib.request.Request(url=urlStr, headers=header or {})
        with urllib.request.urlopen(req, timeout=timeout) as response:
            raw = response.read()
    except (OSError, ValueError):
        # URLError, HTTPError and socket timeouts are all OSError subclasses.
        return ''
    return raw.decode('utf-8', errors='replace')
|
||||
#正则取文本
|
||||
def get_RegexGetText(self, Text, RegexText, Index):
    """Return capture group ``Index`` of the first match of ``RegexText``.

    The search runs over ``Text`` with the MULTILINE and DOTALL flags.  An
    empty string is returned when the pattern does not match at all.
    """
    found = re.compile(RegexText, re.M | re.S).search(Text)
    return "" if found is None else found.group(Index)
|
||||
#取集数
|
||||
def get_EpisodesList(self, html, RegexText):
    """Extract the episodes of one playlist as ``"title$url"`` strings.

    ``RegexText`` must define the named groups ``url`` and ``title``; it is
    applied to ``html`` with the MULTILINE and DOTALL flags.  Matches with an
    empty ``url`` are skipped.  Relative links are resolved against the site
    root; absolute links (``http://`` and ``https://`` alike) are kept
    unchanged.
    """
    site_root = 'http://kt30.com'
    episodes = []
    for found in re.finditer(RegexText, html, re.M | re.S):
        link = found.group('url')
        if not link:
            continue
        # urljoin leaves absolute URLs alone.  The previous
        # ``find('http:')`` test treated https:// links as relative and
        # prefixed the site host to them.
        episodes.append(found.group('title') + "$" + urllib.parse.urljoin(site_root, link))
    return episodes
|
||||
#取剧集区
|
||||
def get_lineList(self, Txt, mark, after):
    """Collect every segment of ``Txt`` that starts at ``mark``.

    Each segment runs from an occurrence of ``mark`` (inclusive) up to the
    next occurrence of ``after`` (exclusive).  Segments are returned in
    document order; an empty list means ``mark`` never occurs.

    If ``after`` is missing for the last ``mark``, that segment extends to
    the end of ``Txt``.
    """
    segments = []
    start = Txt.find(mark)
    # ``>= 0``: the previous ``> 8`` test ignored a mark located within the
    # first nine characters of the text.
    while start >= 0:
        stop = Txt.find(after, start)
        if stop < 0:
            # No terminator: keep the rest of the text rather than slicing
            # with -1, which dropped the final character.
            segments.append(Txt[start:])
            break
        segments.append(Txt[start:stop])
        # Always advance by at least one character so the loop terminates
        # even when ``after`` matches right at ``start``.
        start = Txt.find(mark, max(stop, start + 1))
    return segments
|
||||
#正则取文本,返回数组
|
||||
def get_RegexGetTextLine(self, Text, RegexText, Index):
    """Return capture group ``Index`` of every match of ``RegexText``.

    The pattern is applied to ``Text`` with the MULTILINE and DOTALL flags.
    Matches in which the requested group did not participate (its value is
    ``None``) are left out, so the result contains only strings.
    """
    captured = (found.group(Index) for found in re.finditer(RegexText, Text, re.M | re.S))
    return [value for value in captured if value is not None]
|
||||
#分类取结果
|
||||
def get_list(self, html, patternTxt):
    """Extract video cards from a listing page.

    ``patternTxt`` must define the named groups ``url``, ``title``, ``img``
    and ``renew``; it is applied to ``html`` with the MULTILINE and DOTALL
    flags.  Matches with an empty ``url`` are skipped.

    Returns a list of dicts with ``vod_id`` (``title###url###cover``),
    ``vod_name``, ``vod_pic`` and ``vod_remarks``.
    """
    head = "http://kt30.com"
    fallback_cover = 'https://agit.ai/lanhaidixingren/Tvbox/raw/branch/master/CoverError.png'
    videos = []
    for vod in re.finditer(patternTxt, html, re.M | re.S):
        url = vod.group('url')
        if not url:
            continue
        title = self.removeHtml(txt=vod.group('title'))
        img = vod.group('img')
        renew = vod.group('renew')
        if len(img) < 5:
            # Too short to be a usable cover path; use the placeholder.
            img = fallback_cover
        else:
            # urljoin keeps absolute URLs and resolves site-relative or
            # protocol-relative ones, replacing the unanchored
            # "contains http(s):" test.
            img = urllib.parse.urljoin(head, img)
        # Resolved the same way, so an already-absolute link is no longer
        # prefixed with the site host a second time.
        page_url = urllib.parse.urljoin(head, url)
        videos.append({
            "vod_id": "{0}###{1}###{2}".format(title, page_url, img),
            "vod_name": title,
            "vod_pic": img,
            "vod_remarks": renew,
        })
    return videos
|
||||
#删除html标签
|
||||
def removeHtml(self, txt):
    """Strip HTML tags from ``txt`` and normalise non-breaking spaces.

    Everything of the form ``<...>`` is removed; ``&nbsp;`` entities and
    literal U+00A0 characters are replaced with a plain space.  ``None`` or
    an empty string yields ``''``.
    """
    if not txt:
        return ''
    stripped = re.sub(r'<[^>]+>', '', txt)
    # Written with explicit escapes: a literal non-breaking space in the
    # source is indistinguishable from an ordinary space.
    return stripped.replace("&nbsp;", " ").replace("\u00a0", " ")
|
||||
#番剧
|
||||
def get_list_fanju(self, html):
    """Extract series entries from ``html`` using the built-in pattern.

    Every entry gets the same fixed cover image, and its link is prefixed
    with the ``ktkkt8.com`` host.  Returns a list of dicts with ``vod_id``
    (``title###url###cover``), ``vod_name``, ``vod_pic`` and ``vod_remarks``.
    """
    head = "http://ktkkt8.com"
    img = 'https://agit.ai/lanhaidixingren/Tvbox/raw/branch/master/%E5%B0%81%E9%9D%A2.jpeg'
    entries = []
    matches = re.finditer('class="jtxqj"><a href="(?P<url>.+?)" title="(?P<title>.+?)" target="_self">(?P<renew>.+?)</a>', html, re.M | re.S)
    for found in matches:
        link = found.group('url')
        name = self.removeHtml(txt=found.group('title'))
        note = found.group('renew')
        if not link:
            continue
        entries.append({
            "vod_id": "{0}###{1}###{2}".format(name, head + link, img),
            "vod_name": name,
            "vod_pic": img,
            "vod_remarks": note,
        })
    return entries
|
||||
|
||||
# T=Spider()
|
||||
# l=T.homeVideoContent()
|
||||
# l=T.searchContent(key='柯南',quick='')
|
||||
# l=T.categoryContent(tid='r',pg='1',filter=False,extend={})
|
||||
# for x in l['list']:
|
||||
# print(x['vod_id'])
|
||||
# mubiao= l['list'][1]['vod_id']
|
||||
# playTabulation=T.detailContent(array=[mubiao,])
|
||||
# # print(playTabulation)
|
||||
# vod_play_from=playTabulation['list'][0]['vod_play_from']
|
||||
# vod_play_url=playTabulation['list'][0]['vod_play_url']
|
||||
# url=vod_play_url.split('$$$')
|
||||
# vod_play_from=vod_play_from.split('$$$')[0]
|
||||
# url=url[0].split('$')
|
||||
# url=url[1].split('#')[0]
|
||||
# print(url)
|
||||
# m3u8=T.playerContent(flag=vod_play_from,id=url,vipFlags=True)
|
||||
# print(m3u8)
|
Loading…
Reference in New Issue