#!/usr/bin/env python
# coding:utf-8
import urllib
import fileinput
from xml.etree.ElementTree import ElementTree
import re
# Download stage: fetch every monthly ranking RSS feed to a local XML file
# and remember the file names, in download order, in ``xmllist``.
print("Now loadind rss...")

# Category path segments requested for every ranking type.
categorylist = [
    "g_ent", "ent", "music", "sport",
    "g_life", "animal", "cooking", "diary", "nature", "science",
    "history", "radio", "lecture",
    "g_politics", "politics",
    "g_try", "sing", "play", "dance", "draw", "tech",
    "g_culture", "anime", "game",
    "g_popular", "imas", "toho", "vocaloid", "are", "other",
    "g_r18", "r18",
]

# Ranking types; each one is a path segment of the feed URL.
typelist = ["fav", "view", "res", "mylist"]

# Names of the downloaded files, consumed by the extraction stage.
xmllist = []

for typein in typelist:
    base = 'http://www.nicovideo.jp/ranking/' + typein + '/monthly/'

    # (local file name, feed URL) pairs: the overall ranking first, then its
    # pages 2..10, then one feed per category.
    jobs = [('all' + typein + '.xml', base + 'all?rss=2.0')]
    for page in range(2, 11):
        jobs.append(('all' + str(page) + typein + '.xml',
                     base + 'all?page=' + str(page) + '&rss=2.0'))
    for category in categorylist:
        jobs.append((category + typein + '.xml',
                     base + category + '?rss=2.0'))

    for outputfile, url in jobs:
        urllib.urlretrieve(url, outputfile)
        # Record the file only after its download call has returned.
        xmllist.append(outputfile)

print("Finish load rss files.")
# RSS download finished
# RSS extraction stage: turn every ranking entry of every downloaded feed
# into one tab-separated row appended to output2.txt.


def _strong_field(text, css_class):
    """Split *text* around the first ``<strong class="css_class">`` element.

    Returns ``(value, remainder)``: the text between the opening tag and the
    next ``</strong>``, and everything after that closing tag (an empty
    string when there is no closing tag).

    Raises IndexError when the opening tag is not present in *text*.
    """
    after_open = text.split("<strong class=\"" + css_class + "\">", 1)[1]
    value, _closing, remainder = after_open.partition("</strong>")
    return value, remainder


def _count(text):
    """Parse a count written with thousands separators ("12,345") as int."""
    return int(text.replace(",", ""))


def _score(view, res, mylist):
    """Return ``(hosei, mylisdo, total)`` for one entry.

    hosei   -- factor applied to ``res`` in the total: 1.0 while
               ``view >= res``, otherwise
               ``(view * 2 + mylist) / (view + res + mylist)``.
    mylisdo -- ``mylist`` as a percentage of ``view``; 0 when either is 0.
    total   -- ``view + res * hosei + mylist * 20``.
    """
    if view >= res:
        hosei = 1.00
    else:
        # view < res here, so the denominator is always positive.
        hosei = (view * 2.00 + mylist) / (view + res + mylist)

    if mylist == 0 or view == 0:
        # With no views the percentage is undefined; report 0 instead of
        # dividing by zero.
        mylisdo = 0
    else:
        # Multiply by a float first: the previous form divided two ints
        # (view / mylist), which truncated the ratio and raised
        # ZeroDivisionError whenever view < mylist.
        # Presumably intended as mylist-per-view in percent -- confirm.
        mylisdo = mylist * 100.00 / view

    total = view + res * hosei + mylist * 20
    return hosei, mylisdo, total


# NOTE(review): append mode keeps rows written by earlier runs in
# output2.txt -- confirm that accumulating across runs is intended.
with open("output2.txt", "a") as outputfile2:
    # Running entry counter written to the second column.
    # NOTE(review): assumed to advance once per entry (not once per file);
    # the original indentation was lost -- confirm.
    jyuni = 1
    for xmlfile in xmllist:
        # Close each feed file as soon as it has been parsed.
        with open(xmlfile) as xmlhandle:
            xmltree = ElementTree(file=xmlhandle)
        titlelist = xmltree.findall("channel/item/title")
        urllist = xmltree.findall("channel/item/link")
        destlist = xmltree.findall("channel/item/description")

        for titletemp, idtemp, destemp in zip(titlelist, urllist, destlist):
            # Keep only the part of the title after the first "位:".
            title = titletemp.text.split(u"位:", 1)[1]
            # The video id is the last path component of the link.
            idmovie = idtemp.text.rsplit("/", 1)[1]

            # The description is scanned left to right, so the fields must
            # be extracted in this order.
            date, rest = _strong_field(destemp.text, "nico-info-date")
            viewtext, rest = _strong_field(rest, "nico-info-monthly-view")
            restext, rest = _strong_field(rest, "nico-info-monthly-res")
            mylisttext, rest = _strong_field(rest, "nico-info-monthly-mylist")

            view = _count(viewtext)
            res = _count(restext)
            mylist = _count(mylisttext)
            hosei, mylisdo, total = _score(view, res, mylist)

            outputtemp = [
                idmovie,
                str(jyuni),
                str(view),
                str(res),
                str(mylist),
                str(hosei),
                str(mylisdo),
                str(total),
                title.encode("utf-8"),
                date.encode("utf-8"),
                idmovie + ".png",
            ]
            outputfile2.write("\t".join(outputtemp))
            outputfile2.write("\n")
            jyuni = jyuni + 1

print("The point was calculated.")
# RSS extraction finished
# Duplicate-removal stage: copy output2.txt to output3.txt, keeping only the
# first row seen for each value of the first tab-separated column.
# A set gives constant-time membership tests; the previous list.index()
# lookup rescanned every id collected so far for each input line.
seen_ids = set()
# The with-blocks close both files even if a write fails part-way through.
with open("output2.txt", "r") as listin:
    with open("output3.txt", "w") as listout:
        for line in listin:
            idtemp = line.split("\t")[0]
            if idtemp in seen_ids:
                # Later rows for an id already written are dropped.
                continue
            seen_ids.add(idtemp)
            listout.write(line)
print("Repetition was removed.")
# Duplicate removal finished
# Sorting stage: order the rows of output3.txt by their score column
# (index 7), highest first, rewrite the rank column (index 1) and save the
# result to outputfin.txt.
listin = open("output3.txt", "r")
listout = open("outputfin.txt", "w")

# One [score, raw line, rank] entry per input row; rank is filled in below.
outputlist = [[float(row.split("\t")[7]), row, 0]
              for row in listin.readlines()]
print("loaded list.")
listin.close()

# Descending by score; rows with equal scores are ordered by their text.
outputlist.sort(reverse=True)

# Rows with equal scores share the rank of the first row of their group,
# and the following rank numbers are skipped (1, 2, 2, 4, ...).
tie_rank = 1
previous_score = 0
for position, entry in enumerate(outputlist, 1):
    score = entry[0]
    if score != previous_score:
        tie_rank = position
    entry[2] = tie_rank
    previous_score = score

    # Split off only the first three columns so the rest of the row,
    # including its trailing newline, is written back untouched.
    fields = entry[1].split("\t", 3)
    fields[1] = str(entry[2])
    listout.write("\t".join(fields))

listout.close()
print("finish sort.")
# Sorting finished
# Last updated: 2010-07-28 22:10