#!/usr/bin/env python
# coding:utf-8
 
import urllib
import fileinput
from xml.etree.ElementTree import ElementTree
import re
 
print("Now loadind rss...")

# Category path segments; each is placed after '/monthly/' in a ranking
# feed URL when the feeds are downloaded.
categorylist = [
    "g_ent",
    "ent",
    "music",
    "sport",
    "g_life",
    "animal",
    "cooking",
    "diary",
    "nature",
    "science",
    "history",
    "radio",
    "lecture",
    "g_politics",
    "politics",
    "g_try",
    "sing",
    "play",
    "dance",
    "draw",
    "tech",
    "g_culture",
    "anime",
    "game",
    "g_popular",
    "imas",
    "toho",
    "vocaloid",
    "are",
    "other",
    "g_r18",
    "r18",
]

# Ranking types; each is placed after '/ranking/' in a feed URL.
typelist = ["fav", "view", "res", "mylist"]

# Collects the local file name of every feed that gets downloaded.
xmllist = []
 
# Download every monthly ranking feed and record the file it was saved to.
ranking_root = 'http://www.nicovideo.jp/ranking/'

for ranking_type in typelist:
    monthly_url = ranking_root + ranking_type + '/monthly/'

    # Overall ranking, first page.
    saved_as = 'all%s.xml' % ranking_type
    urllib.urlretrieve(monthly_url + 'all?rss=2.0', saved_as)
    xmllist.append(saved_as)

    # Overall ranking, pages 2 through 10.
    for page_no in range(2, 11):
        saved_as = 'all%d%s.xml' % (page_no, ranking_type)
        urllib.urlretrieve(
            '%sall?page=%d&rss=2.0' % (monthly_url, page_no), saved_as)
        xmllist.append(saved_as)

    # One feed per category.
    for genre in categorylist:
        saved_as = genre + ranking_type + '.xml'
        urllib.urlretrieve(monthly_url + genre + '?rss=2.0', saved_as)
        xmllist.append(saved_as)

print("Finish load rss files.")
# RSS download finished
 
# RSS extraction step: read every downloaded feed, pull the statistics out of
# each item's description HTML, compute a score, and append one tab-separated
# row per item to output2.txt.


def _take_strong(text, css_class):
    """Split text at the first <strong class="css_class">...</strong>.

    Returns (value, rest): value is the text between the opening tag and the
    next "</strong>", rest is everything after that closing tag.
    Raises IndexError if either tag is missing.
    """
    after_open = text.split('<strong class="%s">' % css_class, 1)[1]
    pieces = after_open.split("</strong>", 1)
    return pieces[0], pieces[1]


def _to_count(text):
    """Convert a comma-grouped number such as "12,345" to an int."""
    return int(text.replace(",", ""))


# Running counter written to the second column of every row.
jyuni = 1

# NOTE(review): the file is opened in append mode, so rows written by earlier
# runs remain in output2.txt -- confirm that accumulating across runs is
# intended.
with open("output2.txt", "a") as outputfile2:
    for xmlfile in xmllist:
        # Parse inside a with-block so the feed file handle is always closed.
        with open(xmlfile) as xmlsource:
            xmltree = ElementTree(file=xmlsource)

        titlelist = xmltree.findall("channel/item/title")
        urllist = xmltree.findall("channel/item/link")
        destlist = xmltree.findall("channel/item/description")

        for title_el, link_el, desc_el in zip(titlelist, urllist, destlist):
            # The title is whatever follows the first "位:" marker.
            title = title_el.text.split(u"位:", 1)[1]

            # The id is the last path component of the item link.
            idmovie = link_el.text.rsplit("/", 1)[1]

            # The four <strong> fields are consumed in document order; each
            # lookup continues from the text left over by the previous one.
            date, remainder = _take_strong(desc_el.text, "nico-info-date")
            view_text, remainder = _take_strong(
                remainder, "nico-info-monthly-view")
            res_text, remainder = _take_strong(
                remainder, "nico-info-monthly-res")
            mylist_text, remainder = _take_strong(
                remainder, "nico-info-monthly-mylist")

            view = _to_count(view_text)
            res = _to_count(res_text)
            mylist = _to_count(mylist_text)

            # hosei is the weight applied to res in the total: 1.0 unless
            # res exceeds view, in which case it is scaled down.
            if view >= res:
                hosei = 1.00
            else:
                hosei = (view * 2.00 + mylist) / (view + res + mylist)

            # mylist as a percentage of view.  Computed with float division:
            # the previous form divided two ints first, which truncated the
            # ratio and raised ZeroDivisionError whenever view < mylist.
            if mylist == 0 or view == 0:
                mylisdo = 0
            else:
                mylisdo = mylist * 100.00 / view

            total = view + res * hosei + mylist * 20

            row = [
                idmovie,
                str(jyuni),
                str(view),
                str(res),
                str(mylist),
                str(hosei),
                str(mylisdo),
                str(total),
                title.encode("utf-8"),
                date.encode("utf-8"),
                idmovie + ".png",
            ]
            outputfile2.write("\t".join(row))
            outputfile2.write("\n")

            jyuni = jyuni + 1

print("The point was calculated.")
# RSS extraction finished
 
# De-duplication step: copy output2.txt to output3.txt, keeping only the first
# row seen for each id (the first tab-separated column).
idlist = []          # ids in first-seen order
seen_ids = set()     # same ids, for O(1) membership tests

with open("output2.txt", "r") as listin:
    with open("output3.txt", "w") as listout:
        for line in listin:
            idtemp = line.split("\t")[0]
            if idtemp not in seen_ids:
                seen_ids.add(idtemp)
                idlist.append(idtemp)
                listout.write(line)

print("Repetition was removed.")
# De-duplication finished
 
# Sorting step: order the rows of output3.txt by score (column 8), highest
# first, rewrite the rank column, and save the result to outputfin.txt.
listin = open("output3.txt", "r")
listout = open("outputfin.txt", "w")

# Each entry is [score, raw row text, rank]; rank is 0 until assigned below.
outputlist = [[float(row.split("\t")[7]), row, 0] for row in listin]

print("loaded list.")

listin.close()

# Descending by score; entries with equal scores compare by row text.
outputlist.sort(reverse=True)

position = 0        # 1-based position of the current entry
shared_rank = 1     # rank given to every entry in the current run of ties
last_score = 0

for entry in outputlist:
    position += 1
    score = entry[0]

    # A different score starts a new rank; an equal score reuses the rank
    # of the first entry in the tie.
    if score != last_score:
        shared_rank = position
    entry[2] = shared_rank
    last_score = score

    # Only the first three tabs are split so the rest of the row is kept
    # verbatim; the second column is replaced by the new rank.
    columns = entry[1].split("\t", 3)
    columns[1] = str(entry[2])

    listout.write("\t".join(columns))

listout.close()

print("finish sort.")
# Sorting finished
 
# Last updated: 2010-07-28 22:10