Wikipedia:Nokkakala/lähdekoodi
Siirry navigaatioon
Siirry hakuun
Tämä sivu on arkisto. Älä muokkaa tätä sivua.
- Tarkennus: Käytöstä poistetun botin lähdekoodi. Voit haluta tutustua edelleen käytössä oleviin tapoihin seurata tuoreita muutoksia reaaliajassa.
#!/usr/bin/env python
# Nagano - Parse IP addresses from IRC and resolve the attached hostname
#
# This work is in the public domain; Santtu Pajukanta, the original author,
# hereby disclaims all copyright and/or IP interests on this program.
# Blah blah blah bla blha blah blah blah blah blal bla blal blah blah
# Blah blah blah bla blha blah blah blah blah blal bla blal phks blah
# Blah blah blah bla blha blah blah blah blah blal bla blal blah blah
# Blah blah blah bla blha blah blah blah blah blal bla blal blah blah
from twisted.protocols import irc
from twisted.internet import reactor, protocol
from twisted.names import client
from twisted.web.client import getPage
from urllib import urlencode
from re import compile, sub, match
from xml.dom import minidom
import socket
import time
import os
# Record this process's PID so external tooling can find the running bot.
pidfile = "wha.pid"
fhandle = open(pidfile, 'w')
try:
    # One line containing the decimal PID, exactly what ``print`` used to emit.
    fhandle.write("%d\n" % os.getpid())
finally:
    # try/finally rather than ``with`` so the handle is always released
    # without raising the minimum Python version the script needs.
    fhandle.close()
def normalizeLine(line):
    """Return ``line`` decoded via toUnicode() with IRC formatting removed.

    Removes the control characters \\x02, \\x16, \\x1F and \\x0F, then the
    colour introducer ``k`` together with its optional numeric arguments.
    """
    text = toUnicode(line)
    text = sub("\x02|\x16|\x1F|\x0F", "", text)
    # Two separate passes, "fg,bg" form first and bare "fg" form second;
    # the order matters, so they are deliberately not merged into one regex.
    for colourArgs in (r"\d?\d?,\d?\d?", r"\d?\d?"):
        text = sub(k + colourArgs, "", text)
    return text
def toUnicode(line):
    """Decode ``line`` to unicode, guessing the encoding.

    UTF-8 is tried first, then ISO-8859-15.  If neither decode works (for
    example because ``line`` is not a byte string at all) the value is
    returned unchanged rather than raising.
    """
    for encoding in ('utf-8', 'iso-8859-15'):
        try:
            return line.decode(encoding)
        except (UnicodeError, AttributeError):
            # UnicodeError: the bytes are not valid in this encoding.
            # AttributeError: the object has no decode() method.
            # Anything else (KeyboardInterrupt, SystemExit, ...) propagates.
            continue
    return line
# Matches any text that contains a dotted-quad IPv4 address; group 1 is the
# address.  The lazy leading ".*?" makes match() behave like a search.
ipRe = compile(r".*?((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)).*?")
# Page-edit line.  Groups: 1 page, 2 flags (M and/or N), 3 URL, 4 user,
# 5 comment.  Raw literals keep the backslashes without relying on Python
# passing unknown string escapes through unchanged.
whole = compile(r"^\[\[(.+)\]\] (M?N?) (http[^ ]+) \* ([^*]+) \* (.*)$")
# Log line.  Same group numbering as ``whole``: 1 page, 2 log type,
# 3 always empty (placeholder so the numbering lines up), 4 user, 5 text.
logLine = compile(r"^\[\[(.+)\]\] ([a-z]+)() +\* ([^*]+) \* +(.*)$")
textRe = compile(".*<text>(.*)</text>.*")
# Bot commands; group 1 is the page title argument.
fetchRe = compile("!fetch (.*)")
lenRe = compile("!len (.*)")
# IRC formatting control characters: ``k`` introduces a colour code,
# ``b`` toggles bold.
k = u"\x03"
b = u"\x02"
strings = {
    "a": "b",
}
class Nagano:
    """Relay bot bridging two IRC connections.

    InputNagano listens on the channels in ``iChans`` at irc.wikimedia.org;
    OutputNagano speaks on ``oChans`` at irc.freenode.net.  Lines parsed on
    the input side are re-formatted and said on the output side; changes made
    by users whose name contains an IPv4 address get a reverse-DNS lookup
    first.

    Configuration and the two live protocol instances are class attributes so
    that the nested protocol classes can reach them as ``Nagano.<name>``.
    """

    # Flags kept from the original configuration; nothing in this class
    # reads them.
    resolve = 1
    fetch = 1
    # Base URL for raw page fetches; "$1" is replaced with the project domain.
    fetchURL = 'http://fi.$1.org/w/index.php?'
    # Channels joined on the input side (comma separated) and output channel.
    iChans = "#fi.wikipedia,#fi.wikibooks,#fi.wikiquote,#fi.wikinews,#fi.wikisource"
    oChans = "#wikipedia-fi-rc"
    iNick = "nokkakala"
    oNick = "nokkakala"
    lineRate = 0.5
    # Most recently active protocol instances.  They start out as None so
    # that reading them before a connection exists is not an AttributeError.
    out = None
    inn = None

    def __init__(self):
        """Create both factories, start both connections and run the reactor.

        Does not return until the reactor stops.
        """
        self.iPort = 6667
        self.oPort = 6667
        self.oFact = Nagano.NaganoFactory("output", self)
        self.iFact = Nagano.NaganoFactory("input", self)
        reactor.connectTCP("irc.wikimedia.org", self.iPort, self.iFact)
        reactor.connectTCP("irc.freenode.net", self.oPort, self.oFact)
        reactor.run()

    class InputNagano(irc.IRCClient):
        """Input side: parses change lines and hands them to the output side."""

        # Input channel -> prefix put in front of page names on output.
        _channelPrefixes = {
            "#fi.wikipedia": "",
            "#fi.wikinews": "n:",
            "#fi.wikibooks": "b:",
            "#fi.wikiquote": "q:",
            "#fi.wikisource": "s:",
        }

        def __init__(self):
            self.nickname = Nagano.iNick
            self.lineRate = Nagano.lineRate

        def signedOn(self):
            """Join the input channels and register as the live input side."""
            self.join(Nagano.iChans)
            Nagano.inn = self

        def sayline(self, line):
            """Forward ``line`` to the output side, if one is registered."""
            if Nagano.out is not None:
                Nagano.out.sayline(line)

        def privmsg(self, user, channel, message):
            """Parse one channel message and relay it.

            Messages matching neither ``logLine`` nor ``whole`` are reported
            on stdout and dropped.  New pages (flag exactly "N") additionally
            trigger a page fetch.  If the user field contains an IPv4
            address, the line is relayed only after an asynchronous PTR
            lookup; otherwise it is relayed immediately.
            """
            # No styles and in unicode
            msg = normalizeLine(message)
            prefix = self._channelPrefixes.get(channel, "bogus:")
            data = {"pref": prefix}

            mtc = logLine.match(msg)
            if mtc:
                data["type"] = "log/" + mtc.group(2)
                data["flag"] = ""
                data["url"] = ""
            else:
                # groups: 1 article, 2 flags, 3 url, 4 who, 5 comments
                mtc = whole.match(msg)
                if not mtc:
                    print("Got no match")
                    return
                data["type"] = "page"
                data["flag"] = mtc.group(2)
                data["url"] = mtc.group(3)
            data["page"] = mtc.group(1)
            data["user"] = mtc.group(4)
            data["text"] = mtc.group(5)

            out = Nagano.out
            if out is None:
                # Output side is not up yet: drop the line instead of
                # raising inside the protocol callback.
                return

            # New and not minor page: fetch a snippet, then still fall
            # through and announce the change itself.
            if data["flag"] == "N":
                out.fetchPage(prefix + data["page"].encode('utf-8'), 1)

            ipMatch = ipRe.match(data["user"])
            if not ipMatch:
                # Logged in edit
                self.sayline(out.sayclean(data))
                return

            # Try to resolve the ip address
            addr = ipMatch.group(1)
            ptr = '.'.join(addr.split('.')[::-1]) + '.in-addr.arpa'
            # Do the asynchronous DNS query
            d = client.lookupPointer(ptr)
            # Both outcomes get the same fields; "R" is appended to the flags.
            fields = (prefix + mtc.group(1), mtc.group(2) + "R",
                      mtc.group(3), addr, mtc.group(5))
            d.addCallback(self.sayResolved, *fields)
            d.addErrback(self.handleResolveError, *fields)

        def lineReceived(self, line):
            """Log the raw line with a timestamp, then process it normally."""
            timestamp = time.strftime("[%H:%M:%S]", time.localtime(time.time()))
            print(timestamp + " " + line)
            Nagano.inn = self
            irc.IRCClient.lineReceived(self, line)

        def sayResolved(self, dns, article, flags, url, who, comments):
            """PTR lookup succeeded: relay the line with the host name added.

            The name is read from ``dns[0][0].payload.name``.  An exception
            raised here is passed on to the errback registered after this
            callback (handleResolveError).
            """
            print(dns)
            out = Nagano.out
            if out is None:
                return
            self.sayline(out.saycolor(article, flags, url, who + " (%s)" % dns[0][0].payload.name, comments))

        def handleResolveError(self, err, article, flags, url, who, comments):
            """PTR lookup failed: relay the line with the bare address."""
            out = Nagano.out
            if out is None:
                return
            self.sayline(out.saycolor(article, flags, url, who, comments))

    class OutputNagano(irc.IRCClient):
        """Output side: formats lines, says them and answers bot commands."""

        # Interwiki prefix -> project domain substituted into Nagano.fetchURL.
        _interwikiDomains = {
            '': 'wikipedia',
            'w': 'wikipedia',
            'n': 'wikinews',
            'wikt': 'wiktionary',
            'b': 'wikibooks',
            'q': 'wikiquote',
            's': 'wikisource',
        }

        def __init__(self):
            self.nickname = Nagano.oNick
            # NOTE(review): ``Nagano.put`` is never read in this file; this
            # may have been meant as ``Nagano.out`` - confirm before changing.
            Nagano.put = None

        def signedOn(self):
            """Join the target channel and register as the live output side."""
            self.join(Nagano.oChans)
            Nagano.out = self

        def sayline(self, line):
            """Say an already formatted and encoded line on the output channel."""
            self.say(Nagano.oChans, line)

        def _padField(self, text):
            """Return ``text`` stripped plus one trailing space, or "" if blank."""
            text = text.strip()
            if text:
                return text + " "
            return ""

        def sayclean(self, data):
            """Build the coloured, UTF-8 encoded line for a parsed change.

            ``data`` must hold the keys pref, type, page, user, flag, text
            and url.  It is not modified; formatting works on a copy.
            """
            fields = dict(data)
            for key in ('page', 'user', 'flag', 'text', 'url'):
                fields[key] = self._padField(fields[key])
            # Shorten diff links to the /wiki/?diff=... form.
            fields["url"] = sub(r'''http://(.+)/w/index.php\?title=.+&diff=([0-9]+)&oldid=([0-9]+)&rcid=([0-9]+)''', r'''http://\1/wiki/?diff=\2&oldid=\3&rcid=\4''', fields["url"])
            if fields["type"] == "log/newusers":
                msg = k + u"04" + b + fields["type"] + b + k + u"10" + fields["user"]
            else:
                msg = (k + u"03" + fields["pref"] + fields["page"] +
                       k + u"04" + b + fields["flag"] + b +
                       k + u"10" + fields["user"] +
                       k + u"15" + fields["text"] +
                       k + u"14" + fields["url"])
            try:
                return msg.encode('UTF-8')
            except UnicodeError:
                return "DIE DIE DIE IN SAY"

        def saycolor(self, article, flags, url, who, comments):
            """Build the coloured line from separate fields, as ISO-8859-15.

            Characters that ISO-8859-15 cannot represent are replaced.  The
            article always gets a trailing space; the other fields only when
            they are not blank.
            """
            article = article.strip() + " "
            flags = self._padField(flags)
            url = self._padField(url)
            who = self._padField(who)
            comments = self._padField(comments)
            # Colors and stuff
            msg = k + u"03" + article + k + u"04" + b + flags + b + k + u"10" + who + k + u"15" + comments + k + u"14" + url
            try:
                return msg.encode('iso-8859-15', 'replace')
            except UnicodeError:
                return "DIE DIE DIE IN SAY"

        def privmsg(self, user, channel, message):
            """Handle the commands !quit, !tryfix, !fetch and !len."""
            # No styles and in unicode
            msg = normalizeLine(message)
            if message == "!quit" and user == "ilaiho!i=MULLEEISPAMMATA":
                reactor.stop()
                return
            if message == "!tryfix":
                self.quit("Testing")
                if Nagano.inn is not None:
                    Nagano.inn.quit("Testing")
                return
            # Fetch command?
            mtc = fetchRe.match(msg)
            if mtc:
                self.fetchPage(mtc.group(1).encode('utf-8'), 1)
                return
            # Len command?
            mtc = lenRe.match(msg)
            if mtc:
                self.fetchPage(mtc.group(1).encode('utf-8'), 0)
                return

        def lineReceived(self, line):
            """Register as the live output side, log the line, then process it."""
            Nagano.out = self
            timestamp = time.strftime("[%H:%M:%S]", time.localtime(time.time()))
            print(timestamp + " " + line)
            irc.IRCClient.lineReceived(self, line)

        def fetchPage(self, page, fetch):
            """Fetch ``page`` and report it.

            A true ``fetch`` announces a text snippet, a false one only the
            page length (see gotPage).
            """
            d = self.fetchPageReal(page)
            d.addCallback(self.gotPage, page, fetch)
            d.addErrback(self.handleFetchError, page)

        def fetchPageReal(self, page):
            """Start fetching the raw text of ``page``; return getPage()'s result.

            Leading interwiki prefixes ("w:", "n:", "wikt:", "b:", "q:",
            "s:" or an empty one) are consumed from the title; the last one
            consumed picks the project domain.  The final ":"-separated part
            is always kept as the title.
            """
            parts = page.split(':')
            domain = 'wikipedia'
            consumed = 0
            for item in parts[:-1]:
                if item not in self._interwikiDomains:
                    break
                domain = self._interwikiDomains[item]
                consumed += 1
            finalpage = ':'.join(parts[consumed:])
            url = Nagano.fetchURL.replace('$1', domain)
            url += urlencode([('title', finalpage), ('action', 'raw')])
            return getPage(url)

        def gotPage(self, data, page, snippet):
            """Announce a fetched page.

            ``data`` is the UTF-8 encoded page body and ``page`` the UTF-8
            encoded title.  With a true ``snippet`` the first 380 characters
            (wikilink markup removed, newlines shown as a pilcrow) are said;
            otherwise only the page length is.
            """
            data = data.decode("utf-8").replace("\n", u"\xB6")
            # The reported length is taken before wikilinks are stripped.
            length = len(data)
            # remove wikilinks to fit more content
            data = sub(r'\[\[([^\x5d]+?)\|([^\x5d]+)\]\]', r'\2', data)
            data = sub(r'\[\[([^\x5d]+?)\]\]', r'\1', data)
            # Decide before cutting, so that a text of exactly 380 characters
            # is not marked as truncated.
            truncated = len(data) > 380
            data = data[:380]
            page = page.decode('utf-8').replace("_", " ")
            if snippet:
                if truncated:
                    data += "..."
                self.sayline(self.saycolor(page, "F", "", str(length), data))
            else:
                self.say(Nagano.oChans, "Page %s length is %s" % (page.encode('iso-8859-15', 'replace'), length))

        def handleFetchError(self, err, page):
            """Report a failed fetch of ``page`` on the output channel."""
            self.say(Nagano.oChans, "Could not fetch page %s: %s" % (page.decode('utf-8').encode('iso-8859-15', 'replace'), err.value))

        def connectionLost(self, reason):
            """Delegate to the base class; Nagano.out is left as it is."""
            irc.IRCClient.connectionLost(self, reason)

    class NaganoFactory(protocol.ReconnectingClientFactory):
        """Reconnecting factory that builds one of the two protocol classes."""

        def __init__(self, put, host):
            """``put`` is "input" or "output"; ``host`` is the owning Nagano."""
            self.dire = put
            self.host = host
            if put == "input":
                self.protocol = Nagano.InputNagano
            elif put == "output":
                self.protocol = Nagano.OutputNagano
def _main():
    """Start the bot; Nagano() connects and then runs the reactor."""
    Nagano()


# Only start the bot when executed as a script, not when imported.
if __name__ == "__main__":
    _main()