#!/usr/bin/env python
# -- coding: utf-8 --
import sys, os, site
myPath = os.path.dirname(os.path.abspath(__file__))
os.chdir(myPath)
sys.path.append(myPath)
##
import urllib2, time, re
from random import choice, randint, shuffle
from math import fmod
from cStringIO import StringIO
from gzip import GzipFile
from urllib import urlencode
import urllib
import core, group, text
from cgi import parse_qs
try:
import sqlite3
except ImportError:
from pysqlite2 import dbapi2 as sqlite3
try:
import cgitb
cgitb.enable()
except ImportError:
sys.stderr = sys.stdout
def quote(word):
    """Percent-encode *word* for use in a URL or a cache file name.

    ``urllib.quote`` is first tried on the value as given.  If that fails
    with ``KeyError`` or ``UnicodeError`` (typically a ``unicode`` value
    holding non-ASCII characters) the value is encoded to UTF-8 and quoted
    again.  Any other error propagates to the caller.
    """
    try:
        return urllib.quote(word)
    except (KeyError, UnicodeError):
        return urllib.quote(word.encode('utf-8'))
def upfirst(string):
    """Return *string* with its first character upper-cased.

    The rest of the string is left untouched.  An empty string is returned
    unchanged instead of raising ``IndexError``.
    """
    return string[:1].upper() + string[1:]
def dwfirst(string):
    """Return *string* with its first character lower-cased.

    The rest of the string is left untouched.  An empty string is returned
    unchanged instead of raising ``IndexError``.
    """
    return string[:1].lower() + string[1:]
def getUrl(url):
    """Fetch *url* and return the response body as a byte string.

    The request carries a User-Agent picked at random from the module-level
    ``ua`` list.  The body is gunzipped when it is a gzip stream; otherwise
    the raw bytes are returned.  Returns ``''`` when the request itself
    fails for any reason (best-effort fetch).
    """
    req = urllib2.Request(url)
    req.add_header('User-Agent', choice(ua))
    # NOTE(review): 'deflate' is advertised but only gzip is decoded below;
    # a deflate-encoded body is returned still compressed.
    req.add_header('Accept-Encoding', 'gzip, deflate')
    try:
        response = urllib2.urlopen(req)
        try:
            page = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
    except Exception:
        return ''
    try:
        return GzipFile('', 'r', 0, StringIO(page)).read()
    except Exception:
        # Not a gzip stream: hand back the body untouched.
        return page
def matchAll(where, start, end):
    """Return every substring of *where* enclosed by *start* and *end*.

    The text is scanned left to right.  For each occurrence of *start*,
    the nearest *end* that begins after the start marker closes the match.
    Scanning resumes at that end marker.  An unterminated final match is
    dropped.  An empty *start* yields ``[]``.

    The end marker is searched from the end of the start marker, the same
    way ``matchOne`` does it.  Searching from the beginning of the start
    marker produced empty matches when the markers overlapped and never
    terminated when both markers were equal.
    """
    if not start:
        # An empty start marker matches at every offset and would never
        # advance the scan.
        return []
    found = []
    cursor = 0
    while True:
        begin = where.find(start, cursor)
        if begin == -1:
            break
        begin += len(start)
        stop = where.find(end, begin)
        if stop == -1:
            break
        found.append(where[begin:stop])
        # Always beyond the start marker just consumed, so the scan
        # makes progress.
        cursor = stop
    return found
def matchOne(where, start, end):
    """Return the text between the first *start* and the next *end*.

    The end marker is searched from the end of the start marker.  Returns
    ``''`` when either marker cannot be found.
    """
    begin = where.find(start)
    if begin < 0:
        return ''
    begin += len(start)
    stop = where.find(end, begin)
    if stop < 0:
        return ''
    return where[begin:stop]
def fromXml(name='config.xml'):
    """Parse a flat one-element-per-line XML file into a dict.

    For every line, the text between the first ``<`` and ``>`` is taken as
    the key, and the text between ``<key>`` and ``</key>`` on the same line
    as the value.  Lines without a tag are skipped.  A tag with no matching
    closing tag on its line maps to ``''``.

    Raises ``IOError`` when the file cannot be opened.
    """
    xmlCfg = {}
    handle = open(name)
    try:
        for line in handle:
            key = matchOne(line, '<', '>')
            if not key:
                continue
            # The closing marker must include '</'.  Matching on 'key>'
            # alone left a trailing '</' on every value.
            xmlCfg[key] = matchOne(line, '<%s>' % key, '</%s>' % key)
    finally:
        handle.close()
    return xmlCfg
def getTag(where, tag):
    """Return the content of ``<tag>...</tag>`` in *where*, decoded from UTF-8.

    Returns an empty unicode string when the element is missing.  The
    closing marker includes ``</`` so the returned content does not carry
    a trailing ``</``.
    """
    return matchOne(where, '<%s>' % tag, '</%s>' % tag).decode('utf-8')
def getUA():
    """Load the User-Agent strings from ``system/ua.txt``, one per line.

    Line terminators are stripped and blank lines are skipped, so a stray
    empty line cannot become an empty User-Agent header.

    Raises ``IOError`` when the file cannot be opened.
    """
    handle = open('system/ua.txt')
    try:
        agents = [line.replace('\n', '').replace('\r', '') for line in handle]
    finally:
        handle.close()
    return [agent for agent in agents if agent]
def initPinger():
    """Start ``ping.py`` in the background for the current page.

    The child receives the page name (UTF-8 encoded) and the page URL as
    its two arguments; its stdout and stderr are discarded.  The call does
    not wait for the child to finish.

    The command is passed as an argument list without a shell, so quotes
    or shell metacharacters in the page name cannot alter the command.
    """
    import subprocess
    devnull = open(os.devnull, 'w')
    try:
        subprocess.Popen(
            ['python', 'ping.py', pageName.encode('utf-8'), makeUrl(pageName)],
            stdout=devnull, stderr=devnull)
    finally:
        # The child keeps its own copy of the descriptor.
        devnull.close()
def whatIsIt(it):
    """Classify the phrase *it* with a three-vote heuristic.

    One vote each for: the phrase contains ``' to '``; the average length
    of its space-separated words exceeds 4; it has more than 3 words.
    Returns 1 when at least two votes are cast, otherwise 0.
    """
    words = it.split(' ')
    average = float(sum([len(word) for word in words])) / len(words)
    votes = [' to ' in it, average > 4, len(words) > 3]
    if votes.count(True) > 1:
        return 1
    return 0
def check(word):
    """Randomly wrap *word* in one of the configured HTML tags.

    A number in 0..100 is drawn.  When it does not exceed ``cfg['check']``
    the word is wrapped as ``<tag>word</tag>``, with the tag chosen at
    random from the comma-separated ``cfg['tags']``.  Otherwise, or when no
    usable tag is configured, the word is returned unchanged.
    """
    no = randint(0, 100)
    if no <= int(cfg['check']):
        # Ignore empty entries such as those produced by 'b,,i' or ''.
        tags = [tag.strip() for tag in cfg['tags'].split(',') if tag.strip()]
        if tags:
            tag = choice(tags)
            # The closing tag needs its '</'.  The previous format string
            # rendered it as 'tag>'.
            word = '<%s>%s</%s>' % (tag, word, tag)
    return word
def keyInTo(sent, mode):
    """Insert a keyword into the sentence *sent* and return the result.

    *mode* selects the keyword:
      'relative' - a random entry of the global ``relatives`` list;
      'simply'   - the global ``pageName``;
      'links'    - a random relative rendered as an HTML link to its page.

    ``whatIsIt`` rates the plain keyword.  For rating 0 the keyword is put
    in front of the sentence or appended before each period.  For any
    other rating, a random draw picks one of four equally likely
    placements (prefix with ':', or appended with ';', ' - ' or as a new
    sentence).

    Raises ``ValueError`` for an unknown *mode*.
    """
    if mode == 'relative':
        keyword = choice(relatives)
        rate = whatIsIt(keyword)
    elif mode == 'simply':
        keyword = pageName
        rate = whatIsIt(keyword)
    elif mode == 'links':
        target = choice(relatives)
        # NOTE(review): the anchor markup is reconstructed.  The previous
        # format string had a single placeholder for these two values and
        # raised TypeError.
        keyword = '<a href="%s">%s</a>' % (makeUrl(target), upfirst(target))
        rate = whatIsIt(target)
    else:
        raise ValueError('unknown keyword mode: %r' % (mode,))
    no = randint(0, 100)
    if rate == 0:
        if no > 50:
            sent = sent.replace('.', ', %s.' % check(keyword).lower())
        else:
            sent = '%s, %s' % (check(upfirst(keyword)), sent)
    elif no <= 25:
        sent = '%s: %s' % (check(upfirst(keyword)), sent)
    elif no <= 50:
        # Was written as '25 > no >= 50', which can never be true.
        sent = sent.replace('.', '; %s.' % check(keyword).lower())
    elif no <= 75:
        # Was written as '50 > no >= 75', which can never be true.
        sent = sent.replace('.', ' - %s.' % check(keyword).lower())
    else:
        sent = sent.replace('.', '. %s.' % check(upfirst(keyword)))
    return sent
def genText(count, mode, fq):
    """Generate *count* random sentences and return them as one string.

    Each sentence is drawn from ``text.dictry['SNTS']``.  Every *fq*-th
    sentence, starting with the first, gets a keyword inserted by
    ``keyInTo(sent, mode)``.  An *fq* of 0 disables keyword insertion
    instead of raising a math domain error.  ``{MACRO}`` placeholders are
    then replaced one at a time by a random lower-cased entry of
    ``text.dictry[MACRO]`` until none is left.  Each finished sentence is
    capitalised and followed by ``' \\n'``.

    Raises ``KeyError`` when a sentence names a macro missing from
    ``text.dictry``.
    """
    sentences = []
    for i in xrange(count):
        sent = choice(text.dictry['SNTS'])
        if fq != 0 and i % fq == 0:
            sent = keyInTo(sent, mode)
        while True:
            i0 = sent.find('{') + 1
            i1 = sent.find('}', i0)
            if i0 == 0 or i1 == -1:
                break
            macro = sent[i0:i1]
            sent = sent.replace('{%s}' % macro,
                                choice(text.dictry[macro]).lower(), 1)
        sentences.append('%s \n' % upfirst(sent))
    # Tidy the space an inserted keyword may leave before a comma.
    return ''.join(sentences).replace(' ,', ',')
def notFound():
    """Return the source of the '404' template as loaded by ``readTmpl``."""
    return readTmpl('404')
def readCache():
    """Return the cached page for the current door/page, or ``None``.

    The cache file is ``cache/<quote("doorName&pageName")>``; its content
    is decoded from UTF-8.  ``None`` is returned when the file is missing,
    unreadable or not valid UTF-8, so callers must be prepared for a miss.
    Other errors propagate instead of being silently swallowed.
    """
    try:
        # NOTE(review): quote() leaves '/' unescaped by default, so a name
        # containing '/' addresses a sub-directory of cache/ - confirm that
        # names can never contain one.
        cid = quote('%s&%s' % (doorName, pageName))
        handle = open('cache/%s' % cid)
        try:
            page = handle.read()
        finally:
            handle.close()
        return page.decode('utf-8')
    except (IOError, OSError, UnicodeError):
        return None
def addCache(source):
    """Store *source* (unicode) as the cached page for the current door/page.

    The page is written UTF-8 encoded to ``cache/<quote("doorName&pageName")>``.
    Caching is best-effort: I/O and encoding failures are ignored, and any
    other error propagates.  The text is encoded before the file is opened,
    so a failed encode cannot leave a truncated cache file behind.
    """
    try:
        payload = source.encode('utf-8')
        cid = quote('%s&%s' % (doorName, pageName))
        handle = open('cache/%s' % cid, 'w')
        try:
            handle.write(payload)
        finally:
            handle.close()
    except (IOError, OSError, UnicodeError):
        pass
def readTmpl(mode='page'):
    """Return the template ``system/<mode>.xml`` decoded from UTF-8.

    Returns ``None`` when the file is missing, unreadable or not valid
    UTF-8.  Other errors propagate instead of being silently swallowed.
    """
    try:
        handle = open('system/%s.xml' % (mode))
        try:
            source = handle.read()
        finally:
            handle.close()
        return source.decode('utf-8')
    except (IOError, OSError, UnicodeError):
        return None
def makeUrl(page, doorway='', domain=''):
    """Build the URL of *page* from the ``cfg['urlmask']`` pattern.

    *domain* defaults to ``task['domain']``.  *doorway* is a door keyword
    that is translated to its URL name through the global ``doorways`` /
    ``doornames`` lists.  When omitted, the current ``doorName`` is used,
    and the doorway part stays empty if that lookup is not possible.

    *page* is one of the reserved names 'main', 'rss', 'map', 'index', or
    a keyword from the global ``relatives`` list, which is translated
    through ``pagenames``.

    Raises ``ValueError`` when an explicit *doorway* or a regular *page*
    is not in its list.
    """
    if domain == '':
        domain = task['domain']
    if doorway == '':
        try:
            doorway = doornames[doorways.index(doorName)]
        except (NameError, ValueError, IndexError):
            # The globals are not set yet, or the current door is not in
            # the list: leave the doorway part empty.
            pass
    else:
        doorway = doornames[doorways.index(doorway)]
    url = cfg['urlmask'].replace('{domain}', domain)
    if page == 'main' or page == 'rss':
        if page == 'main':
            url = url.replace('{doorway}', cfg['mainname'])
        else:
            url = url.replace('{doorway}', cfg['rssname'])
        # With an empty page part the last character of the mask is
        # dropped (presumably a trailing separator - confirm).
        url = url.replace('{page}', '')[:-1]
        return url
    url = url.replace('{doorway}', doorway)
    if page == 'map':
        return url.replace('{page}', cfg['mapname'])
    if page == 'index':
        return url.replace('{page}', cfg['indexname'])
    return url.replace('{page}', pagenames[relatives.index(page)])
# Download the feed at `url` with getUrl() and return a list of its items,
# each decoded from UTF-8 and passed through a chain of text replacements.
# NOTE(review): the marker strings below look damaged. The matchAll() end
# marker is split across two lines, and the replace() pairs are identical or
# empty, so as written they change nothing. They presumably held feed tags
# and HTML entities; restore them from the original file before relying on
# this function.
def getRSS(url):
source = getUrl(url)
rss = matchAll(source,'- ','
')
items = []
for item in rss:
item = item.decode('utf-8')
item = item.replace('<','<').replace('>','>')
item = item.replace('<','<')
item = item.replace('&','&')
item = item.replace('','')
items.append(item)
return items
# Query Google for `word` (100 results, language taken from cfg['lang']) and
# return a list of dicts with the keys 'link', 'discr' and 'title'.
# NOTE(review): the markers passed to matchAll()/matchOne() are empty or
# split across lines and look like they lost their markup - restore them
# from the original file.
# NOTE(review): `title` is read in the `if` below before anything in this
# function assigns it; a preceding assignment appears to be missing.
def getSnipets(word):
url = 'http://www.google.com/search?hl=en&q=%s&num=100&lr=lang_%s' % (quote(word),cfg['lang'])
source = getUrl(url)
items = matchAll(source,'
','
')
snipets = []
for item in items:
item = item.decode('utf-8')
item = item.replace('','').replace('','')
link = matchOne(item,'','')
if title == '':
title = matchOne(item,'>','')
discr = matchOne(item,'','
')
snipets.append({'link':link,'discr':discr,'title':title})
return snipets
# POST `text` to Google Translate (source language auto-detected, target
# language `into`) and return the translated text as unicode.
# Returns '' when the request fails.
# NOTE(review): the parameter `text` shadows the imported `text` module
# inside this function.
# NOTE(review): the markers used to cut the translation out of the response
# and the final replace() arguments are empty, identical or split across
# lines; they look like they lost their markup - restore them from the
# original file.
def translate(text,into):
result = u""
url = 'http://www.google.com/translate_t?sl=auto&tl=%s' % into
data = {'hl':'en','ie':'UTF8','text':text,'sl':'auto','tl':into}
req = urllib2.Request(url,urlencode(data))
req.add_header('Host','www.google.com')
# Random User-Agent from the module-level `ua` list.
req.add_header('User-Agent',choice(ua))
req.add_header('Accept-Encoding','gzip, deflate')
req.add_header('Referer',url)
try:
page = urllib2.urlopen(req).read()
except Exception, msg:
return ''
# Gunzip the body when possible; otherwise use it as received.
try:
source = GzipFile('','r',0,StringIO(page)).read()
except Exception, msg:
source = page
result = matchOne(source,'','
')
result = result.decode('utf-8')
return result.replace('<','<').replace('>','>').replace('
','')
# Expand the template macros in `tmpl` and return the resulting text.
# Reads the module globals pageName, doorName, groupName, relatives,
# doorways and task. The macro families are handled one after another in the
# order below, so text produced by an earlier family is seen by later ones.
# NOTE(review): the {rss:} section passes empty marker strings to matchOne();
# they look like they lost their markup - restore them from the original file.
def useTmpl(tmpl):
# global pageName, doorName, groupName
## loop
# {loop:MIN:MAX}body{/loop}: body is repeated randint(MIN,MAX) times, with
# {num} replaced by the 1-based repetition number.
# `b` is the offset from which the closing tag is searched; the opening tag
# is always searched from the start of tmpl.
# NOTE(review): the not-found test `i1 == b-1` equals -1 only while b is 0;
# after a `continue` has moved b forward, a missing {/loop} is not detected -
# confirm behaviour on unbalanced templates.
b = 0
while 1:
i0 = tmpl.find('{loop:') + 6
i1 = tmpl.find('{/loop}',b)
if (i0 == 5) or (i1 == b-1):
break
i2 = tmpl.find('}',i0)
mi,ma = tmpl[i0:i2].split(':')
count = randint(int(mi),int(ma))
body = tmpl[i2+1:i1]
# Unequal counts of opening and closing tags mean this {/loop} belongs to a
# nested loop: retry with the next closing tag.
c1 = body.count('{loop:')
c2 = body.count('{/loop}')
if c1 != c2:
b = i1 + 7
continue
replaced = ''
for i in xrange(count):
replaced += body.replace('{num}',str(i+1))
tmpl = tmpl.replace('{loop:%s:%s}%s{/loop}' % (mi,ma,body),replaced)
b = 0
## page keyword: plain, URL-quoted, first letter upper-cased
tmpl = tmpl.replace('{keyword}',pageName)
tmpl = tmpl.replace('{keyword:quote}',quote(pageName))
tmpl = tmpl.replace('{keyword:up}',upfirst(pageName))
## door keyword
tmpl = tmpl.replace('{mainword}',doorName)
tmpl = tmpl.replace('{mainword:quote}',quote(doorName))
tmpl = tmpl.replace('{mainword:up}',upfirst(doorName))
## group name and domain
tmpl = tmpl.replace('{groupname}',groupName)
tmpl = tmpl.replace('{groupname:quote}',quote(groupName))
tmpl = tmpl.replace('{groupname:up}',upfirst(groupName))
tmpl = tmpl.replace('{domain}',task['domain'])
## random keyword
# Each {randword} / {randword:up} occurrence gets its own random relative.
# NOTE(review): the loop ends only when no '{randword' text is left; a tag
# with any other suffix is never replaced and would keep it spinning.
while 1:
i0 = tmpl.find('{randword')
if i0 == -1:
break
tmpl = tmpl.replace('{randword}',choice(relatives),1)
tmpl = tmpl.replace('{randword:up}',upfirst(choice(relatives)),1)
## random number
# {rand:MIN:MAX} -> random integer in MIN..MAX, drawn per occurrence.
while 1:
i0 = tmpl.find('{rand:') + 6
i1 = tmpl.find('}',i0)
if (i0 == 5) or (i1 == -1):
break
mi,ma = tmpl[i0:i1].split(':')
tmpl = tmpl.replace('{rand:%s:%s}' % (mi,ma), str(randint(int(mi),int(ma))),1)
# random choice
# {choice:a.:b.:c/} -> one of the '.:'-separated alternatives.
while 1:
i0 = tmpl.find('{choice:') + 8
i1 = tmpl.find('/}',i0)
if (i0 == 7) or (i1 == -1):
break
cortege = tmpl[i0:i1].split('.:')
tmpl = tmpl.replace('{choice:%s/}' % tmpl[i0:i1],choice(cortege),1)
# from file
# {file:PATH/} -> a random line of the file PATH, decoded from UTF-8.
# Any error stops the processing of further {file:} tags.
while 1:
i0 = tmpl.find('{file:') + 6
i1 = tmpl.find('/}',i0)
if (i0 == 5) or (i1 == -1):
break
name = tmpl[i0:i1]
try:
handle = open(name)
string = choice(handle.readlines()).replace('\n','').replace('\r','')
handle.close()
tmpl = tmpl.replace('{file:%s/}' % name,string.decode('utf-8'),1)
except Exception,msg:
break
# from url
# {url:ADDRESS/} -> the body fetched from ADDRESS, decoded from UTF-8.
# Any error stops the processing of further {url:} tags.
while 1:
i0 = tmpl.find('{url:') + 5
i1 = tmpl.find('/}',i0)
if (i0 == 4) or (i1 == -1):
break
url = tmpl[i0:i1]
try:
req = urllib2.Request(url)
handle = urllib2.urlopen(req)
source = handle.read()
tmpl = tmpl.replace('{url:%s/}' % url,source.decode('utf-8'),1)
except Exception,msg:
# msg
break
# categories links
# {links:doors:N}body{/links}: body is repeated for the entries of
# `doorways`, filling {url}, {anchor}, {anchor:up} and {num}.
# NOTE(review): `no` starts at 0 and the loop stops once no > count, so up
# to count+1 entries are emitted - confirm this is intended.
while 1:
i0 = tmpl.find('{links:doors:') + 13
i1 = tmpl.find('{/links}',i0)
if (i0 == 12) or (i1 == -1):
break
i2 = tmpl.find('}',i0)
count = int(tmpl[i0:i2])
body = tmpl[i2+1:i1]
replaced, no = '', 0
for key in doorways:
if no > count:
break
new = body.replace('{url}',makeUrl('index',key)).replace('{anchor}',key)
new = new.replace('{anchor:up}',upfirst(key)).replace('{num}',str(no+1))
replaced += new
no += 1
tmpl = tmpl.replace('{links:doors:%s}%s{/links}' % (count,body),replaced)
#
# local links
# {links:pages:N}body{/links}: body is repeated for a window of up to N
# consecutive entries of `relatives`, starting at a random offset and then
# shuffled.
while 1:
i0 = tmpl.find('{links:pages:') + 13
i1 = tmpl.find('{/links}',i0)
if (i0 == 12) or (i1 == -1):
break
i2 = tmpl.find('}',i0)
count = int(tmpl[i0:i2])
first = randint(0,max([len(relatives)-count,0]))
links = []
try:
if first < 0: first = 0
links = relatives[first:first+count]
except:
links = relatives
shuffle(links)
body = tmpl[i2+1:i1]
replaced, no = '', 0
for key in links:
new = body.replace('{url}',makeUrl(key)).replace('{anchor}',key)
new = new.replace('{anchor:up}',upfirst(key)).replace('{num}',str(no+1))
replaced += new
no += 1
tmpl = tmpl.replace('{links:pages:%s}%s{/links}' % (count,body),replaced)
# Neighbouring pages: next/previous entry of `relatives` around pageName,
# wrapping around at both ends. Skipped silently on any error, for example
# when pageName is not in `relatives`.
try:
pagId = relatives.index(pageName)
if pagId == len(relatives) - 1:
pagNext = relatives[0]
else:
pagNext = relatives[pagId+1]
if pagId == 0:
pagBefore = relatives[len(relatives) - 1]
else:
pagBefore = relatives[pagId-1]
tmpl = tmpl.replace('{link:next}',makeUrl(pagNext)).replace('{link:before}',makeUrl(pagBefore))
tmpl = tmpl.replace('{relative:next}',pagNext).replace('{relative:before}',pagBefore)
except:
pass
# Fixed links: feed, door index, group main page, door map.
tmpl = tmpl.replace('{link:rss}',makeUrl('rss'))
tmpl = tmpl.replace('{link:index:door}',makeUrl('index'))
tmpl = tmpl.replace('{link:index:group}',makeUrl('main'))
tmpl = tmpl.replace('{link:map}',makeUrl('map'))
# {relative:N/} -> relatives[N], falling back to relatives[0].
# NOTE(review): if the block above was skipped, a leftover {relative:next}
# or {relative:before} tag reaches int() below - confirm templates cannot
# trigger this.
while 1:
i0 = tmpl.find('{relative:') + 10
i1 = tmpl.find('/}',i0)
if (i0 == 9) or (i1 == -1):
break
no = int(tmpl[i0:i1])
try:
tmpl = tmpl.replace('{relative:%s/}' % no,relatives[no])
except:
tmpl = tmpl.replace('{relative:%s/}' % no,relatives[0])
# {doorname:N/} -> doorways[N], falling back to the current doorName.
while 1:
i0 = tmpl.find('{doorname:') + 10
i1 = tmpl.find('/}',i0)
if (i0 == 9) or (i1 == -1):
break
no = int(tmpl[i0:i1])
try:
tmpl = tmpl.replace('{doorname:%s/}' % no,doorways[no])
except:
tmpl = tmpl.replace('{doorname:%s/}' % no,doorName)
# date and time
# {date:FORMAT/} -> time.strftime(FORMAT) for the current time.
while 1:
i0 = tmpl.find('{date:') + 6
i1 = tmpl.find('/}',i0)
if (i0 == 5) or (i1 == -1):
break
format = tmpl[i0:i1]
try:
tmpl = tmpl.replace('{date:%s/}' % format,time.strftime(format))
except Exception,msg:
# msg
break
# text
# {text:MODE:COUNT:FQ/} -> genText(COUNT, MODE, FQ).
while 1:
i0 = tmpl.find('{text:') + 6
i1 = tmpl.find('/}',i0)
if (i0 == 5) or (i1 == -1):
break
mode,count,fq = tmpl[i0:i1].split(':')
tmpl = tmpl.replace('{text:%s:%s:%s/}' % (mode,count,fq),genText(int(count),mode,int(fq)),1)
# use memory
# {setvar:KEY:VALUE/} feeds VALUE into {usevar:KEY} tags found between it
# and the next {setvar:KEY...} tag; the setvar tag is removed once no such
# usevar tag is found.
# NOTE(review): `next` is computed once per setvar tag and not refreshed
# after replacements change the length of tmpl - confirm.
while 1:
i0 = tmpl.find('{setvar:') + 8
i1 = tmpl.find('/}',i0)
if (i0 == 7) or (i1 == -1):
break
key,value = tmpl[i0:i1].split(':')
next = tmpl.find('{setvar:%s' % key,i0)
while 1:
if next == -1:
i3 = tmpl.find('{usevar:%s}' % key,i0)
else:
i3 = tmpl.find('{usevar:%s}' % key,i0,next)
if i3 == -1:
tmpl = tmpl.replace('{setvar:%s:%s/}' % (key,value),'',1)
break
tmpl = tmpl.replace('{usevar:%s}' % key, value,1)
# rss
# {rss:HOST/PATH:LIMIT}body{/rss}: body is repeated for up to LIMIT items of
# the feed at http://HOST/PATH, filling {title}, {link}, {description} and
# {num}.
# NOTE(review): '{rss:' is 5 characters long, so a failed find() gives 4
# here, not 5; the `i0 == 5` test instead fires when the tag sits at the
# very start of tmpl - confirm and correct.
while 1:
i0 = tmpl.find('{rss:') + 5
i1 = tmpl.find('{/rss}',i0)
if (i0 == 5) or (i1 == -1):
break
i2 = tmpl.find('}',i0)
url,lim = tmpl[i0:i2].split(':')
rss = getRSS('http://%s' % url)
body = tmpl[i2+1:i1]
replaced,num = '',1
for item in rss:
if num > int(lim):
break
new = body.replace('{title}',matchOne(item,'',''))
new = new.replace('{link}',matchOne(item,'',''))
new = new.replace('{description}',matchOne(item,'',''))
replaced += new.replace('{num}','%s' % num)
num += 1
tmpl = tmpl.replace('{rss:%s:%s}%s{/rss}' % (url,lim,body),replaced,1)
# snipets
# {snipets:WORD:LIMIT}body{/snipets}: body is repeated for up to LIMIT
# shuffled search results for WORD, filling {title}, {link}, {discription}
# and {num}.
while 1:
i0 = tmpl.find('{snipets:') + 9
i1 = tmpl.find('{/snipets}',i0)
if (i0 == 8) or (i1 == -1):
break
i2 = tmpl.find('}',i0)
word,lim = tmpl[i0:i2].split(':')
snipets = getSnipets(word)
shuffle(snipets)
body = tmpl[i2+1:i1]
replaced,num = '',1
for item in snipets:
if num > int(lim):
break
new = body.replace('{title}',item['title'])
new = new.replace('{link}',item['link'])
new = new.replace('{discription}',item['discr'])
replaced += new.replace('{num}','%s' % num)
num += 1
tmpl = tmpl.replace('{snipets:%s:%s}%s{/snipets}' % (word,lim,body),replaced,1)
# {translate:LANG}body{/translate} -> translate(body, LANG).
while 1:
i0 = tmpl.find('{translate:') + 11
i1 = tmpl.find('{/translate}',i0)
if (i0 == 10) or (i1 == -1):
break
i2 = tmpl.find('}',i0)
lang = tmpl[i0:i2]
body = tmpl[i2+1:i1]
tmpl = tmpl.replace('{translate:%s}%s{/translate}' % (lang,body),translate(body,lang),1)
return tmpl
def matchUpd(tmpl):
    """Shield the macros inside the first ``{update}...{/update}`` zone.

    Inside the zone every ``{`` becomes ``<<`` and every ``}`` becomes
    ``>>``, so a later macro expansion leaves the zone alone.  The zone
    tags themselves are kept.  A template without a complete zone is
    returned unchanged.
    """
    opening = tmpl.find('{update}')
    if opening == -1:
        return tmpl
    zone_start = opening + 8
    zone_end = tmpl.find('{/update}', zone_start)
    if zone_end == -1:
        return tmpl
    zone = tmpl[zone_start:zone_end]
    shielded = zone.replace('{', '<<').replace('}', '>>')
    return tmpl.replace('{update}%s{/update}' % zone,
                        '{update}%s{/update}' % shielded)
def delUpd(tmpl):
    """Remove the first ``{update}...{/update}`` zone, tags included.

    Every further occurrence of exactly the same zone text is removed as
    well.  A template without a complete zone is returned unchanged.
    Without this guard the slice bounds were computed from a failed
    ``find`` and unrelated characters could be deleted.
    """
    i0 = tmpl.find('{update}')
    if i0 == -1:
        return tmpl
    i1 = tmpl.find('{/update}', i0)
    if i1 == -1:
        return tmpl
    # 9 == len('{/update}'): the slice takes the closing tag with it.
    return tmpl.replace(tmpl[i0:i1 + 9], '')
def reUpd(tmpl):
    """Re-render the ``{update}`` zone of a cached page.

    The first ``{update}...{/update}`` zone holds macros shielded as
    ``<<...>>``.  They are restored to ``{...}``, expanded with
    ``useTmpl``, and the fresh text is inserted right after the zone.  The
    zone itself is kept for the next update.

    A page without a complete zone is returned unchanged, and ``useTmpl``
    is not run on an arbitrary slice of the page.
    """
    i0 = tmpl.find('{update}')
    if i0 == -1:
        return tmpl
    i0 += 8
    i1 = tmpl.find('{/update}', i0)
    if i1 == -1:
        return tmpl
    zone = tmpl[i0:i1]
    fresh = useTmpl(zone.replace('<<', '{').replace('>>', '}'))
    return tmpl.replace('{update}%s{/update}' % zone,
                        '{update}%s{/update} \n %s' % (zone, fresh))
def mapTmpl(tmpl):
    """Expand the ``{map}body{/map}`` section into one entry per page.

    *body* is repeated for every keyword of the global ``relatives`` list,
    with ``{url}`` replaced by the page URL and ``{anchor}`` by the
    keyword.  A template without a complete section is returned unchanged,
    and no entries are built for it.
    """
    i0 = tmpl.find('{map}')
    if i0 == -1:
        return tmpl
    i0 += 5
    i1 = tmpl.find('{/map}', i0)
    if i1 == -1:
        return tmpl
    body = tmpl[i0:i1]
    entries = [body.replace('{url}', makeUrl(key)).replace('{anchor}', key)
               for key in relatives]
    return tmpl.replace('{map}%s{/map}' % body, ''.join(entries))
# Expand every {last:N}body{/last} section of `tmpl` with up to N of the most
# recently appended records of last.xml (the file is read in full and the
# record list reversed). With a `category` other than 'general', only
# records whose <category> equals it are used.
# Inside body, {title}, {link}, {category}, {description}, {num} and
# {date:FORMAT/} are filled from the record.
# NOTE(review): the record markers passed to matchAll() below are empty
# strings and look like they lost their markup - restore them from the
# original file.
def indexTmpl(tmpl,category='general'):
# global pageName, doorName, groupName
while 1:
i0 = tmpl.find('{last:') + 6
i1 = tmpl.find('{/last}',i0)
if (i0 == 5) or (i1 == -1):
break
i2 = tmpl.find('}',i0)
count, body = int(tmpl[i0:i2]), tmpl[i2+1:i1]
# last.xml is re-read for every {last:} section.
handle = open('last.xml')
source = handle.read()
handle.close()
pages = matchAll(source,'','')
pages.reverse()
# Never ask for more records than the file holds.
if count > len(pages):
mycount = len(pages)
else:
mycount = count
last, no = '', 1
for page in pages:
if no > mycount:
break
if category != 'general':
if category != getTag(page,'category'):
continue
new = body.replace('{title}',getTag(page,'title'))
new = new.replace('{link}',getTag(page,'link'))
new = new.replace('{category}',getTag(page,'category'))
new = new.replace('{description}',getTag(page,'description'))
new = new.replace('{num}',str(no))
# The record's <date> is read as an integer timestamp and formatted in
# local time. i0 and i1 are reused here; the outer loop recomputes them.
i0 = new.find('{date:') + 6
i1 = new.find('/}',i0)
format = new[i0:i1]
new = new.replace('{date:%s/}' % format,time.strftime(format,time.localtime(int(getTag(page,'date')))))
last += new
no += 1
tmpl = tmpl.replace('{last:%s}%s{/last}' % (count,body),last)
return tmpl
# Append a record for the current page to last.xml. The record is filled
# with the capitalised page name, the current timestamp, the page URL, the
# door name and a preview joined from sents[1:limit].
# Returns 0 when the preview cannot be built; otherwise falls off the end.
# NOTE(review): the first statement is truncated. The text between
# "source.find('" and "len(sents):" is missing, including the code that
# builds `sents` and the `if` that pairs with the `else` below. The record
# template also appears to have lost the tags around its %s fields. Restore
# both from the original file.
def upLast(source):
# global pageName, doorName, groupName
i0 = source.find(' len(sents):
limit = len(sents)
else:
limit = int(cfg['tolent'])
title = upfirst(pageName)
try:
preview = ' '.join(sents[1:limit]).decode('utf-8')
except:
return 0
data = '''
%s
%s
%s
%s
%s ...
''' % (title,int(time.time()),makeUrl(pageName,doorName),doorName,preview)
# Records are appended, so the newest record is last in the file.
handle = open('last.xml','a')
handle.write(data.encode('utf-8'))
handle.close()
#
# Task and site configuration, parsed once at import time by fromXml().
task = fromXml('system/task.xml')
cfg = fromXml('system/config.xml')
# Group name from the task file, decoded to unicode.
groupName = task['group'].decode('utf-8')
# Module-level alias for the dictionary exposed by the text module.
dictry = text.dictry
# User-Agent strings used for outgoing HTTP requests.
ua = getUA()
# WSGI entry point. The query parameter 'd' selects the door (`major`) and
# 'q' the page (`minor`); both default to 'index'. The rendered page is
# returned UTF-8 encoded in a one-element list.
# The request state (pageName, doorName, doorways, doornames, relatives,
# pagenames) is stored in module globals that the template helpers read.
# NOTE(review): the response status is always '200 OK', including when the
# 404 template is served - confirm this is intended.
def application(environ, start_response):
global pageName, doorName, doorways, doornames, relatives, pagenames
# Keep only the first value of every query parameter.
query = {}
for key, value in parse_qs(environ['QUERY_STRING'], '').iteritems():
query[key] = value[0]
try:
major = query['d']
except:
major = 'index'
try:
minor = query['q']
except:
minor = 'index'
#
# Group main page: the group name serves as page and door name.
if major == 'index':
relatives = []
doorways, doornames = group.infDoor(groupName.encode('utf-8'))
pageName = doorName = groupName
tmplSrc = readTmpl('main')
pageSrc = useTmpl(indexTmpl(tmplSrc))
update = False
# Group feed: same as the main page but rendered from the 'rss' template.
elif major == 'rss':
relatives = []
doorways, doornames = group.infDoor(groupName.encode('utf-8'))
pageName = doorName = groupName
tmplSrc = readTmpl('rss')
pageSrc = useTmpl(indexTmpl(tmplSrc))
update = False
# Anything else is treated as a door URL name.
else:
update = True
valid, doorways, doornames = group.isItValid(groupName.encode('utf-8'),major)
if valid:
doorName = doorways[doornames.index(major)]
else:
pageSrc = notFound()
update = False
# NOTE(review): doorName is not assigned in this request when the door is
# invalid, yet the code below reads it; whether the following section runs
# in that case cannot be determined from the text as shown - confirm.
if minor == 'index':
relatives, pagenames = core.infKwd(doorName.encode('utf-8'))
pageName = relatives[0]
tmplSrc = readTmpl('index')
tmplSrc = indexTmpl(tmplSrc,doorName)
pageSrc = useTmpl(tmplSrc)
elif minor == 'map':
relatives, pagenames = core.infKwd(doorName.encode('utf-8'))
pageName = relatives[0]
tmplSrc = readTmpl('map')
pageSrc = mapTmpl(useTmpl(tmplSrc))
else:
valid,relatives,pagenames = core.isItValid(doorName.encode('utf-8'),minor)
if valid:
change = False
pageName = relatives[pagenames.index(minor)]
# Active pages are served from the cache; the {update} zone is re-rendered
# when core.isItUpd() reports 1, and the flag is then reset to 0.
# NOTE(review): readCache() returns None when the cache file cannot be
# read, and pageSrc is used as a string afterwards - confirm handling.
if core.isItActive(doorName.encode('utf-8'),pageName.encode('utf-8')):
pageSrc = readCache()
if core.isItUpd(doorName.encode('utf-8'),pageName.encode('utf-8')) == 1:
change = True
#openDict(cfg['dict'])
pageSrc = reUpd(pageSrc)
core.updKwd(doorName.encode('utf-8'),pageName.encode('utf-8'),0)
# First request for this page: render it from the 'page' template with the
# {update} zone shielded, mark it active and record it in last.xml.
else:
change = True
#openDict(cfg['dict'])
pageSrc = readTmpl()
pageSrc = useTmpl(matchUpd(pageSrc))
core.activeKwd(doorName.encode('utf-8'),pageName.encode('utf-8'))
upLast(pageSrc)
if change == True:
addCache(pageSrc)
core.setLast(doorName.encode('utf-8'),pageName.encode('utf-8'))
# Start the pinger for a random share of requests, set by cfg['toping'];
# failures to start it are ignored.
if randint(0,100) < int(cfg['toping']):
try:
initPinger()
except:
pass
else:
pageSrc = notFound()
update = False
## Request accounting and scheduling; runs only while `update` is true.
if update:
doorName = doorName.encode('utf-8')
pageName = pageName.encode('utf-8')
core.incrReq(doorName,pageName)
# core.unFreeze() is called with a random amount in cfg['ufmin']..cfg['ufmax']
# every cfg['ufnum'] door requests, and again when more than cfg['ufhrs']
# hours have passed since core.getAllLast().
if fmod(core.getAllReq(doorName),int(cfg['ufnum'])) == 0:
core.unFreeze(doorName,randint(int(cfg['ufmin']),int(cfg['ufmax'])))
hours = (int(time.time()) - core.getAllLast(doorName)) / 3600.0
if hours > float(cfg['ufhrs']):
core.unFreeze(doorName,randint(int(cfg['ufmin']),int(cfg['ufmax'])))
# The page is flagged for update (flag 1) every cfg['updnum'] page requests,
# and again when more than cfg['updhrs'] hours have passed since
# core.getLast().
if fmod(core.getReq(doorName,pageName),int(cfg['updnum'])) == 0:
core.updKwd(doorName,pageName,1)
hours = (int(time.time()) - core.getLast(doorName,pageName)) / 3600.0
if hours > float(cfg['updhrs']):
core.updKwd(doorName,pageName,1)
# Strip the shielded {update} zone before the page is sent.
pageSrc = delUpd(pageSrc)
#
start_response('200 OK', [('Content-Type', 'text/html')])
#
# NOTE(review): path_info is assigned but not used.
path_info = environ.get('PATH_INFO', '')
return [pageSrc.encode('utf-8')]