#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""WSGI entry point that renders template-driven pages (Python 2).

Templates are read from ``system/<name>.xml``; ``{...}`` macros in them are
expanded by :func:`useTmpl` and friends, rendered pages are cached under
``cache/`` and request bookkeeping is delegated to the project modules
``core`` and ``group``.

The request state (``pageName``, ``doorName``, ``doorways``, ``doornames``,
``relatives``, ``pagenames``) lives in module-level globals that
:func:`application` rebinds on every request and that most helpers read.

NOTE(review): several string literals containing markup were lost from the
copy of this file that was reviewed. Every place where a literal had to be
restored or could not be recovered is marked ``NOTE(review)`` below and must
be checked against version control.
"""
import sys, os, site

myPath = os.path.dirname(os.path.abspath(__file__))
os.chdir(myPath)
sys.path.append(myPath)
##
import urllib2, time, re
import subprocess
from random import choice, randint, shuffle
from math import fmod
from cStringIO import StringIO
from gzip import GzipFile
from urllib import urlencode
import urllib
import core, group, text
from cgi import parse_qs

try:
    import sqlite3
except ImportError:
    from pysqlite2 import dbapi2 as sqlite3
try:
    import cgitb
    cgitb.enable()
except ImportError:
    sys.stderr = sys.stdout

# NOTE(review): name of the element that wraps one record in last.xml. The
# original literal was lost; 'page' is a guess based on the variable names in
# indexTmpl. It must match whatever existing last.xml files contain.
LAST_RECORD_TAG = 'page'


def quote(word):
    """Percent-encode *word* with ``urllib.quote``.

    If quoting *word* directly raises, it is encoded to UTF-8 and quoted
    again.
    """
    try:
        return urllib.quote(word)
    except Exception:
        return urllib.quote(word.encode('utf-8'))


def upfirst(string):
    """Return *string* with its first character upper-cased ('' stays '')."""
    if not string:
        return string
    return string[0].upper() + string[1:]


def dwfirst(string):
    """Return *string* with its first character lower-cased ('' stays '')."""
    if not string:
        return string
    return string[0].lower() + string[1:]


def getUrl(url):
    """Fetch *url* and return the response body as a byte string.

    A random User-Agent from ``ua`` is sent and gzip is accepted; the body is
    gunzipped when possible and returned raw otherwise. Returns '' when the
    request fails.
    """
    req = urllib2.Request(url)
    req.add_header('User-Agent', choice(ua))
    req.add_header('Accept-Encoding', 'gzip, deflate')
    try:
        page = urllib2.urlopen(req).read()
    except Exception:
        return ''
    try:
        return GzipFile('', 'r', 0, StringIO(page)).read()
    except Exception:
        # Not gzip-compressed (or corrupt): hand back the raw payload.
        return page


def matchAll(where, start, end):
    """Return every substring of *where* enclosed by *start* and *end*.

    Empty markers yield an empty list (they can never make progress), and the
    end marker is searched for only after the start marker so the two cannot
    overlap.
    """
    result = []
    if not start or not end:
        return result
    pos = 0
    while True:
        i0 = where.find(start, pos)
        if i0 == -1:
            break
        i0 += len(start)
        i1 = where.find(end, i0)
        if i1 == -1:
            break
        result.append(where[i0:i1])
        # The next search resumes at the end marker, as it always did.
        pos = i1
    return result


def matchOne(where, start, end):
    """Return the first substring of *where* between *start* and *end*, or ''."""
    i0 = where.find(start)
    i1 = where.find(end, i0 + len(start))
    if (i0 == -1) or (i1 == -1):
        return ''
    return where[i0 + len(start):i1]


def fromXml(name='config.xml'):
    """Read ``<key>value</key>`` lines from file *name* into a dict.

    Each line contributes one entry: the key is the text of the first
    ``<...>`` on the line, the value is whatever sits between ``<key>`` and
    ``</key>`` ('' when that pair is absent).
    """
    xmlCfg = {}
    with open(name) as handle:
        for line in handle:
            key = matchOne(line, '<', '>')
            # The closing-tag literal had lost its markup ('' % key), which
            # raises TypeError; '</%s>' is the counterpart of the opening tag.
            xmlCfg[key] = matchOne(line, '<%s>' % key, '</%s>' % key)
    return xmlCfg


def getTag(where, tag):
    """Return the UTF-8 decoded text between ``<tag>`` and ``</tag>`` in *where*."""
    return matchOne(where, '<%s>' % tag, '</%s>' % tag).decode('utf-8')


def getUA():
    """Return the lines of ``system/ua.txt`` with line endings removed."""
    with open('system/ua.txt') as handle:
        return [line.replace('\n', '').replace('\r', '') for line in handle]


def initPinger():
    """Start ``ping.py`` in the background for the current page.

    The page name and its URL are passed as separate arguments instead of
    being interpolated into a shell command, so quotes or shell
    metacharacters in a keyword cannot break (or inject into) the command.
    Output of the child is discarded and the child is not waited for.
    """
    name = pageName.encode('utf-8')
    url = makeUrl(pageName)
    if isinstance(url, unicode):
        url = url.encode('utf-8')
    devnull = open(os.devnull, 'w')
    try:
        subprocess.Popen(['python', 'ping.py', name, url],
                         stdout=devnull, stderr=devnull)
    finally:
        devnull.close()


def whatIsIt(it):
    """Classify the phrase *it*: return 1 or 0.

    One point each is scored for containing ' to ', for an average word
    length above 4 and for having more than 3 space-separated words; the
    result is 1 when at least two points are scored.
    """
    words = it.split(' ')
    count = len(words)
    avglen = float(sum(len(word) for word in words)) / count
    w = 0
    if it.find(' to ') > -1:
        w += 1
    if avglen > 4:
        w += 1
    if count > 3:
        w += 1
    if w > 1:
        return 1
    return 0


def check(word):
    """Randomly wrap *word* in one of the tags listed in ``cfg['tags']``.

    The wrap happens when a random number in 0..100 is at most
    ``cfg['check']``; otherwise *word* is returned unchanged.
    """
    no = randint(0, 100)
    tags = cfg['tags'].split(',')
    if no <= int(cfg['check']):
        tag = choice(tags)
        # The closing tag was missing from the format string (three arguments
        # for two placeholders), which raises TypeError.
        word = '<%s>%s</%s>' % (tag, word, tag)
    return word


def keyInTo(sent, mode):
    """Insert a keyword into the sentence *sent* and return the result.

    *mode* selects the keyword: 'relative' (random entry of ``relatives``),
    'simply' (the current ``pageName``) or 'links' (a link to a random entry
    of ``relatives``). Where the keyword is placed is chosen at random and
    depends on :func:`whatIsIt` of the keyword.

    Raises ValueError for any other *mode*.
    """
    if mode == 'relative':
        keyword = choice(relatives)
        rate = whatIsIt(keyword)
    elif mode == 'simply':
        keyword = pageName
        rate = whatIsIt(keyword)
    elif mode == 'links':
        tmp = choice(relatives)
        # NOTE(review): the anchor markup was lost from this literal (it read
        # '%s' with two arguments); restored as a plain HTML link -- confirm.
        keyword = '<a href="%s">%s</a>' % (makeUrl(tmp), upfirst(tmp))
        rate = whatIsIt(tmp)
    else:
        raise ValueError('unknown keyword mode: %r' % (mode,))

    no = randint(0, 100)
    if rate == 0:
        if no > 50:
            sent = sent.replace('.', ', %s.' % check(keyword).lower())
        else:
            sent = '%s, %s' % (check(upfirst(keyword)), sent)
    else:
        # The two middle ranges were written as "25 > no >= 50" and
        # "50 > no >= 75", which can never be true.
        if no <= 25:
            sent = '%s: %s' % (check(upfirst(keyword)), sent)
        elif no <= 50:
            sent = sent.replace('.', '; %s.' % check(keyword).lower())
        elif no <= 75:
            sent = sent.replace('.', ' - %s.' % check(keyword).lower())
        else:
            sent = sent.replace('.', '. %s.' % check(upfirst(keyword)))
    return sent


def genText(count, mode, fq):
    """Generate *count* random sentences, one per line.

    Sentences are drawn from ``text.dictry['SNTS']``; every *fq*-th one gets
    a keyword via :func:`keyInTo` (*mode* is passed through). ``{macro}``
    placeholders are replaced by a random lower-cased entry of
    ``text.dictry[macro]``.
    """
    lines = []
    for no in xrange(0, count):
        sent = choice(text.dictry['SNTS'])
        if fmod(no, fq) == 0:
            sent = keyInTo(sent, mode)
        while True:
            i0 = sent.find('{') + 1
            i1 = sent.find('}', i0)
            if i0 == 0 or i1 == -1:
                break
            macro = sent[i0:i1]
            sent = sent.replace('{%s}' % macro,
                                choice(text.dictry[macro]).lower(), 1)
        lines.append('%s \n' % upfirst(sent))
    return ''.join(lines).replace(' ,', ',')


def notFound():
    """Return the '404' template (see :func:`readTmpl`)."""
    return readTmpl(mode='404')


def readCache():
    """Return the cached page for the current door/page, or None.

    The cache file is ``cache/<quoted "doorName&pageName">``. Any failure
    (missing file, undecodable content) yields None.
    """
    try:
        cid = quote('%s&%s' % (doorName, pageName))
        with open('cache/%s' % cid) as handle:
            return handle.read().decode('utf-8')
    except Exception:
        return None


def addCache(source):
    """Write *source* (UTF-8 encoded) to the cache file of the current page.

    Best effort: failures are ignored so a read-only cache directory does
    not break page delivery.
    """
    try:
        cid = quote('%s&%s' % (doorName, pageName))
        with open('cache/%s' % cid, 'w') as handle:
            handle.write(source.encode('utf-8'))
    except Exception:
        pass


def readTmpl(mode='page'):
    """Return the decoded content of ``system/<mode>.xml``, or None on failure."""
    try:
        with open('system/%s.xml' % (mode)) as handle:
            return handle.read().decode('utf-8')
    except Exception:
        return None


def makeUrl(page, doorway='', domain=''):
    """Build a URL from ``cfg['urlmask']`` for *page*.

    *page* is one of the special names 'main', 'rss', 'map', 'index' or an
    entry of ``relatives``. *doorway* defaults to the current ``doorName``
    and *domain* to ``task['domain']``. For 'main' and 'rss' the last
    character of the resulting URL is dropped.
    """
    if domain == '':
        domain = task['domain']
    if doorway == '':
        try:
            doorway = doornames[doorways.index(doorName)]
        except Exception:
            # No current doorway (e.g. group-level pages): leave it empty.
            pass
    else:
        doorway = doornames[doorways.index(doorway)]

    url = cfg['urlmask'].replace('{domain}', domain)
    if page in ('main', 'rss'):
        url = url.replace('{doorway}', cfg['%sname' % page])
        url = url.replace('{page}', '')
        url = url[:len(url) - 1]
    else:
        url = url.replace('{doorway}', doorway)
        if page == 'map':
            url = url.replace('{page}', cfg['mapname'])
        elif page == 'index':
            url = url.replace('{page}', cfg['indexname'])
        else:
            url = url.replace('{page}', pagenames[relatives.index(page)])
    return url


def getRSS(url):
    """Fetch the feed at *url* and return its items as unicode strings.

    NOTE(review): the marker and entity literals in this function were
    stripped from the reviewed copy (they read '' and '<' -> '<'). The
    ``<item>`` markers and the lt/gt/amp entities are restored from the
    evident intent; two further replacements could not be recovered and were
    no-ops as written, so they are omitted.
    """
    source = getUrl(url)
    items = []
    for item in matchAll(source, '<item>', '</item>'):
        item = item.decode('utf-8')
        item = item.replace('&lt;', '<').replace('&gt;', '>')
        item = item.replace('&amp;', '&')
        items.append(item)
    return items


def getSnipets(word):
    """Search Google for *word* and return a list of result dicts.

    Each dict has the keys 'link', 'discr' and 'title'.

    NOTE(review): every HTML marker used to cut results out of the response
    was lost from the reviewed copy, together with the statement that first
    assigned ``title`` and the clean-up replacements applied to each item.
    The markers are left empty below, so this function currently returns an
    empty list; restore them from version control.
    """
    url = 'http://www.google.com/search?hl=en&q=%s&num=100&lr=lang_%s' % (
        quote(word), cfg['lang'])
    source = getUrl(url)
    item_marks = ('', '')    # NOTE(review): lost literals
    link_marks = ('', '')    # NOTE(review): lost literals
    title_marks = ('', '')   # NOTE(review): lost statement and literals
    discr_marks = ('', '')   # NOTE(review): lost literals
    snipets = []
    for item in matchAll(source, item_marks[0], item_marks[1]):
        item = item.decode('utf-8')
        link = matchOne(item, link_marks[0], link_marks[1])
        title = matchOne(item, title_marks[0], title_marks[1])
        if title == '':
            # NOTE(review): the end marker of this fallback was also lost.
            title = matchOne(item, '>', '')
        discr = matchOne(item, discr_marks[0], discr_marks[1])
        snipets.append({'link': link, 'discr': discr, 'title': title})
    return snipets


def translate(text, into):
    """Translate *text* into language *into* via Google; return unicode.

    Returns '' when the request fails.

    NOTE(review): the markers that delimit the translation in the response
    and the literals of the final clean-up were lost from the reviewed copy.
    The markers are left empty (so '' is returned); the lt/gt entities and
    '<br>' in the clean-up are restored from the evident intent -- confirm.
    """
    url = 'http://www.google.com/translate_t?sl=auto&tl=%s' % into
    if isinstance(text, unicode):
        # urlencode() calls str() on values and fails on non-ASCII unicode.
        text = text.encode('utf-8')
    data = {'hl': 'en', 'ie': 'UTF8', 'text': text, 'sl': 'auto', 'tl': into}
    req = urllib2.Request(url, urlencode(data))
    req.add_header('Host', 'www.google.com')
    req.add_header('User-Agent', choice(ua))
    req.add_header('Accept-Encoding', 'gzip, deflate')
    req.add_header('Referer', url)
    try:
        page = urllib2.urlopen(req).read()
    except Exception:
        return ''
    try:
        source = GzipFile('', 'r', 0, StringIO(page)).read()
    except Exception:
        source = page
    result_marks = ('', '')  # NOTE(review): lost literals
    result = matchOne(source, result_marks[0], result_marks[1])
    result = result.decode('utf-8')
    return result.replace('&lt;', '<').replace('&gt;', '>').replace('<br>', '')


def _find_inline(tmpl, opener, closer='/}'):
    """Locate the first ``<opener>ARG<closer>`` macro in *tmpl*.

    Returns ``(ARG, whole macro text)`` or None when it is absent.
    """
    start = tmpl.find(opener)
    if start == -1:
        return None
    arg_from = start + len(opener)
    arg_to = tmpl.find(closer, arg_from)
    if arg_to == -1:
        return None
    return tmpl[arg_from:arg_to], tmpl[start:arg_to + len(closer)]


def _find_block(tmpl, opener, closer, search_from=0):
    """Locate the first ``<opener>HEAD}BODY<closer>`` block in *tmpl*.

    The closing tag is searched for at or after *search_from* (and never
    before the opening tag). Returns ``(HEAD, BODY, whole block text, offset
    of the closing tag)`` or None when the block is absent or malformed.
    """
    start = tmpl.find(opener)
    if start == -1:
        return None
    head_from = start + len(opener)
    end = tmpl.find(closer, max(head_from, search_from))
    if end == -1:
        return None
    head_to = tmpl.find('}', head_from, end)
    if head_to == -1:
        return None
    return (tmpl[head_from:head_to], tmpl[head_to + 1:end],
            tmpl[start:end + len(closer)], end)


def useTmpl(tmpl):
    """Expand all template macros in *tmpl* and return the result.

    Reads the request globals (``pageName``, ``doorName``, ``groupName``,
    ``relatives``, ``doorways``) plus ``task`` and ``cfg``. Macros are
    processed in a fixed order: loops, keyword/mainword/groupname/domain,
    random words/numbers/choices, file and url includes, link lists,
    navigation links, indexed names, dates, generated text, variables, rss,
    snippets and translation. Some macros perform network requests.
    """
    ## loop
    search_from = 0
    while True:
        found = _find_block(tmpl, '{loop:', '{/loop}', search_from)
        if found is None:
            break
        head, body, whole, end = found
        if body.count('{loop:') != body.count('{/loop}'):
            # The closing tag belongs to a nested loop: try the next one.
            search_from = end + len('{/loop}')
            continue
        mi, ma = head.split(':')
        count = randint(int(mi), int(ma))
        replaced = ''.join(body.replace('{num}', str(i + 1))
                           for i in xrange(count))
        tmpl = tmpl.replace(whole, replaced)
        search_from = 0
    ##
    tmpl = tmpl.replace('{keyword}', pageName)
    tmpl = tmpl.replace('{keyword:quote}', quote(pageName))
    tmpl = tmpl.replace('{keyword:up}', upfirst(pageName))
    ##
    tmpl = tmpl.replace('{mainword}', doorName)
    tmpl = tmpl.replace('{mainword:quote}', quote(doorName))
    tmpl = tmpl.replace('{mainword:up}', upfirst(doorName))
    ##
    tmpl = tmpl.replace('{groupname}', groupName)
    tmpl = tmpl.replace('{groupname:quote}', quote(groupName))
    tmpl = tmpl.replace('{groupname:up}', upfirst(groupName))
    tmpl = tmpl.replace('{domain}', task['domain'])
    ## random keyword
    while tmpl.find('{randword') != -1:
        before = tmpl
        tmpl = tmpl.replace('{randword}', choice(relatives), 1)
        tmpl = tmpl.replace('{randword:up}', upfirst(choice(relatives)), 1)
        if tmpl == before:
            # An unknown {randword...} variant would otherwise spin forever.
            break
    ## random number
    while True:
        found = _find_inline(tmpl, '{rand:', '}')
        if found is None:
            break
        arg, whole = found
        mi, ma = arg.split(':')
        tmpl = tmpl.replace(whole, str(randint(int(mi), int(ma))), 1)
    # random choice
    while True:
        found = _find_inline(tmpl, '{choice:')
        if found is None:
            break
        arg, whole = found
        tmpl = tmpl.replace(whole, choice(arg.split('.:')), 1)
    # from file
    while True:
        found = _find_inline(tmpl, '{file:')
        if found is None:
            break
        name, whole = found
        try:
            with open(name) as handle:
                string = choice(handle.readlines())
            string = string.replace('\n', '').replace('\r', '')
            tmpl = tmpl.replace(whole, string.decode('utf-8'), 1)
        except Exception:
            break
    # from url
    while True:
        found = _find_inline(tmpl, '{url:')
        if found is None:
            break
        url, whole = found
        try:
            handle = urllib2.urlopen(urllib2.Request(url))
            try:
                source = handle.read()
            finally:
                handle.close()
            tmpl = tmpl.replace(whole, source.decode('utf-8'), 1)
        except Exception:
            break
    # categories links
    while True:
        found = _find_block(tmpl, '{links:doors:', '{/links}')
        if found is None:
            break
        head, body, whole, _ = found
        count = int(head)
        parts = []
        for no, key in enumerate(doorways):
            # Was "no > count", which emitted count + 1 links.
            if no >= count:
                break
            new = body.replace('{url}', makeUrl('index', key))
            new = new.replace('{anchor}', key)
            new = new.replace('{anchor:up}', upfirst(key))
            parts.append(new.replace('{num}', str(no + 1)))
        tmpl = tmpl.replace(whole, ''.join(parts))
    # local links
    while True:
        found = _find_block(tmpl, '{links:pages:', '{/links}')
        if found is None:
            break
        head, body, whole, _ = found
        count = int(head)
        first = randint(0, max(len(relatives) - count, 0))
        # Copy the window so shuffling never reorders ``relatives`` itself.
        links = list(relatives[first:first + count])
        shuffle(links)
        parts = []
        for no, key in enumerate(links):
            new = body.replace('{url}', makeUrl(key)).replace('{anchor}', key)
            new = new.replace('{anchor:up}', upfirst(key))
            parts.append(new.replace('{num}', str(no + 1)))
        tmpl = tmpl.replace(whole, ''.join(parts))
    # neighbours of the current page (wrapping around at both ends)
    try:
        pagId = relatives.index(pageName)
        pagNext = relatives[(pagId + 1) % len(relatives)]
        pagBefore = relatives[pagId - 1]
        tmpl = tmpl.replace('{link:next}', makeUrl(pagNext))
        tmpl = tmpl.replace('{link:before}', makeUrl(pagBefore))
        tmpl = tmpl.replace('{relative:next}', pagNext)
        tmpl = tmpl.replace('{relative:before}', pagBefore)
    except Exception:
        # The current page is not one of ``relatives``: leave the macros.
        pass
    tmpl = tmpl.replace('{link:rss}', makeUrl('rss'))
    tmpl = tmpl.replace('{link:index:door}', makeUrl('index'))
    tmpl = tmpl.replace('{link:index:group}', makeUrl('main'))
    tmpl = tmpl.replace('{link:map}', makeUrl('map'))
    # keyword by index
    while True:
        found = _find_inline(tmpl, '{relative:')
        if found is None:
            break
        arg, whole = found
        try:
            no = int(arg)
        except ValueError:
            # Not an indexed macro (e.g. a leftover {relative:next}).
            break
        try:
            word = relatives[no]
        except IndexError:
            word = relatives[0] if relatives else ''
        tmpl = tmpl.replace(whole, word)
    # doorway by index
    while True:
        found = _find_inline(tmpl, '{doorname:')
        if found is None:
            break
        arg, whole = found
        try:
            no = int(arg)
        except ValueError:
            break
        try:
            word = doorways[no]
        except IndexError:
            word = doorName
        tmpl = tmpl.replace(whole, word)
    # date and time
    while True:
        found = _find_inline(tmpl, '{date:')
        if found is None:
            break
        fmt, whole = found
        try:
            tmpl = tmpl.replace(whole, time.strftime(fmt))
        except Exception:
            break
    # text
    while True:
        found = _find_inline(tmpl, '{text:')
        if found is None:
            break
        arg, whole = found
        mode, count, fq = arg.split(':')
        tmpl = tmpl.replace(whole, genText(int(count), mode, int(fq)), 1)
    # use memory
    while True:
        found = _find_inline(tmpl, '{setvar:')
        if found is None:
            break
        arg, whole = found
        key, value = arg.split(':', 1)
        use_tag = '{usevar:%s}' % key
        # Drop the definition, then substitute every use between it and the
        # next definition of the same key. Offsets are recomputed after each
        # substitution; the old code kept a stale offset for the next
        # definition.
        pos = tmpl.find(whole)
        tmpl = tmpl[:pos] + tmpl[pos + len(whole):]
        while True:
            limit = tmpl.find('{setvar:%s:' % key, pos)
            if limit == -1:
                hit = tmpl.find(use_tag, pos)
            else:
                hit = tmpl.find(use_tag, pos, limit)
            if hit == -1:
                break
            tmpl = tmpl[:hit] + value + tmpl[hit + len(use_tag):]
            pos = hit + len(value)
    # rss
    while True:
        # The "not found" test compared against 5 instead of 4, so a template
        # starting with {rss: was skipped.
        found = _find_block(tmpl, '{rss:', '{/rss}')
        if found is None:
            break
        head, body, whole, _ = found
        url, lim = head.rsplit(':', 1)
        parts = []
        for num, item in enumerate(getRSS('http://%s' % url), 1):
            if num > int(lim):
                break
            # NOTE(review): the element markers were lost from the reviewed
            # copy; restored from the macro names they fill.
            new = body.replace('{title}',
                               matchOne(item, '<title>', '</title>'))
            new = new.replace('{link}', matchOne(item, '<link>', '</link>'))
            new = new.replace('{description}',
                              matchOne(item, '<description>', '</description>'))
            parts.append(new.replace('{num}', '%s' % num))
        tmpl = tmpl.replace(whole, ''.join(parts), 1)
    # snipets
    while True:
        found = _find_block(tmpl, '{snipets:', '{/snipets}')
        if found is None:
            break
        head, body, whole, _ = found
        word, lim = head.split(':')
        snipets = getSnipets(word)
        shuffle(snipets)
        parts = []
        for num, item in enumerate(snipets, 1):
            if num > int(lim):
                break
            new = body.replace('{title}', item['title'])
            new = new.replace('{link}', item['link'])
            new = new.replace('{discription}', item['discr'])
            parts.append(new.replace('{num}', '%s' % num))
        tmpl = tmpl.replace(whole, ''.join(parts), 1)
    # translation
    while True:
        found = _find_block(tmpl, '{translate:', '{/translate}')
        if found is None:
            break
        lang, body, whole, _ = found
        tmpl = tmpl.replace(whole, translate(body, lang), 1)
    return tmpl


def _update_zone(tmpl):
    """Return the text between ``{update}`` and ``{/update}``, or None."""
    start = tmpl.find('{update}')
    if start == -1:
        return None
    start += len('{update}')
    end = tmpl.find('{/update}', start)
    if end == -1:
        return None
    return tmpl[start:end]


def matchUpd(tmpl):
    """Mask the macros inside ``{update}...{/update}`` as ``<<...>>``.

    This keeps :func:`useTmpl` from expanding them; *tmpl* is returned
    unchanged when there is no update zone.
    """
    zone = _update_zone(tmpl)
    if zone is None:
        return tmpl
    masked = zone.replace('{', '<<').replace('}', '>>')
    return tmpl.replace('{update}%s{/update}' % zone,
                        '{update}%s{/update}' % masked)


def delUpd(tmpl):
    """Remove the ``{update}...{/update}`` zone from *tmpl*.

    Returns *tmpl* unchanged when there is no complete zone; the old slice
    arithmetic could otherwise delete unrelated characters.
    """
    zone = _update_zone(tmpl)
    if zone is None:
        return tmpl
    return tmpl.replace('{update}%s{/update}' % zone, '')


def reUpd(tmpl):
    """Render the masked update zone once more and append it after the zone.

    The zone itself stays in place (still masked) so it can be rendered
    again later. *tmpl* is returned unchanged when there is no update zone.
    """
    zone = _update_zone(tmpl)
    if zone is None:
        return tmpl
    new = useTmpl(zone.replace('<<', '{').replace('>>', '}'))
    return tmpl.replace('{update}%s{/update}' % zone,
                        '{update}%s{/update} \n %s' % (zone, new))


def mapTmpl(tmpl):
    """Expand ``{map}...{/map}`` once per entry of ``relatives``.

    Inside the body ``{url}`` and ``{anchor}`` are replaced by the entry's
    URL and name. *tmpl* is returned unchanged when there is no map block.
    """
    start = tmpl.find('{map}')
    if start == -1:
        return tmpl
    start += len('{map}')
    end = tmpl.find('{/map}', start)
    if end == -1:
        return tmpl
    body = tmpl[start:end]
    mymap = ''.join(
        body.replace('{url}', makeUrl(key)).replace('{anchor}', key)
        for key in relatives)
    return tmpl.replace('{map}%s{/map}' % body, mymap)


def indexTmpl(tmpl, category='general'):
    """Expand ``{last:N}...{/last}`` with the newest records of ``last.xml``.

    At most N records are rendered, newest first; when *category* is not
    'general' only records whose category equals it are used. A missing
    ``last.xml`` is treated as empty.
    """
    while True:
        found = _find_block(tmpl, '{last:', '{/last}')
        if found is None:
            break
        head, body, whole, _ = found
        count = int(head)
        try:
            with open('last.xml') as handle:
                source = handle.read()
        except IOError:
            source = ''
        pages = matchAll(source, '<%s>' % LAST_RECORD_TAG,
                         '</%s>' % LAST_RECORD_TAG)
        pages.reverse()
        parts = []
        for page in pages:
            if len(parts) >= count:
                break
            if category != 'general' and category != getTag(page, 'category'):
                continue
            new = body.replace('{title}', getTag(page, 'title'))
            new = new.replace('{link}', getTag(page, 'link'))
            new = new.replace('{category}', getTag(page, 'category'))
            new = new.replace('{description}', getTag(page, 'description'))
            new = new.replace('{num}', str(len(parts) + 1))
            date_macro = _find_inline(new, '{date:')
            if date_macro is not None:
                fmt, date_whole = date_macro
                stamp = time.localtime(int(getTag(page, 'date')))
                new = new.replace(date_whole, time.strftime(fmt, stamp))
            parts.append(new)
        tmpl = tmpl.replace(whole, ''.join(parts))
    return tmpl


def upLast(source):
    """Append a preview record for the current page to ``last.xml``.

    Returns 0 when the preview text cannot be decoded, None otherwise.

    NOTE(review): the opening statements of this function (everything that
    derives ``sents`` from *source*) and the markup of the record were
    truncated in the reviewed copy. The extraction below (take the text from
    '<body' on, strip tags, split into sentences on '.') and the record
    layout (field names taken from what indexTmpl reads) are reconstructions
    that must be checked against version control.
    """
    start = source.find('<body')
    visible = re.sub(r'<[^>]*>', ' ', source[max(start, 0):])
    sents = [part.strip() for part in visible.split('.') if part.strip()]
    limit = min(int(cfg['tolent']), len(sents))
    title = upfirst(pageName)
    try:
        preview = ' '.join(sents[1:limit])
        if isinstance(preview, str):
            preview = preview.decode('utf-8')
    except Exception:
        return 0
    data = '''<%s>
<title>%s</title>
<date>%s</date>
<link>%s</link>
<category>%s</category>
<description>%s ...</description>
</%s>
''' % (LAST_RECORD_TAG, title, int(time.time()),
       makeUrl(pageName, doorName), doorName, preview, LAST_RECORD_TAG)
    with open('last.xml', 'a') as handle:
        handle.write(data.encode('utf-8'))


#
task = fromXml('system/task.xml')
cfg = fromXml('system/config.xml')
groupName = task['group'].decode('utf-8')
dictry = text.dictry
ua = getUA()


def application(environ, start_response):
    """WSGI callable: render the page selected by the ``d`` and ``q`` query keys.

    ``d`` selects the doorway ('index' and 'rss' are group-level pages) and
    ``q`` the page inside it ('index', 'map' or a page name); both default to
    'index'. Rebinds the module-level request globals, and for doorway pages
    updates the request counters through ``core``. Always answers '200 OK'
    with the UTF-8 encoded page as body.
    """
    global pageName, doorName, doorways, doornames, relatives, pagenames
    query = {}
    raw_query = environ.get('QUERY_STRING', '')
    for key, value in parse_qs(raw_query, '').iteritems():
        query[key] = value[0]
    major = query.get('d', 'index')
    minor = query.get('q', 'index')
    #
    if major in ('index', 'rss'):
        relatives = []
        doorways, doornames = group.infDoor(groupName.encode('utf-8'))
        pageName = doorName = groupName
        tmplSrc = readTmpl({'index': 'main', 'rss': 'rss'}[major])
        pageSrc = useTmpl(indexTmpl(tmplSrc))
    else:
        update = True
        ok = True
        valid, doorways, doornames = group.isItValid(
            groupName.encode('utf-8'), major)
        if valid:
            doorName = doorways[doornames.index(major)]
        else:
            #pageSrc = notFound()
            pageSrc = str(valid)
            update = False
            ok = False
        #
        if ok and minor == 'index':
            relatives, pagenames = core.infKwd(doorName.encode('utf-8'))
            pageName = relatives[0]
            tmplSrc = indexTmpl(readTmpl('index'), doorName)
            pageSrc = useTmpl(tmplSrc)
        elif ok and minor == 'map':
            relatives, pagenames = core.infKwd(doorName.encode('utf-8'))
            pageName = relatives[0]
            pageSrc = mapTmpl(useTmpl(readTmpl('map')))
        elif ok:
            door_key = doorName.encode('utf-8')
            valid, relatives, pagenames = core.isItValid(door_key, minor)
            if valid:
                change = False
                pageName = relatives[pagenames.index(minor)]
                page_key = pageName.encode('utf-8')
                active = core.isItActive(door_key, page_key)
                pageSrc = None
                if active:
                    pageSrc = readCache()
                if pageSrc is not None:
                    if core.isItUpd(door_key, page_key) == 1:
                        change = True
                        pageSrc = reUpd(pageSrc)
                        core.updKwd(door_key, page_key, 0)
                else:
                    # First visit, or the cache file of an active page is
                    # gone: render from the template. Only a first visit is
                    # registered, so a lost cache file does not add a second
                    # last.xml record.
                    change = True
                    pageSrc = useTmpl(matchUpd(readTmpl()))
                    if not active:
                        core.activeKwd(door_key, page_key)
                        upLast(pageSrc)
                if change:
                    addCache(pageSrc)
                    core.setLast(door_key, page_key)
                    if randint(0, 100) < int(cfg['toping']):
                        try:
                            initPinger()
                        except Exception:
                            # Pinging is best effort and must not fail a page.
                            pass
            else:
                pageSrc = notFound()
                update = False
        ##
        if update:
            # As before, the globals are left UTF-8 encoded after bookkeeping.
            doorName = doorName.encode('utf-8')
            pageName = pageName.encode('utf-8')
            core.incrReq(doorName, pageName)
            unfreeze = lambda: core.unFreeze(
                doorName, randint(int(cfg['ufmin']), int(cfg['ufmax'])))
            if fmod(core.getAllReq(doorName), int(cfg['ufnum'])) == 0:
                unfreeze()
            hours = (int(time.time()) - core.getAllLast(doorName)) / 3600.0
            if hours > float(cfg['ufhrs']):
                unfreeze()
            if fmod(core.getReq(doorName, pageName), int(cfg['updnum'])) == 0:
                core.updKwd(doorName, pageName, 1)
            hours = (int(time.time()) - core.getLast(doorName, pageName)) / 3600.0
            if hours > float(cfg['updhrs']):
                core.updKwd(doorName, pageName, 1)
        if ok:
            if pageSrc is None:
                # Missing 404 template: serve an empty page, not a traceback.
                pageSrc = u''
            pageSrc = delUpd(pageSrc)
    #
    start_response('200 OK', [('Content-Type', 'text/html')])
    # path_info = environ.get('PATH_INFO', '')
    return [pageSrc.encode('utf-8')]