#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys, re, urllib, codecs
from xml.etree import ElementTree
from BeautifulSoup import BeautifulSoup, Tag


def usage():
	"""Print the command-line synopsis to standard output."""
	# Single-argument print(...) behaves the same as the print statement
	# under Python 2, so the output is unchanged apart from the typo fix.
	print('''elevator URL topic1 topic2 ...
 - URL: the URL of the post
 - topic: just a string phrase used for search
''')

# GB2312 codec entry; its stream reader is used by the script entry point
# to decode the first fetched page.
gb = codecs.lookup('gb2312')

def shred(url):
	"""Return `url` with every `&star=<digits>` fragment stripped out.

	The digits are optional, so a bare `&star=` is removed as well.
	"""
	star_param = re.compile(r'&star=\d*')
	return star_param.sub('', url)

def append(url, page):
	"""Return `url` with a `&star=<page>` parameter added at the end.

	`page` is formatted with `%d`, so it must be a number.
	"""
	suffix = '&star=%d' % page
	return url + suffix

def get_totalpage(content, post_per_page=10):
	"""Derive a page count from the total shown in the page's pager element.

	Looks for `<b class="page">总数 N</b>` in `content` and returns
	`N // post_per_page + 1`.

	content       -- decoded (unicode) HTML of a page
	post_per_page -- divisor applied to the total found in the pager

	Returns an int, or None when the pager element is not found.
	"""
	# The backslash is doubled so the literal is valid without relying on
	# the unrecognised-escape fallback; the pattern itself is unchanged.
	pattern = re.compile(u'(?<=<b class="page">总数 )(?P<total>\\d+)</b>', re.UNICODE)
	found = pattern.search(content)
	if found is None:
		return None
	# Floor division keeps the result an int even when true division is in
	# effect (Python 3 or `from __future__ import division`).
	# NOTE(review): when the total is an exact multiple of post_per_page this
	# yields one page more than a ceiling division would -- confirm intent.
	return int(found.group('total')) // post_per_page + 1

def build_elevator(l, topics, url):
	"""Scan one page of a thread and collect the posts mentioning any topic.

	l      -- list extended in place; each matching post adds one row
	          `[floor, author, 'Yes'|'No', ...]` with one flag per topic
	topics -- strings searched for in the markup of each post body
	url    -- address of the page to download

	Posts whose body contains a `div` of class `quote` are ignored, as are
	tables that carry no `textstyle_<n>` element.
	"""
	page = urllib.urlopen(url)
	try:
		content = page.read()
	finally:
		# release the connection even if the read fails
		page.close()

	soup = BeautifulSoup(content, fromEncoding="gb2312")
	floor_pattern = re.compile(r'textstyle_(?P<floor>\d+)')
	for post in soup.findAll('table', 'bbslist border'):
		doc = post.find(id=floor_pattern)
		if doc is None:
			# no post body in this table -- nothing to inspect
			continue
		floor_match = floor_pattern.match(doc[u'id'])
		if floor_match is None:
			# the id contains the marker but does not start with it
			continue
		floor = floor_match.group('floor')
		author = post.find('span', 'username').div.font.b.string
		print(u"Processing %s by %s" % (floor, author))

		# no quote is allowed
		if doc.find('div', 'quote'):
			continue

		contents = unicode(doc)
		hits = [topic in contents for topic in topics]
		if any(hits):
			# bingo
			l.append([floor, author] + ['Yes' if hit else 'No' for hit in hits])


def render_table(head, content):
	"""Build an HTML table from a header row and a sequence of data rows.

	head    -- cell contents of the header row
	content -- iterable of rows, each an iterable of cell contents

	Returns the BeautifulSoup document holding the table. The header row is
	made of `<th>` cells and every other row of `<td>` cells; all rows are
	placed inside the single `<tbody>`.
	"""
	soup = BeautifulSoup('<table><tbody></tbody></table>')

	def render_tr(items, cell_name='td'):
		# one <tr> holding a `cell_name` element per item
		tr = Tag(soup, 'tr')
		for item in items:
			cell = Tag(soup, cell_name)
			cell.append(item)
			tr.append(cell)
		return tr

	# only the first row is a header; data rows must not be marked up as <th>
	soup.tbody.append(render_tr(head, 'th'))

	for row in content:
		soup.tbody.append(render_tr(row))
	return soup
	
if __name__ == "__main__":
	# Script entry point: elevator URL topic1 [topic2 ...]
	if len(sys.argv) < 3:
		usage()
		# missing arguments are a usage error, so signal failure to the shell
		sys.exit(1)

	url = shred(sys.argv[1])
	# Python 2 delivers argv as byte strings; decode them so that non-ASCII
	# topics can be searched in the unicode page text without an implicit
	# ASCII decode error.
	argv_encoding = sys.getfilesystemencoding() or 'utf-8'
	topics = [
		t.decode(argv_encoding) if isinstance(t, bytes) else t
		for t in sys.argv[2:]
	]

	# load the page
	page = urllib.urlopen(url)
	try:
		# Only the pager total is extracted from this text, so undecodable
		# bytes are replaced instead of aborting the run.
		content = gb.streamreader(page, 'replace').read()
	finally:
		page.close()

	total_page = get_totalpage(content)
	if total_page is None:
		sys.stderr.write("Could not find the total count on %s\n" % url)
		sys.exit(1)
	print("Total %d pages to handle." % total_page)

	rows = []
	# range() excludes its upper bound; +1 makes the loop visit the last
	# page too (and a single-page thread is no longer skipped entirely).
	for x in range(1, total_page + 1):
		build_elevator(rows, topics, append(url, x))

	print(render_table([u'楼层', u'id'] + topics, rows).prettify())

