#!/usr/bin/env python2.4
#
# Requires: python-twitter, mwclient 
#
# Download mwclient thusly:
# svn co https://mwclient.svn.sourceforge.net/svnroot/mwclient/trunk/mwclient
#
# python-twitter:
# http://code.google.com/p/python-twitter/


import urllib as ul
import urllib2 as ul2
import re
import codecs
import time
import mwclient
import twitter

# Credentials live in a local, unversioned ``logindetails`` module.
# Both names come from that same module (the second import previously
# pointed at the misspelled module name "logindtails").
from logindetails import username as identicausername
from logindetails import password as identicapassword

# constants
enwp = "en.wikipedia.org"
dyk = "Template:Did you know"
lastrevcontentsfile = "/home/briannalaugher/dyk2identica.modernthings.org/code/lastrevcontents.txt"
lastrevidfile = "/home/briannalaugher/dyk2identica.modernthings.org/code/lastrevid.txt"

# All patterns are raw strings so that the backslash escapes reach the regex
# engine untouched instead of relying on Python passing unknown string
# escapes through.

# one hook line: "{{*mp}}..." followed by text up to a "?" that ends the line
redyk = re.compile(r"\{\{\*mp\}\}\.\.\.(.*?\?) ?\n")
# bolded section: '''...'''
renewpage = re.compile(r"'''(.*?)'''")
# piped link [[target|text]]: capture both target page and link text
repipelink = re.compile(r"(\[\[(.*?)\|(.*?)\]\])")
# piped link [[target|text]]: only capture link text
repipelink2 = re.compile(r"(\[\[[^[]*?\|(.*?)\]\])")
# parenthesised aside mentioning "pictured", with optional italic quotes
# around it and one optional trailing space
repictured = re.compile(r"('?'?\((?:.*?)pictured(?:.*?)\)'?'? ?)")
# plain link [[target]]
relink = re.compile(r"\[\[(.*?)\]\]")
updatedelay = 3.5*60 # how many seconds between identica updates


#############################################

def get_last_rev_info():
    """Load the state saved by the previous run.

    Reads the UTF-8 text stored in ``lastrevcontentsfile`` and the integer
    revision id stored in ``lastrevidfile``.

    Returns:
        A ``(revid, contents)`` tuple.

    Raises:
        IOError: if either state file cannot be opened or read.
        ValueError: if the id file does not contain an integer.
    """
    # try/finally (not ``with``) so the handles are always released while the
    # code stays valid on the old interpreter named in the shebang.
    f = codecs.open(lastrevcontentsfile, 'r', 'utf-8')
    try:
        contents = f.read()
    finally:
        f.close()

    g = open(lastrevidfile, 'r')
    try:
        revid = int(g.read().strip())
    finally:
        g.close()

    return (revid, contents)

def update_last_rev_info(newid,newcontents):
    """Persist the newest processed revision for the next run.

    Writes *newcontents* as UTF-8 to ``lastrevcontentsfile`` and
    ``str(newid)`` to ``lastrevidfile``, replacing whatever was there.

    Raises:
        IOError: if either state file cannot be opened or written.
    """
    # try/finally so a failed write cannot leave a handle open.
    f = codecs.open(lastrevcontentsfile, 'w', 'utf-8')
    try:
        f.write(newcontents)
    finally:
        f.close()

    g = open(lastrevidfile, 'w')
    try:
        g.write(str(newid))
    finally:
        g.close()

def get_entries_from_wp(lastrevid):
    """Collect DYK hooks from every revision newer than *lastrevid*.

    Walks the revisions of the ``dyk`` page on ``enwp`` from the newest one
    down to *lastrevid*, gathering every ``redyk`` match from each revision
    except *lastrevid* itself.  The id and text of the newest revision are
    then stored through update_last_rev_info().

    Returns:
        A list of raw hook strings; the same hook may appear several times
        when it is present in more than one revision.  An empty list is
        returned, and the stored state is left untouched, if the newest
        revision was not among the results.
    """
    site = mwclient.Site(enwp)
    page = site.Pages[dyk]

    newestid = page.revision

    newestcontent = None
    dyks = []

    for r in page.revisions(startid=newestid, endid=lastrevid, prop='ids|content'):
        if r['revid'] == newestid:
            newestcontent = r['*']
        if r['revid'] == lastrevid:
            # don't repeat these ones
            break
        dyks += redyk.findall(r['*'])

    if newestcontent is None:
        # The newest revision never showed up, so there is nothing reliable
        # to record.  Skip this run rather than crash or advance the state.
        return []

    update_last_rev_info(newestid, newestcontent)
    return dyks

def fix_links(text):
    """Strip wiki link markup from *text*.

    Piped links ``[[target|label]]`` are reduced to ``label``; the brackets
    of plain links and any ``<nowiki>`` tags are removed; ``&nbsp;`` becomes
    an ordinary space.

    Returns:
        The cleaned-up text.
    """
    # fix any pipe links
    # e.g. 'This is a [[python string|string]] with [[piped links|Piped links]]'
    #   -> 'This is a string with Piped links'
    # Substituting in place keeps the surrounding text intact even when a
    # fragment happens to contain both "[[" and "|" (say, a plain link next
    # to a template call); filtering split fragments would drop it entirely.
    newtext = repipelink2.sub(lambda m: m.group(2), text)

    # now fix any regular links
    newtext = newtext.replace("[[", "")
    newtext = newtext.replace("]]", "")
    newtext = newtext.replace("<nowiki>", "")
    newtext = newtext.replace("</nowiki>", "")
    newtext = newtext.replace("&nbsp;", " ")
    return newtext

class NoNewPageLinkError(Exception):
    """Raised when bolded hook text contains neither a link nor a template."""


def handle_bolded_link(text):
    """Resolve the bolded part of a hook to display text and target page.

    Three shapes are recognised, in this order:

    * text that is entirely a template ``{{a|b}}``: the braces are removed
      and each "|" becomes a space; the result serves as both display text
      and target page;
    * a piped link ``[[target|label]]``: replaced by ``label``;
    * a plain link ``[[target]]``: replaced by ``target``.

    Returns:
        A ``(newtext, targetpage)`` tuple.

    Raises:
        NoNewPageLinkError: if *text* matches none of the shapes above.
    """
    # stupid ships (presumably ship-name templates used in place of a link)
    if text.startswith("{{") and text.endswith("}}"):
        newtext = text[2:-2].replace("|", " ")
        return (newtext, newtext)

    m = repipelink.search(text)
    if m:
        targetpage = m.group(2)
        linktext = m.group(3)
    else:
        m = relink.search(text)
        if not m:
            print(text)
            raise NoNewPageLinkError(text)
        targetpage = m.group(1)
        linktext = targetpage

    (start, end) = m.span()
    newtext = text[:start] + linktext + text[end:]
    return (newtext, targetpage)

class SomethingWeirdPicturedAsideError(Exception):
    """Raised when a hook contains more than one "pictured" aside."""


def remove_pictured_aside(text):
    """Remove a possible aside such as "(pictured)" from *text*.

    Returns:
        *text* unchanged when ``repictured`` does not match, otherwise the
        text with the single matched aside cut out.

    Raises:
        SomethingWeirdPicturedAsideError: if ``repictured`` matches more
            than once.
    """
    # split() on a one-group pattern yields 1 + 2*matches items:
    # [before, aside, after, ...]
    picturebits = repictured.split(text)
    if len(picturebits) == 1:
        return picturebits[0]
    if len(picturebits) == 3:
        return picturebits[0] + picturebits[2]

    # don't know what happened!
    print(text)
    raise SomethingWeirdPicturedAsideError(text)

def get_link(targetpage):
    """Build the short enwp.org URL for *targetpage*.

    Spaces in the page title become underscores.  The result starts with a
    single space so it can be appended straight onto a sentence.  No other
    encoding is applied (closing brackets used to need escaping; that was
    fixed upstream, thanks @brion).
    """
    slug = "_".join(targetpage.split(" "))
    return " http://enwp.org/" + slug

class TooManyOrFewBoldedLinksError(Exception):
    """Raised when a hook has no bolded ('''...''') section at all."""


def _render_hook(sentparts):
    """Render the split pieces of one hook as ``(sentence, targetpages)``.

    Returns None when a bolded piece holds no usable link.
    """
    pieces = ["DYK"]
    targetpages = []
    for (i, part) in enumerate(sentparts):
        if i % 2 == 0:
            # even pieces are the ordinary text around the bolded sections
            pieces.append(fix_links(part))
            continue
        try:
            (linktext, targetpage) = handle_bolded_link(part)
        except Exception:
            # deliberate best effort: a hook we cannot resolve is dropped
            return None
        pieces.append(linktext)
        targetpages.append(targetpage)

    sentence = remove_pictured_aside("".join(pieces))
    sentence = sentence.replace("'''", "")  # bold
    sentence = sentence.replace("''", "")  # italics

    # now include the link(s) at the end, one per bolded article
    sentence += "".join([get_link(t) for t in targetpages])
    return (sentence, targetpages)


def mw_to_plaintext(dyks):
    """Convert raw wiki-markup hooks into plain-text status updates.

    Each hook is split on its bolded sections.  Ordinary text goes through
    fix_links(), every bolded section through handle_bolded_link(), a
    "pictured" aside is removed, leftover bold/italic quotes are stripped
    and a short link for every bolded article is appended.  Hooks whose
    bolded section cannot be resolved to a page are skipped.

    Returns:
        A dict mapping the lower-cased target page of the first bolded
        section to the finished sentence.

    Raises:
        TooManyOrFewBoldedLinksError: if a hook has no bolded section.
    """
    plaintexts = {}
    for d in dyks:
        sentparts = renewpage.split(d)
        if len(sentparts) < 3:
            print(d)
            raise TooManyOrFewBoldedLinksError(d)

        rendered = _render_hook(sentparts)
        if rendered is None:
            continue

        (sentence, targetpages) = rendered
        plaintexts[targetpages[0].lower()] = sentence

    return plaintexts



def wp_to_identica(lastrevcontents,dyks):
    """Pick out the updates that have not been announced before.

    Hooks already present in *lastrevcontents* are discarded, both by their
    raw text and, after conversion, by their target-page key.

    Returns:
        A list of plain-text updates for the genuinely new hooks.
    """
    previous = redyk.findall(lastrevcontents)

    # set() removes obvious dupes; then drop hooks seen verbatim last time
    candidates = [hook for hook in set(dyks) if hook not in previous]

    already_posted = mw_to_plaintext(previous)
    fresh = mw_to_plaintext(candidates)

    return [fresh[key] for key in fresh if key not in already_posted]


def _split_update(update):
    """Cut *update* in two at the first space after its midpoint.

    Returns a two-item list; the first part ends with "..." and the second
    starts with "...".  Without any space after the midpoint the cut falls
    exactly at the midpoint.
    """
    half = len(update) // 2
    restbits = update[half:].split(" ", 1)
    if len(restbits) == 2:
        first = update[:half] + restbits[0]
        second = restbits[1]
    else:
        # there are no more spaces?? that would be weird...
        first = update[:half]
        second = restbits[0]
    return [first + "...", "..." + second]


def post_to_identica(updates):
    """Post each update, pausing ``updatedelay`` seconds between updates.

    Updates are reduced to ASCII (other characters are dropped).  An update
    of 160 characters or more is posted as two messages, see
    _split_update().

    Raises:
        Whatever ``api.PostUpdate`` raises; the failing message is printed
        first and the error is then re-raised.
    """
    api = twitter.Api(username=identicausername, password=identicapassword)

    for (i, update) in enumerate(updates):
        if i:
            # wait between updates only, not after the final one
            time.sleep(updatedelay)

        update = update.encode('ascii', 'ignore') #??!
        if len(update) < 160:
            messages = [update]
        else:
            messages = _split_update(update)

        for message in messages:
            try:
                api.PostUpdate(message)
            except Exception:
                print("update failed")
                print(message)
                raise

def main():
    """Run one cycle: load saved state, fetch new hooks, post what is new."""
    state = get_last_rev_info()
    lastrevid = state[0]
    lastrevcontents = state[1]

    fetched = get_entries_from_wp(lastrevid)
    post_to_identica(wp_to_identica(lastrevcontents, fetched))

# Run the fetch-and-post cycle only when executed as a script, not on import.
if __name__ == "__main__":
    main()