# Computes linguistic style coordination ("alignment") between the two
# speakers of each Switchboard transcript, following the measure from
# "Echoes of Power", equation (1).
#
# Requires Christopher Potts' Switchboard Dialog Act corpus reader (swda)
# and his annotated data.  The reader is imported lazily in main() so the
# analysis functions below can be used and tested without the corpus.


def findTurn(utterances, i):
    # Given a list of utterances and an index i, compute the turn that
    # utterance i lies in.  Backchannels ('b') and fragments ('x') are not
    # considered turn changes: the turn is extended backwards and forwards
    # as far as possible without changing speaker, skipping over the other
    # speaker's backchannels/fragments.
    #
    # Returns (start, end, length): utterances[start:end] spans the turn
    # (utterances[end] is the first utterance *not* in the turn), and
    # length counts only the utterances spoken by the turn's speaker,
    # so length != end - start when backchannels are interleaved.
    #
    # NOTE(review): the loop below advances tmp past non-turn-taking
    # utterances, but the result is never assigned back to i (the original
    # had "i = tmp" commented out), so the skip is currently inert.
    # Kept as-is to preserve behaviour; confirm whether it was meant to
    # be active.
    tmp = i
    while True:
        try:
            if utterances[tmp].damsl_act_tag() in ['b', 'x']:
                tmp += 1
            else:
                break
        except IndexError:
            break
    # i = tmp
    utt = utterances[i]
    speaker = utt.caller
    start = i
    end = i + 1
    length = 1
    # extend forward
    while True:
        try:
            if utterances[end].caller == speaker:
                end += 1
                length += 1
            elif utterances[end].damsl_act_tag() in ['b', 'x']:
                end += 1
            else:
                break
        except IndexError:
            break
    # extend backward; the explicit start > 0 guard is required because
    # utterances[start-1] with start == 0 would silently wrap around to the
    # end of the list (negative indexing) instead of raising IndexError
    while start > 0:
        if utterances[start - 1].caller == speaker:
            start -= 1
            length += 1
        elif utterances[start - 1].damsl_act_tag() in ['b', 'x']:
            start -= 1
        else:
            break
    # trim so that utterances[start] and utterances[end-1] are actually
    # spoken by the turn's speaker (drop leading/trailing backchannels)
    while True:
        try:
            if not utterances[start].caller == speaker:
                start += 1
            else:
                break
        except IndexError:
            break
    while True:
        try:
            if not utterances[end - 1].caller == speaker:
                end -= 1
            else:
                break
        except IndexError:
            break
    return (start, end, length)


def getWords(utt):
    # Extracts the actual spoken words from an utterance, dropping the
    # Switchboard transcription annotations, lower-casing everything and
    # stripping leftover punctuation.
    text = utt.text.split(" ")  # tokenize the utterance
    text2 = []
    for word in text:
        word = word.strip()
        # remove pure-annotation tokens and markup-like tokens
        if word not in ['#', '/', '[', ']', '}', '--', ',', '-/', '+', '-', '((', '))']:
            if not (word.startswith('{') or word.startswith('<') or word.endswith('>') or word.startswith('(')):
                if word:
                    text2.append(word)
    text2 = [word.lower() for word in text2]
    text2 = [word.strip().strip(',.-#') for word in text2]
    return text2


def printSegment(utterances, i, j, mark):
    # Debugging helper: neatly print utterances[i:j], with an arrow at
    # index `mark`.
    for k in range(i, j):
        if k == mark:
            print("--> " + utterances[k].caller + ": " + utterances[k].text)
        else:
            print(utterances[k].caller + ": " + utterances[k].text)


def num_turns(transcript, speaker):
    # Counts how many turns in the transcript are spoken by `speaker`.
    # BUGFIX: the original ignored the `speaker` argument and counted ALL
    # turns, which inflated the population size n2 used in alignment().
    n = 0
    i = 0
    # this iterates the corpus *by turn*
    while i < len(transcript.utterances):
        turn = findTurn(transcript.utterances, i)
        i = turn[1]  # jump to the end of the turn
        # utterances[turn[0]] is guaranteed (by findTurn's trimming) to be
        # spoken by the turn's speaker
        if transcript.utterances[turn[0]].caller == speaker:
            n += 1
    return n


def base_frequency(transcript, speaker, feature):
    # Counts (as a float) in how many turns `speaker` uses at least one of
    # the words in `feature`.
    n = 0
    i = 0
    # this iterates the corpus *by turn* again
    while i < len(transcript.utterances):
        turn = findTurn(transcript.utterances, i)
        i = turn[1]
        found = False
        # check the whole turn for an occurrence of the marker
        for j in range(turn[0], turn[1]):
            utt = transcript.utterances[j]
            if utt.caller == speaker:
                words = getWords(utt)
                for word in words:
                    if word in feature:
                        found = True
                        break
            if found:
                # if we found one, we are done with this turn
                n += 1
                break
    return 1.0 * n  # 1.0* keeps later divisions true divisions under Python 2


def coordination(transcript, speaker, feature):
    # Crawls the corpus in turn tuples (t1, t2), where t2 is spoken by
    # `speaker`, and counts (as a float) how often `feature` appears in
    # both t1 and t2 simultaneously.
    # Caveat: turns can be very long, so this may overestimate alignment,
    # in particular when the features are very common words.
    n = 0
    i = 0
    # this iterates the corpus *by turn*
    while i < len(transcript.utterances):
        turn = findTurn(transcript.utterances, i)
        found1 = False  # marker occurs in the previous (other speaker's) turn
        found2 = False  # marker occurs in the current (speaker's) turn
        # check the whole turn for the occurrence of the marker;
        # this turn is the one of the other speaker
        for j in range(turn[0], turn[1]):
            utt = transcript.utterances[j]
            if not (utt.caller == speaker):
                words = getWords(utt)
                for word in words:
                    if word in feature:
                        found1 = True
                        break
            if found1:
                break
        # this might have been the last turn
        if not turn[1] < len(transcript.utterances):
            break
        # find the next turn, i.e. the turn starting where the current ends
        turn = findTurn(transcript.utterances, turn[1])
        for j in range(turn[0], turn[1]):
            utt = transcript.utterances[j]
            if utt.caller == speaker:
                words = getWords(utt)
                for word in words:
                    if word in feature:
                        found2 = True
                        break
            if found2:
                break
        if found1 and found2:
            n += 1
        i = turn[1]
    return 1.0 * n  # return a float


def alignment(transcript, speaker1, speaker2, feature):
    # Follows "Echoes of Power", equation (1): this is C^m(b, a) for
    # m = feature, b = speaker2, a = speaker1, i.e. the coordination of
    # speaker2 towards speaker1.  Feel free to modify this to other
    # alignment measures.
    #
    # The population of the probability space is the set of all turn
    # tuples (t1, t2) where t2 is spoken by speaker2.  If you change the
    # population to something else, take great care to modify the math
    # here accordingly!

    # this is the size of the population
    n2 = num_turns(transcript, speaker2)
    # if the first turn is by speaker2, there is no tuple with that turn
    # in the population
    if transcript.utterances[0].caller == speaker2:
        n2 -= 1
    # how often speaker1 uses the feature: E^m_u1
    base1 = base_frequency(transcript, speaker1, feature) / n2
    # how often speaker2 uses the feature in reply: E^m_u2->u1
    # (there are only 2 speakers, so "in reply to" doesn't restrict anything)
    base2 = base_frequency(transcript, speaker2, feature) / n2
    # conditional probability P(E^m_u2->u1 | E^m_u1) = P(A and B) / P(B);
    # coordination() counts how often the feature is used by *both*
    # speakers in the same tuple
    con = coordination(transcript, speaker2, feature) / (n2 * base1)
    # alignment = usage conditioned on the previous speaker's usage,
    # minus the general baseline usage
    return con - base2


# If you use multiple features it is much more clever to read this from a
# file; this personal-pronoun list is for demonstration purposes only.
# NOTE(review): multi-word entries ("each other", "no one", "one another")
# can never match the single tokens produced by getWords, and "his"/"her"
# appear twice; harmless for membership tests, but worth confirming.
feature_pp = ["i","you","he","they","she","we","who","them","him","me","her","us","himself","themselves","someone","herself","anyone","everyone","whom","myself","each other","yourself","no one","somebody","nobody","everybody","anybody","his","mine","ourselves","yours","hers","no-one","ours","theirs","his","their","her","my","your","our","one another"]


def main():
    # Imported lazily so the analysis functions above stay importable
    # without the swda package / corpus data installed.
    import swda
    corpus = swda.CorpusReader('swda')
    # iterate over all transcripts and print the alignment of B towards A
    # sanity check: each value should lie between -1 and 1;
    # you probably should do some statistical analysis here
    for transcript in corpus.iter_transcripts(display_progress=False):
        print(alignment(transcript, "A", "B", feature_pp))


if __name__ == "__main__":
    main()