#!/usr/bin/python
# coding: utf-8

# All My Smooth Body, copyright (c) 2016 Nick Montfort <nickm@nickm.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice and
# this notice are preserved. This file is offered as-is, without any warranty.
#
# 1 page program + all of Shakespeare + for NaNoGenMo + 28 November 2016
# Typeset as monospace, do not break lines; landscape/wide page suggested
#
# Updated 31 May 2018, changed "print" for Python 2 & 3 compatibility

import re
from textblob import Word
from textblob.wordnet import Synset

# Title and author, printed before the generated text. The \f after the
# author's name is a form feed character (presumably a page break).
frontmatter = "All My Smooth Body\n\nNick Montfort\f\n\n\n\n"

# WordNet terms (singular and plural) that must not count as body parts.
# Kept as one comma-separated string because it is split on ',' where the
# body-part list is filtered.
not_parts = ",".join((
    "back", "backs", "behind", "behinds", "bottom", "bottoms",
    "can", "cans", "frame", "frames", "genus", "lien", "liens",
    "middle", "middles", "pump", "pumps", "rear", "seat", "seats",
    "side", "sides", "stern", "tail", "tail end", "tail ends", "tails",
))

# Load the source text (pg100.txt with the license info removed) as a list
# holding one entry per line of the file.
with open('shakespeare.txt') as complete:
    lines = list(complete)

# Strip character names and stage directions from every line.
# The patterns are compiled once here instead of being re-specified per line.
_speaker_prefix = re.compile(r'(  )?([A-Z]+ )*\w+\. ')
_exit_direction = re.compile(r'   +(Exit|Exeunt)\.? *')
_bracketed_text = re.compile(r'\[.*\]')
_not_dialogue = re.compile(r'([A-Z][A-Z][A-Z]|      |\[|\])')


def _strip_line(raw):
    """Return one line of the source text reduced to its spoken words.

    raw is a line as read from the file, normally ending in a newline.
    The result is the line without that newline and with speaker prefixes,
    Exit/Exeunt directions and [bracketed] text deleted, or '' when what
    remains still contains three consecutive capitals, a run of six
    spaces, or an unmatched bracket.
    """
    # sub() replaces every occurrence, so any "word. " sequence is removed,
    # not only a speaker name at the start of the line.
    text = _speaker_prefix.sub('', raw)
    text = _exit_direction.sub('', text)
    text = _bracketed_text.sub('', text)
    # Drop the line terminator only when it is there: a final line with no
    # trailing newline must not lose its last real character.
    if text.endswith('\n'):
        text = text[:-1]
    if _not_dialogue.search(text):
        return ''
    return text


lines = [_strip_line(raw) for raw in lines]

# Build a list of body parts via WordNet: the lemma names of every part
# meronym of body.n.01 and of each of those parts' own part meronyms.
body = Synset('body.n.01')
names = []
for first in body.part_meronyms():
    names.extend(first.lemma_names())
    for second in first.part_meronyms():
        names.extend(second.lemma_names())

# Add a plural form for every name collected above.
names += [Word(name).pluralize() for name in names]

# WordNet joins multiword lemmas with underscores; the text uses spaces.
spaced = set(re.sub(r'_', ' ', name) for name in names)

# Filter out the confusing terms instead of calling list.remove() on each,
# which raises ValueError if the installed WordNet data lacks one of them.
excluded = set(not_parts.split(','))
kept = sorted(name for name in spaced if name not in excluded)

# One regex alternation of all remaining names, escaped so that a name
# containing a regex metacharacter is still matched literally.
parts = '|'.join(re.escape(name) for name in kept)
# Keep lines mentioning body parts and align/smooth them.
# The pattern is compiled once rather than being rebuilt for every line, and
# the kept lines are gathered in a list and joined once rather than grown
# with repeated string concatenation.
part_pattern = re.compile(r'\b(' + parts + r')\b')
found_parts = []
aligned = []

for line in lines:
    mo = part_pattern.search(line)
    if mo:
        found_parts.append(mo.group(0))
        # Pad on the left so the first matched part starts at column 70.
        # A negative count yields '', so a match past column 70 gets no pad.
        aligned.append(' ' * (70 - mo.start()) + line + '\n')

speech = ''.join(aligned)

# Emit the three sections in order: front matter, the aligned speech, and
# the matched body parts joined with ' + '. Each print() takes a single
# argument so the output is the same under Python 2 and Python 3.
# The slice drops the last three characters of speech before the closing
# period is added.
# NOTE(review): presumably this trims the final line terminator and closing
# punctuation; confirm against the line endings of the input file.
for section in (frontmatter,
                speech[:-3] + '.\n\n',
                ' + '.join(found_parts)):
    print(section)