/*
*   copyright 2002 Allen B. Downey
*   copyleft  2002 Allen B. Downey

*   This program is free software; you can redistribute it and/or modify
*   it under the terms of the GNU General Public License as published by
*   the Free Software Foundation; either version 2 of the License, or
*   (at your option) any later version.

*   This program is distributed in the hope that it will be useful,
*   but WITHOUT ANY WARRANTY; without even the implied warranty of
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*   GNU General Public License for more details.
*/

/* Haiku.java: this program takes one command-line argument, the
   name of a file.  It searches the file for instances of complete
   sentences that form a haiku (a three-line stanza with five syllables
   in the first line, seven in the second, and five in the last).

   It uses a file named c06d to build a hashtable of words and their
   syllable counts.  This file is from the ftp site

ftp://ftp.cs.cmu.edu/afs/cs.cmu.edu/data/anonftp/project/fgdata/dict/

   at CMU.  The idea for this comes from danny@spesh.com

http://www.oblomovka.com/code/haiku/

   which provides a similar program in Python.  I looked at his
   code before writing mine, but we use somewhat different algorithms.
   He has some capabilities I don't (like guessing the syllable
   count of unknown words).  But I have some capabilities he doesn't
   (like recognizing multiple-stanza haiku).

*/

import java.io.*;
import java.util.Hashtable;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.Vector;

public class Haiku {
    Hashtable syltab;         // table of words and syllable counts
    Vector v;                 // the current line of word pairs
    int syllables;            // number of syllables in the current line
    Vector lines;             // the accumulated lines
    BufferedReader fin;       // the file we are reading
    StringTokenizer tokens;   // tokens we have read

    // pairs that were consumed by a failed attempt and must be scanned
    // again; getPair hands these out before it reads from the file
    private Vector pending = new Vector ();

    // every pair handed out by getPair since the current attempt
    // began, in reading order (an unknown word is recorded too)
    private Vector history = new Vector ();

    // constructor: initialize instance vars and load the dictionary
    // from the file c06d in the current directory.  If the dictionary
    // cannot be read, a message is printed and the program exits
    // with status -1.
    public Haiku () {
        syltab = new Hashtable ();
        v = new Vector ();
        syllables = 0;
        lines = new Vector ();

        String filename = "c06d";
        try {
            readDictionary (filename);
        }
        catch (Exception e) {
            System.out.println ("Couldn't read the dictionary: " + filename);
            System.exit (-1);
        }
    }

    // readDictionary: read the named dictionary file and build syltab.
    // Empty lines and lines starting with '#' are skipped; every other
    // line is handed to dictionaryEntry.  The reader is closed before
    // returning, whether or not reading succeeded.
    public void readDictionary (String filename)
                throws FileNotFoundException, IOException {

        BufferedReader in = new BufferedReader (new FileReader (filename));
        try {
            while (true) {
                String s = in.readLine();
                if (s == null) break;
                // check the length first: charAt(0) on an empty line
                // would throw StringIndexOutOfBoundsException
                if (s.length() == 0 || s.charAt(0) == '#') continue;
                dictionaryEntry (s);
            }
        }
        finally {
            in.close ();
        }
    }

    // dictionaryEntry: make an entry in syltab.  The first
    // whitespace-separated token of s is the word (stored in lower
    // case); each following token that contains a digit counts as
    // one syllable.  A line with no tokens is ignored.
    public void dictionaryEntry (String s) {
        StringTokenizer st = new StringTokenizer (s);
        if (!st.hasMoreTokens ()) return;
        // use a fixed locale so the keys do not depend on the
        // platform default (e.g. Turkish maps "I" to a dotless i)
        String word = st.nextToken().toLowerCase(Locale.ENGLISH);

        int syls = 0;
        while (st.hasMoreTokens ()) {
            String phone = st.nextToken ();
            if (hasDigit (phone)) syls++;
        }
        syltab.put (word, Integer.valueOf (syls));
    }

    // hasDigit: return true if the string contains a digit
    public boolean hasDigit (String s) {
        for (int i=0; i<s.length(); i++) {
            if (Character.isDigit (s.charAt (i))) return true;
        }
        return false;
    }

    // getPair: get the next word-syllable pair.  Pairs that were
    // pushed back by a failed attempt are returned first; otherwise
    // the next word is read from the file and looked up in syltab.
    // A word that is not in the table gets a syllable count of -1.
    // Every pair returned is also recorded in history so that a
    // failed attempt can be rescanned.
    public Pair getPair () {
        Pair p;
        if (pending.size() > 0) {
            p = (Pair) pending.remove (0);
        } else {
            String word = getWord ();
            Integer syls = (Integer) syltab.get (clean (word));
            if (syls == null) {
                p = new Pair (word, -1);
            } else {
                p = new Pair (word, syls.intValue());
            }
        }
        history.add (p);
        return p;
    }

    // clean: clean a string by tokenizing it and taking the
    // first token that begins with a letter, in lower case.
    // Returns the empty string if there is no such token.
    // BUG: expressions like object.method will have the syllable
    // count of the first word only
    public String clean (String s) {
        StringTokenizer st = new StringTokenizer (s,
                             "0123456789@#$%^&*\"`'()<>[]{}.,:;?!+=/\\");
        while (st.hasMoreTokens ()) {
            String word = st.nextToken ();
            if (Character.isLetter (word.charAt (0)))
                // same fixed locale as the dictionary keys
                return word.toLowerCase(Locale.ENGLISH);
        }
        return "";
    }

    // getWord: get the next word from the file, reading more lines
    // as needed (blank lines produce no tokens and are skipped)
    public String getWord () {
        while (tokens == null || tokens.hasMoreTokens() == false) {
            tokens = getTokens ();
        }
        return tokens.nextToken ();
    }

    // getTokens: read a line from the file and tokenize it on
    // spaces, tabs and hyphens.  At end of file the program exits
    // with status 0; on any error it prints a message and exits
    // with status -1.
    // BUG: gets rid of hyphens
    public StringTokenizer getTokens () {
        try {
            String s = fin.readLine();
            if (s == null) {
                System.exit (0);
            }
            // tabs separate words too; without this a tab-indented
            // word would never match a dictionary entry
            return new StringTokenizer (s, " \t-");
        }
        catch (Exception e) {
            System.out.println ("I/O Error.");
            System.exit (-1);
        }
        return null;
    }

    // shift: remove the first word from the current line and
    // subtract its syllables from the running total.  getForm does
    // not call this; it is kept for callers outside this class.
    public void shift () {
        if (v.size() > 0) {
            Pair p = (Pair) v.remove (0);
            syllables -= p.syls;
        }
    }

    // getSyllables: keep adding word pairs to the current line
    // until the total syllables gets to syls (it may overshoot).
    // If a word with unknown syllable count turns up, the current
    // line is emptied, syllables is set to 0 and the method returns.
    public void getSyllables (int syls) {
        while (syllables < syls) {
            Pair p = getPair ();
            if (p.syls == -1) {
                syllables = 0;
                v.removeAllElements();
                return;
            } else {
                syllables += p.syls;
                v.add (p);
            }
        }
    }

    // getForm: find the next set of words that can be assembled
    // into lines with the given form.  The form is an array of
    // integers specifying the number of syllables in each line.
    // On return, form.length new lines have been appended to lines.
    // Throws IllegalArgumentException if an entry of form is not
    // positive (such an entry could never be satisfied).
    public void getForm (int[] form) {
        checkForm (form);

        for (int i=0; i<form.length; ) {

            getSyllables (form[i]);

            if (syllables == 0 ||
                syllables > form[i] ||
                (lines.size()==0 && badStart(v))) {

                // either we found a word with unknown syllables
                // or we have too many syllables
                // or this is not an acceptable first line...
                // in any case we have to start over, one word
                // after the place where this attempt started

                restart ();
                i = 0;

            } else {
                // the current line is good.
                // save it and start the next
                lines.add (v);
                v = new Vector ();
                syllables = 0;
                i++;
            }
        }
    }

    // restart: abandon the current attempt.  All the words it
    // consumed, except the first, are pushed back (in order) so the
    // next attempt starts exactly one word later; this way no
    // starting position is skipped, including those inside lines
    // that had already been accepted.
    private void restart () {
        if (history.size() > 0) {
            history.remove (0);
        }
        // put the rest in front of anything still waiting
        pending.addAll (0, history);
        history.removeAllElements ();
        lines.removeAllElements ();
        v.removeAllElements ();
        syllables = 0;
    }

    // checkForm: reject forms that contain a non-positive count
    private static void checkForm (int[] form) {
        for (int i=0; i<form.length; i++) {
            if (form[i] <= 0) {
                throw new IllegalArgumentException (
                    "form[" + i + "] must be positive: " + form[i]);
            }
        }
    }

    // badStart: return true if this line doesn't begin with a
    // capital letter.  The line must contain at least one pair.
    public boolean badStart (Vector v) {
        Pair p = (Pair) v.get(0);
        return !Character.isUpperCase (p.word.charAt(0));
    }

    // completeSentence: return true if the first line begins
    // with a capital letter and the last line ends with a period.
    // Returns false if there are no lines at all.
    public boolean completeSentence () {
        if (lines.size() == 0) return false;

        Vector first = (Vector) lines.get(0);
        if (badStart (first)) return false;

        Vector last = (Vector) lines.get(lines.size()-1);
        Pair p = (Pair) last.get(last.size()-1);
        return p.word.charAt(p.word.length()-1) == '.';
    }

    // printLine: print a vector of word pairs with spaces between
    // (each word is followed by a space, then the line is ended)
    public void printLine (Vector line) {
        for (int i=0; i<line.size(); i++) {
            Pair p = (Pair) line.get(i);
            System.out.print (p.word + " ");
        }
        System.out.println ("");
    }

    // print: print all the lines, followed by an empty line
    public void print () {
        for (int i=0; i<lines.size(); i++) {
            printLine ((Vector) lines.get(i));
        }
        System.out.println ("");
    }

    // getForms: get all the forms from the given file.
    // only print the ones that are complete sentences.
    // When a stanza is found that is not a complete sentence, its
    // lines are kept and the next stanza is appended to them, so a
    // sentence may span several stanzas.  This method does not
    // return normally: the program exits when the end of the file
    // is reached (see getTokens).
    // Throws IllegalArgumentException if form is empty or contains
    // a non-positive count.
    public void getForms (String filename, int [] form) {
        if (form.length == 0) {
            throw new IllegalArgumentException (
                "form must have at least one line");
        }
        checkForm (form);

        try {
            FileReader fileReader = new FileReader (filename);
            fin = new BufferedReader (fileReader);
        }
        catch (Exception e) {
            System.out.println ("Error opening file: " + filename);
            System.exit (-1);
        }

        while (true) {
            getForm (form);
            if (completeSentence ()) {
                print ();
                // the printed words are used up: the next search
                // starts after them
                lines.removeAllElements ();
                history.removeAllElements ();
            }
        }
    }

    // main: the single command-line argument is the name of the
    // file to search for 5-7-5 haiku
    public static void main (String[] args) {

        if (args.length == 0) {
            System.out.println ("You must specify a filename.");
            System.exit (0);
        }
        String filename = args[0];

        Haiku haiku = new Haiku ();

        int[] form = {5, 7, 5};
        haiku.getForms (filename, form);
    }
}


// The Pair class stores a word together with its syllable count.

class Pair {
    String word;   // the word
    int syls;      // the syllable count stored with it

    // constructor: remember the word and its syllable count
    public Pair (String word, int syls) {
        this.syls = syls;
        this.word = word;
    }

    // toString: the word, a single space, then the syllable count
    public String toString () {
        StringBuffer text = new StringBuffer ();
        text.append (word);
        text.append (" ");
        text.append (syls);
        return text.toString ();
    }
}
