Sentiment Analysis Background
Sentiment analysis is the process of using software to classify a piece of text into a category that reflects the opinion of the writer. Sentiment analysis falls into the growing field of machine learning. In this assignment, you will be writing a simple sentiment analysis program to predict the score a movie reviewer would give based on the words that they use.
Sentiment Analysis Algorithm
The goal of your program is to use a data set of known reviews to predict the review for a new piece of text. Our algorithm for sentiment analysis is a simple one: For each word in the input text, calculate the average score of the reviews that use that word per word use. (For the sake of clarity, let’s call that score the “word score.”) Then, find the average of the word scores for the words in your input review. That number is your predicted score for the input text. You should ignore capitalization and punctuation.
For example, let’s say we have two reviews:
Score: 1, Text: “The plot holes were the most outrageous I’ve ever seen.”
Score: 4, Text: “The outrageous slapstick sequences left me howling.”
In this example, “Outrageous” is used once in a 1 score review and once in a 4 score review. That means that its word score is 2.5. “The” is used twice in a 1 score review and once in a 4 score review, meaning that its word score is 2. If our input text was just “the outrageous,” we would predict a score of 2.25.
Code:
Caution: please change the file directory before running.
MovieApp.java
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Scanner;
/**
 * Console program that predicts a review score from the words used in a review.
 *
 * <p>The program reads a file of known reviews (one per line: a numeric score followed by
 * the review text), records for every word the score of each review line it occurs in
 * (once per occurrence), and then predicts the score of a review typed by the user as the
 * average of the word scores of the words it recognizes. Capitalization and ASCII
 * punctuation are ignored in both the known reviews and the typed review.
 */
public class MovieApp {

    /** Directory searched for review files when none is supplied on the command line. */
    private static final String DEFAULT_REVIEW_DIR = "/Users/mrdishant/Assignment Java/src/";

    /**
     * Prompts for a review file until one can be read and parsed, then prompts for a review
     * and prints its predicted score rounded to two decimals, or {@code -1} when none of the
     * review's words occur in the file.
     *
     * @param nt command-line arguments; when at least one is given, {@code nt[0]} replaces
     *           the built-in directory in which review files are looked up
     * @throws IOException kept in the signature for compatibility; read failures are
     *                     reported on the console and the filename prompt is repeated
     */
    public static void main(String[] nt) throws IOException {
        String reviewDir = (nt != null && nt.length > 0) ? nt[0] : DEFAULT_REVIEW_DIR;
        Scanner scanner = new Scanner(System.in);

        HashMap<String, Word> wordHashMap = null;
        while (wordHashMap == null) {
            System.out.println("Review filename?");
            if (!scanner.hasNextLine()) {
                return; // input ended before a usable file was named
            }
            String fileName = scanner.nextLine();

            String contents;
            try {
                contents = readFile(reviewDir, fileName);
            } catch (IOException e) {
                // Covers FileNotFoundException too; ask again instead of falling through
                // to the parser with nothing to parse.
                System.out.println("Please Enter a valid fileName.");
                continue;
            }

            try {
                // A fresh map is built per attempt so a file rejected half-way through
                // cannot leak words into the next attempt.
                wordHashMap = buildWordMap(contents);
            } catch (IllegalArgumentException e) {
                System.out.println("File doesn’t match the input structure please try again.");
            }
        }

        System.out.println("Input review?");
        if (!scanner.hasNextLine()) {
            return;
        }
        double average = predictScore(wordHashMap, scanner.nextLine());
        if (Double.isNaN(average)) {
            System.out.println("-1");
        } else {
            System.out.println("" + Math.round(average * 100.0) / 100.0);
        }
    }

    /** Reads the whole file {@code fileName} located in {@code directory} into a string. */
    private static String readFile(String directory, String fileName) throws IOException {
        StringBuilder contents = new StringBuilder();
        try (FileReader in = new FileReader(new java.io.File(directory, fileName))) {
            char[] buffer = new char[4096];
            int read;
            while ((read = in.read(buffer)) != -1) {
                contents.append(buffer, 0, read);
            }
        }
        return contents.toString();
    }

    /**
     * Parses review lines of the form {@code <score> <text>} into a word-to-scores map.
     * Blank lines are skipped.
     *
     * @throws IllegalArgumentException if a line does not start with a finite number or the
     *                                  contents hold no review line at all
     */
    private static HashMap<String, Word> buildWordMap(String contents) {
        HashMap<String, Word> wordMap = new HashMap<>();
        int reviewCount = 0;
        for (String rawLine : contents.split("\n")) {
            String line = rawLine.trim(); // also drops the '\r' of Windows line endings
            if (line.isEmpty()) {
                continue;
            }
            String[] parts = line.split("\\s+", 2);
            // The score is parsed before punctuation is removed so that "3.5" is not read
            // as 35; a trailing separator such as "4," is still tolerated.
            String scoreToken = parts[0].replaceAll("[,;:]+$", "");
            double score = Double.parseDouble(scoreToken); // NumberFormatException is an IllegalArgumentException
            if (Double.isNaN(score) || Double.isInfinite(score)) {
                throw new NumberFormatException("Score is not a finite number: " + parts[0]);
            }
            reviewCount++;
            if (parts.length < 2) {
                continue; // a score with no review text contributes no words
            }
            for (String token : tokenize(parts[1])) {
                Word entry = wordMap.get(token);
                if (entry == null) {
                    entry = new Word();
                    entry.word = token;
                    entry.scores = new ArrayList<>();
                    wordMap.put(token, entry);
                }
                entry.scores.add(score);
            }
        }
        if (reviewCount == 0) {
            throw new IllegalArgumentException("No review lines found");
        }
        return wordMap;
    }

    /** Lower-cases {@code text}, removes ASCII punctuation and returns its non-empty words. */
    private static ArrayList<String> tokenize(String text) {
        ArrayList<String> tokens = new ArrayList<>();
        String cleaned = text.toLowerCase().replaceAll("\\p{Punct}", "");
        for (String token : cleaned.split("\\s+")) {
            if (!token.isEmpty()) {
                tokens.add(token);
            }
        }
        return tokens;
    }

    /**
     * Averages the word scores of the words of {@code review} that are present in
     * {@code wordMap}. Words without a word score are left out of the average rather than
     * counted as zero.
     *
     * @return the unrounded average, or {@code NaN} when no word of the review is known
     */
    private static double predictScore(HashMap<String, Word> wordMap, String review) {
        double sum = 0;
        int known = 0;
        for (String token : tokenize(review)) {
            Word entry = wordMap.get(token);
            if (entry != null) {
                entry.calculateScore();
                sum += entry.wordScore;
                known++;
            }
        }
        // Counting matches (instead of testing sum != 0) keeps reviews whose known words
        // all score 0 from being reported as unknown.
        return known == 0 ? Double.NaN : sum / known;
    }
}
Word.java
import java.util.ArrayList;
/**
 * Holds the scores recorded for one word and the average derived from them.
 */
public class Word {
    /** The word this entry describes; {@code null} until assigned. */
    String word;

    /** Scores recorded for this word; starts empty so the entry is usable right away. */
    ArrayList<Double> scores = new ArrayList<>();

    /** Average of {@link #scores} rounded to two decimals; set by {@link #calculateScore()}. */
    double wordScore;

    @Override
    public String toString() {
        return "Word{"
                + "word='" + word + '\''
                + ", scores=" + scores
                + ", wordScore=" + wordScore
                + '}';
    }

    /**
     * Recomputes {@link #wordScore} as the mean of {@link #scores}, rounded to two decimals.
     * When no scores have been recorded (or the list has been set to {@code null}) the word
     * score is {@code 0.0}.
     */
    public void calculateScore() {
        if (scores == null || scores.isEmpty()) {
            wordScore = 0.0;
            return;
        }
        double sum = 0.0;
        for (double score : scores) {
            sum += score;
        }
        wordScore = Math.round(sum / scores.size() * 100.0) / 100.0;
    }
}
Output: