Thursday, October 25, 2018

IMDB movies Year wise Rating Comparison (Data Analysis) Python

Tags

Code:


import requests
import bs4
from matplotlib import pyplot as plt

yearDict={}

def fetch(year,counting=100):
    """Summarise the IMDb ratings of action feature films released in ``year``.

    Downloads one IMDb advanced-search result page and adds up the rating of
    every listed movie that carries one.

    Args:
        year: Release year to query; used as both ends of the release-date
            range in the search URL.
        counting: Number of results requested from IMDb (the ``count`` query
            parameter).

    Returns:
        A dict with the keys ``"sum"`` (total of the ratings found),
        ``"count"`` (number of movies that had a rating) and ``"total"``
        (number of movies listed on the page).

    Raises:
        requests.RequestException: If the request fails, times out, or IMDb
            answers with an HTTP error status.
    """
    url = (
        "https://www.imdb.com/search/title?count=" + str(counting)
        + "&genres=action&release_date=" + str(year) + "," + str(year)
        + "&title_type=feature"
    )

    # Without a timeout a stalled connection would block the script forever.
    res = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of silently reporting zero movies.
    res.raise_for_status()

    soup = bs4.BeautifulSoup(res.text, 'lxml')
    moviesList = soup.findAll("div", {"class": "lister-item mode-advanced"})

    count = 0
    rating_total = 0  # deliberately not named ``sum``: that would shadow the builtin
    for movie in moviesList:
        value = movie.find("div", {"class": "inline-block ratings-imdb-rating"})
        if value is None:
            # Entry is listed but carries no rating block.
            continue
        try:
            rating = float(value.get('data-value'))
        except (TypeError, ValueError):
            # Rating attribute missing or not numeric: leave this movie out.
            continue
        count = count + 1
        rating_total = rating_total + rating

    summary = {"sum": rating_total, "count": count, "total": len(moviesList)}
    print(year, summary)
    return summary

years = [2015, 2016, 2017, 2018]

# Download and summarise the ratings for every year of interest.
for year in years:
    yearDict[year] = fetch(year)

print(yearDict)

# Average rating per year, in the same order as ``years``.
averages = []
for year in years:
    stats = yearDict[year]
    if stats['count']:
        averages.append(stats['sum'] / stats['count'])
    else:
        # No rated movie was found for this year. NaN avoids a
        # ZeroDivisionError and shows up as a gap in the line plot.
        averages.append(float('nan'))

# Alternative chart types -- uncomment one of them to try it instead of the
# line plot below.
# plt.scatter(years, averages, label="Movies", color='r')
# plt.bar(years, averages)

# cols = ['r', 'g', 'b', 'y']
# explode = (0.1, 0, 0, 0.1)

# plt.pie(averages,
#         labels=years,
#         colors=cols,
#         startangle=180,
#         shadow=True,
#         explode=explode
#         )

plt.plot(years, averages, 'c', label="Movies", linewidth=2)

plt.legend()

plt.title("Movies")
plt.xlabel("Years")
plt.ylabel("Averages")

plt.grid(True, color='g')

plt.show()


NOTE: You can pass the optional `counting` argument to the fetch function to analyse that many movies per year (the default is 100).

 

Screenshots:



Popular Singer on Top List (Data Analysis Gaana) Python

Tags

Screenshots:



Code:



import requests
import bs4

# from matplotlib import pyplot as plt

artistList={}


# def ploting():
# X=[]
# for i in artistList.keys():
# X.append(i)

# Y=[]
# for i in artistList.values():
# Y.append(i)



# print(X)
# print(Y)

# # plt.plot(X,Y,'g', label="Category-A", linewidth=3)

# cols = ['r', 'g', 'b']

# plt.pie(Y,
# labels=X,
# colors=cols,
# startangle=180,
# shadow=True,
# )

# plt.legend()

# plt.show()


def fetch():
    """Print the artist credited on the most songs of a Gaana top-50 playlist.

    Downloads the playlist page and counts, in the module-level ``artistList``
    dict, how many songs each artist is credited on. A song credited to
    several comma-separated artists counts once for each of them. Finally
    prints the artist with the highest count followed by that count.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Alternative playlist:
    # res=requests.get("https://gaana.com/playlist/gaana-dj-bollywood-top-50-1")
    # The timeout keeps a stalled connection from blocking the script forever.
    res = requests.get(
        "https://gaana.com/playlist/gaana-dj-gaana-international-top-50",
        timeout=30,
    )

    soup = bs4.BeautifulSoup(res.text, 'lxml')
    songsList = soup.findAll("ul", {"class": "s_l artworkload _cursor "})

    for song in songsList:
        artist_cell = song.find("li", {"class": "s_artist p_artist desktop"})
        if artist_cell is None:
            # Row without an artist cell: nothing to count.
            continue
        for name in artist_cell.text.split(","):
            # Strip the padding around each name so that "A, B" and "B" are
            # counted as the same artist "B".
            name = name.strip()
            if not name:
                continue
            artistList[name] = artistList.get(name, 0) + 1

    # Find the most frequent artist; on a tie the first one encountered wins.
    top_count = 0
    top_artist = ""
    for name, appearances in artistList.items():
        if appearances > top_count:
            top_count = appearances
            top_artist = name

    print(top_artist, " ", top_count)

fetch()

Monday, October 15, 2018

Movie Sentiment Analysis (Java)

Tags

Sentiment Analysis Background


Sentiment analysis is the process of using software to classify a piece of text into a category that reflects the opinion of the writer. Sentiment analysis falls into the growing field of machine learning. In this assignment, you will be writing a simple sentiment analysis program to predict the score a movie reviewer would give based on the words that they use.
 

Sentiment Analysis Algorithm

The goal of your program is to use a data set of known reviews to predict the review score for a new piece of text. Our algorithm for sentiment analysis is a simple one: for each word in the input text, calculate the average score of the reviews that use that word, counting the review's score once for every time the word is used. (For the sake of clarity, let’s call that average the “word score”.) Then find the average of the word scores for the words in your input review. That number is your predicted score for the input text. You should ignore capitalization and punctuation.
 

For example, let’s say we have two reviews:

Score: 1, Text: “The plot holes were the most outrageous I’ve ever seen.”
Score: 4, Text: “The outrageous slapstick sequences left me howling.”
In this example, “Outrageous” is used once in a 1 score review and once in a 4 score review. That means that its word score is 2.5. “The” is used twice in a 1 score review and once in a 4 score review, meaning that its word score is 2. If our input text was just “the outrageous,” we would predict a score of 2.25.



Code:


Caution: please change the file directory before running.

MovieApp.java

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Scanner;

public class MovieApp {

    /** Directory the review files are loaded from; change it to match your machine. */
    private static final String REVIEW_DIRECTORY = "/Users/mrdishant/Assignment Java/src/";

    /**
     * Asks for a review file name until one can be read and parsed, then asks
     * for a review text and prints its predicted score (or -1 when none of
     * its words occurs in the review file).
     *
     * @param nt command-line arguments (unused)
     * @throws IOException kept for interface compatibility; read failures are
     *                     reported to the user and the file name is asked again
     */
    public static void main(String[] nt) throws IOException {

        Scanner scanner = new Scanner(System.in);
        HashMap<String, Word> wordHashMap = null;

        // Keep asking until a file has been loaded successfully.
        while (wordHashMap == null) {

            System.out.println("Review filename?");
            String fileName = scanner.nextLine();

            try {
                wordHashMap = loadWordScores(REVIEW_DIRECTORY + fileName);
            } catch (IOException e) {
                // Covers FileNotFoundException as well as read errors.
                System.out.println("Please Enter a valid fileName.");
            } catch (NumberFormatException e) {
                // A line did not start with a numeric score.
                System.out.println("File doesn’t match the input structure please try again.");
            }
        }

        System.out.println("Input review?");
        String inputReview = scanner.nextLine();

        System.out.println(predictScore(inputReview, wordHashMap));
    }

    /**
     * Reads a review file in which every non-blank line is
     * {@code <score> <review text>} and collects, for each word, the score of
     * every review occurrence of that word.
     *
     * A fresh map is built on every call, so a file that fails half-way
     * leaves nothing behind for the next attempt.
     *
     * @param path full path of the review file
     * @return map from lower-cased, punctuation-free word to its collected scores
     * @throws IOException           if the file cannot be opened or read
     * @throws NumberFormatException if a line does not start with a number
     */
    private static HashMap<String, Word> loadWordScores(String path) throws IOException {

        HashMap<String, Word> wordScores = new HashMap<>();

        // try-with-resources closes the underlying FileReader on every path.
        try (Scanner fileScanner = new Scanner(new FileReader(path))) {

            while (fileScanner.hasNextLine()) {

                String line = fileScanner.nextLine().trim();
                if (line.isEmpty()) {
                    // Blank lines carry no review; skip instead of rejecting the file.
                    continue;
                }

                // Split the score off BEFORE stripping punctuation, otherwise a
                // score such as "3.5" would be turned into "35".
                String[] scoreAndText = line.split("\\s+", 2);
                double score = Double.parseDouble(scoreAndText[0]);

                if (scoreAndText.length < 2) {
                    continue; // a score without any review text
                }

                for (String token : tokenize(scoreAndText[1])) {
                    Word word = wordScores.get(token);
                    if (word == null) {
                        word = new Word();
                        word.word = token;
                        word.scores = new ArrayList<>();
                        wordScores.put(token, word);
                    }
                    word.scores.add(score);
                }
            }

            // Scanner swallows read errors; surface them to the caller.
            IOException readError = fileScanner.ioException();
            if (readError != null) {
                throw readError;
            }
        }

        return wordScores;
    }

    /**
     * Lower-cases the text, removes ASCII punctuation and splits it on
     * whitespace. The same normalisation is used for the review file and for
     * the input review so that their words can match.
     *
     * @param text raw text
     * @return the non-empty words of the text, in order
     */
    private static ArrayList<String> tokenize(String text) {

        ArrayList<String> tokens = new ArrayList<>();
        String cleaned = text.toLowerCase().replaceAll("\\p{Punct}", "").trim();

        for (String token : cleaned.split("\\s+")) {
            if (!token.isEmpty()) {
                tokens.add(token);
            }
        }
        return tokens;
    }

    /**
     * Predicts the score of a review from the word scores.
     *
     * @param review     the review text typed by the user
     * @param wordScores map produced by {@link #loadWordScores(String)}
     * @return the predicted score rounded to two decimals, or "-1" when no
     *         word of the review is known
     */
    private static String predictScore(String review, HashMap<String, Word> wordScores) {

        ArrayList<String> tokens = tokenize(review);

        double sum = 0;
        int knownWords = 0;

        for (String token : tokens) {
            Word word = wordScores.get(token);
            if (word != null) {
                word.calculateScore();
                sum += word.wordScore;
                knownWords++;
            }
        }

        // Decide on "was any word known", not on "sum != 0": a review whose
        // known words all score 0 has a legitimate prediction of 0.0.
        if (knownWords == 0) {
            return "-1";
        }

        // NOTE(review): as in the original, the sum is divided by the number of
        // ALL words in the input, so unknown words pull the average towards 0.
        // Confirm against the assignment whether unknown words should be ignored.
        double average = sum / tokens.size();

        return "" + Math.round(average * 100.0) / 100.0;
    }

}

Word.java


import java.util.ArrayList;

/**
 * A word together with the review scores it was seen with and the average of
 * those scores.
 */
public class Word {

    /** The word text. */
    String word;

    /**
     * One score per recorded occurrence of the word. Starts as an empty list
     * so that a freshly created Word can be used without further set-up.
     */
    ArrayList<Double> scores = new ArrayList<>();

    /** Average of {@link #scores}, rounded to two decimals; set by {@link #calculateScore()}. */
    double wordScore;


    @Override
    public String toString() {
        return "Word{" +
                "word='" + word + '\'' +
                ", scores=" + scores +
                ", wordScore=" + wordScore +
                '}';
    }

    /**
     * Recomputes {@link #wordScore} as the mean of {@link #scores}, rounded to
     * two decimal places. With no scores recorded the word score is 0.0.
     */
    public void calculateScore(){

        if (scores == null || scores.isEmpty()) {
            // Nothing to average; avoids a NullPointerException / 0-by-0 division.
            wordScore = 0.0;
            return;
        }

        double sum = 0.0;

        for (double score : scores) {
            sum += score;
        }

        wordScore = Math.round(sum / scores.size() * 100.0) / 100.0;

    }

}


Output:




Github repo url : https://github.com/mrdishant/Movie-Sentiment-Analysis-Java/tree/master/src

 




Wednesday, October 10, 2018

CD Store Management Python (Simple Approach)

Tags

ScreenShots :


Code:

# In-memory CD catalogue: one [title, artist, genre, price] list per CD.
listCD=[]


def sortByIndex(index):
    """Sort the module-level ``listCD`` in place, ascending, by one field.

    Args:
        index: Position of the field to sort on inside each CD record
            (0 = title, 1 = artist, 2 = genre, 3 = price).
    """
    # Whole records are reordered. Swapping only the compared field between
    # neighbours would attach titles to the wrong artist, genre and price.
    # list.sort is stable, so CDs with equal keys keep their relative order.
    listCD.sort(key=lambda cd: cd[index])

def createDatabase(path="CD_Store.txt"):
    """Load CD records from a comma-separated text file into ``listCD``.

    Every line is expected to look like ``title,artist,genre,price``. Blank
    lines are ignored. A line whose price is missing or not a number is
    reported and skipped, so that no record ends up with a price taken from a
    previous line.

    Args:
        path: File to read; defaults to ``CD_Store.txt`` in the current
            directory.
    """
    # Read-only access is enough, and ``with`` guarantees the file is closed.
    with open(path, "r") as f:
        for line in f:
            if not line.strip():
                continue
            fields = line.split(",")
            try:
                price = float(fields[3])
            except (IndexError, ValueError) as e:
                print("Error in price ", e)
                continue
            listCD.append([fields[0], fields[1], fields[2], price])

def printList():
    """Print every record of ``listCD``, each one followed by an empty line."""
    for record in listCD:
        print(record, "\n")

def findByTitle(target):
    """Print each CD whose title (field 0) contains ``target``, ignoring case."""
    for record in listCD:
        if target.lower() not in record[0].lower():
            continue
        print(record)

def findByGenre(target):
    """Print each CD whose genre (field 2) contains ``target``, ignoring case."""
    for record in listCD:
        if target.lower() not in record[2].lower():
            continue
        print(record)

def findByArtist(target):
    """Print each CD whose artist (field 1) contains ``target``, ignoring case."""
    for record in listCD:
        if target.lower() not in record[1].lower():
            continue
        print(record)

def findByPrice(targetPrice):
    """Print each CD whose price (field 3) is at most ``targetPrice``."""
    for record in listCD:
        if not record[3] <= targetPrice:
            continue
        print(record)

def main():
    """Run the text menu of the CD store until the user types ``quit``.

    Loads the catalogue once with ``createDatabase()`` and then repeatedly
    shows the menu, reads a choice and calls the matching print, sort or
    search function.
    """
    createDatabase()

    # Menu choice -> index of the record field that choice sorts on.
    sort_fields = {'2': 0, '3': 1, '4': 2, '5': 3}

    while True:
        print("\nPlease Choose from below \n" )
        print("1 to Print List of CDs")
        print("2 to Sort CDs by Title")
        print("3 for Sort CDs by Artist")
        print("4 for Sort CDs by Genre")
        print("5 for Sort CDs by Price")
        print("6 for Find All CDs by Title")
        print("7 for Find All CDs by Artist")
        print("8 for Find All CDs by Genre")
        print("9 for Find All CDs with Price at Most X")
        print('quit to quit\n')

        i = input()

        if i == '1':
            printList()
        elif i in sort_fields:
            sortByIndex(sort_fields[i])
        elif i == '6':
            findByTitle(input("Enter CD Title : "))
        elif i == '7':
            findByArtist(input("Enter Artist name : "))
        elif i == '8':
            findByGenre(input("Enter Genre : "))
        elif i == '9':
            try:
                targetPrice = float(input("Enter target Price : "))
            except ValueError:
                # A typo in the price must not terminate the whole program.
                print("Please enter the price as a number.")
                continue
            findByPrice(targetPrice)
        elif i.lower() == 'quit':
            break
        else:
            print("Unknown option, please choose again.")

main()

Monday, October 8, 2018

CD Store Management System Python

Tags

Assignment:





Function SortByArtist

Input: List of CDs Output: Updates the list of CDs so that elements are sorted in ascending order by artist.
Description: Program sorts the list CDs by the artist attribute. 

Function SortByPrice 


Input: List of CDs Output: Updates the list of CDs so that elements are sorted in ascending order by price.
Description: Program sorts the list of CDs by the price attribute.

Function FindByTitle


Input: a target string and a list of CDs Output: Prints all CDs in the list of CDs that have the title target.
Description: Program should print all elements in the list of CDs that have a title that matches target.

Function FindByGenre

Input: a target string and a list of CDs Output: Prints all CDs in the list of CDs that have the genre target.
Description: Program should print all elements in the list of CDs that have the genre given in target.

Function FindByArtist


Input: a target string and a list of CDs Output: Prints all CDs in the list of CDs that have target listed as the artist.
Description: Program should print all elements in the list of CDs that have the artist that matches target.

Function FindByPrice

Input: the price (a decimal number) and a list of CDs Output: Prints all CDs in the list of CDs that cost at most the given price.
Description: Program finds all CDs that cost at most the amount specified by price.

Testing

Remember to test that your program is working correctly. For example, you can print the list after it has been sorted to confirm that each sort function is working correctly.


File:


CD_Store.txt 


Code :


class CD:
    """One CD of the store.

    Attributes:
        aName: Title of the CD.
        sName: Artist of the CD.
        type: Genre of the CD.
        price: Price of the CD.
    """

    def __init__(self,aName,sName,type,price):
        self.aName=aName
        self.sName=sName
        self.type=type
        self.price=price

    def getArtist(self):
        """Return the artist of the CD."""
        return self.sName

    def getPrice(self):
        """Return the price converted to a string."""
        return str(self.price)

    def __str__(self):
        return ("{} by {} at {} is of {} Genre".format(self.aName,self.sName,self.getPrice(),self.type))

    def __repr__(self):
        # The special method Python looks up is ``__repr__``; the original
        # spelling ``__rep__`` was never called by ``repr()`` or by containers.
        return self.aName

    # Keep the old, misspelt name callable for any code that used it directly.
    __rep__ = __repr__

class StoreHelper:
    """Loads the CD catalogue from a text file and sorts or searches it.

    Attributes:
        path: File the catalogue was read from and is written back to.
        listCD: The ``CD`` objects currently in the catalogue.
    """

    def __init__(self, path="CD_Store.txt"):
        """Read ``title,artist,genre,price`` lines from ``path``.

        Blank lines are ignored. Lines whose price is missing or not a number
        are reported and skipped.

        Args:
            path: Catalogue file; defaults to ``CD_Store.txt`` in the current
                directory.
        """
        self.path = path
        # Per-instance list: a class-level list would be shared by (and grow
        # with) every StoreHelper that is created.
        self.listCD = []

        # Open here rather than in the class body so that the file is only
        # touched when a helper is created, and is always closed again.
        with open(path, "r") as f:
            for line in f:
                if not line.strip():
                    continue
                fields = line.split(",")
                try:
                    price = float(fields[3])
                except (IndexError, ValueError) as e:
                    print("Skipping malformed line ", line.strip(), e)
                    continue
                self.listCD.append(CD(fields[0], fields[1], fields[2], price))

    def _printAndSave(self):
        """Print every CD followed by a blank line and rewrite the catalogue file."""
        # ``with`` flushes and closes the file, so the data is really on disk.
        with open(self.path, "w") as newFile:
            for cd in self.listCD:
                print(cd, "\n")
                newFile.write(cd.aName+","+cd.sName+","+cd.type+","+cd.getPrice()+"\n")

    def sortByArtist(self):
        """Sort the catalogue by artist (ascending), print it and save it."""
        self.listCD.sort(key=lambda x: x.sName)
        self._printAndSave()

    def sortByPrice(self):
        """Sort the catalogue by price (ascending), print it and save it."""
        self.listCD.sort(key=lambda x: x.price)
        self._printAndSave()

    def searchTitle(self,target):
        """Print every CD whose title contains ``target``, ignoring case."""
        for cd in self.listCD:
            if(target.lower() in cd.aName.lower()):
                print(cd)

    def searchGenre(self,target):
        """Print every CD whose genre contains ``target``, ignoring case."""
        for cd in self.listCD:
            if(target.lower() in cd.type.lower()):
                print(cd)

    def searchArtist(self,target):
        """Print every CD whose artist contains ``target``, ignoring case."""
        for cd in self.listCD:
            if(target.lower() in cd.sName.lower()):
                print(cd)

    def searchPrice(self,targetPrice):
        """Print every CD that costs at most ``targetPrice``."""
        for cd in self.listCD:
            if(cd.price<=targetPrice):
                print(cd)


def main():
    """Run the text menu of the CD store until the user types ``quit``.

    Creates one ``StoreHelper`` and then repeatedly shows the menu, reads a
    choice and calls the matching sort or search method.
    """
    storeHelper = StoreHelper()

    while True:
        print("\nPlease Choose from below \n" )
        print("1 to Sort by Artist")
        print("2 for Sort by Price")
        print("3 for FindByTitle")
        print("4 for FindByGenre")
        print("5 for FindByArtist")
        print("6 for FindByPrice\n")
        print('quit to quit\n')

        i = input()

        if i == '1':
            storeHelper.sortByArtist()
        elif i == '2':
            storeHelper.sortByPrice()
        elif i == '3':
            storeHelper.searchTitle(input("Enter Taget Title here : "))
        elif i == '4':
            storeHelper.searchGenre(input("Enter Taget Genre here : "))
        elif i == '5':
            storeHelper.searchArtist(input("Enter Artist name here : "))
        elif i == '6':
            try:
                targetPrice = float(input("Enter Target price here : "))
            except ValueError:
                # A typo in the price must not terminate the whole program.
                print("Please enter the price as a number.")
                continue
            storeHelper.searchPrice(targetPrice)
        elif i.lower() == 'quit':
            break
        else:
            print("Unknown option, please choose again.")

main()
  

For any query please comment down below.....