I am creating a simple movie recommendation system that returns the most similar movies based on a user's input. I also wrote code that returns the top-rated movies from the dataset. I'm trying to figure out how to add an if statement that skips the similar-movies code (the part that builds `similar_movies`) when the user doesn't input a movie title.
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Load the movie dataset from disk into a DataFrame.
df = pd.read_csv("movie_dataset.csv")

# Columns whose text is used to drive the recommendations.
features = [
    'keywords',
    'cast',
    'genres',
    'director',
]
def combine_features(row, columns=('keywords', 'cast', 'genres', 'director')):
    """Join the text of several fields of ``row`` into one space-separated string.

    The resulting string is what the script vectorizes to compute cosine
    similarity between movies.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by column
            name; each selected value must already be a string.
        columns: Names of the fields to combine, in output order. Defaults to
            the four columns the script has always used.

    Returns:
        The selected field values joined by single spaces.

    Raises:
        TypeError: If a selected value is not a string (e.g. an unfilled NaN).
        KeyError: If ``row`` is a dict or pandas row lacking one of ``columns``.
    """
    return " ".join(row[name] for name in columns)
# Preprocessing: replace NaN in every feature column with an empty string so
# the columns can be concatenated as text.
df[features] = df[features].fillna('')

# Build one combined text field per row.
df["combined_features"] = df.apply(combine_features, axis=1)

# Turn the combined text into a count matrix (input for the similarity matrix).
cv = CountVectorizer()
count_matrix = cv.fit_transform(df["combined_features"])

# Cosine similarity computed from the count matrix.
cosine_sim = cosine_similarity(count_matrix)
# Lookup helpers: title from index, and index from title.
def get_title_from_index(index):
    """Return the title of the first row whose DataFrame index equals ``index``.

    Raises:
        IndexError: If no row has that index.
    """
    row_mask = df.index == index
    titles = df.loc[row_mask, "title"].values
    return titles[0]
def get_index_from_title(title):
    """Return the "index" column value of the first row titled ``title``.

    When no row has that exact title, a notice is printed and None is
    returned instead.
    """
    matches = df.loc[df.title == title, "index"].values
    if len(matches) == 0:
        print("\nPlease select a different Movie\nHere are the Top Rated movies:\n")
        return None
    return matches[0]
# Ask the user for a movie they like.
MTitle = input("\nType in a movie title: ")
movie_user_likes = MTitle

# Index of the entered title; None when the title is blank or not in the
# dataset (get_index_from_title prints a notice in that case).
movie_index = get_index_from_title(MTitle)

if movie_index is None:
    # No movie to compare against, so skip the similar-movies section.
    # Without this guard cosine_sim[None] would not fail (NumPy treats None
    # as a new axis) and an empty "Similar movies" list would be printed.
    # Both names stay defined (empty) because later code reads them.
    similar_movies = []
    sorted_similar_movies = []
else:
    # (position, similarity score) pairs for every movie vs. the chosen one.
    similar_movies = list(enumerate(cosine_sim[movie_index]))
    # Best scores first; [1:] drops the top entry, which is presumably the
    # chosen movie itself.
    sorted_similar_movies = sorted(similar_movies, key=lambda x: x[1], reverse=True)[1:]

    # Print the ten most similar movies.
    print("\nSimilar movies to "+movie_user_likes+" are:\n")
    for element in sorted_similar_movies[:10]:
        print(get_title_from_index(element[0]))
I want the program to default to here if the user doesn't enter a movie title.
# Top-rated listing. When a title was matched, the similar movies are ranked
# by average vote (as before). When no title was matched the similar-movies
# list is empty, so fall back to ranking every movie in the dataset;
# otherwise nothing would be printed here.
top_rated_candidates = sorted_similar_movies or [(idx, None) for idx in df.index]

# Look the column up once instead of once per comparison key.
vote_average = df["vote_average"]
sort_by_average_vote = sorted(
    top_rated_candidates,
    key=lambda x: vote_average[x[0]],
    reverse=True,
)

# Print the six best-rated titles (the same count the original loop produced).
print("\nTop Rated Movies:\n")
for element in sort_by_average_vote[:6]:
    print(get_title_from_index(element[0]))
Aucun commentaire:
Enregistrer un commentaire