Daniel Code: [Data Mining]Manhattan distance

def manhattan(rating1, rating2):
"""Computes the Manhattan distance. Both rating1 and rating2 are dictionaries of the form {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}"""
distance = 0
for key in rating1:
if key in rating2:
distance += abs(rating1[key] - rating2[key])
return distance

>>>manthattan(users['Hailey'],users['Veronica'])

def computeNearestNeighbor(username, users):
"""creates a sorted list of users based on their distance to username"""
distances = [] #list define
for user in users:
if user != username:
distance = manhattan(users[user], users[username])
distances.append((distance, user))
# sort based on distance -- closest first
distances.sort()
return distances

>>>computeNearestNeighbor("Hailey",users)

** recommend the band which Hailey nearest neighbor rated but he does not**
def recommend(username, users):
"""Give list of recommendations"""
# first find nearest neighbor
nearest = computeNearestNeighbor(username, users)[0][1]

recommendations = []
# now find bands neighbor rated that user didn't
neighborRatings = users[nearest]
userRatings = users[username]
for artist in neighborRatings:
if not artist in userRatings:
recommendations.append((artist, neighborRatings[artist]))
# using the fn sorted for variety - sort is more efficient
return sorted(recommendations, key=lambda artistTuple: artistTuple[1], reverse = True)

28/12/2014

[Data Mining]Manhattan distance