# handmail/ch02

2 years ago

In [43]:
``````import csv
import pandas as pd
import random
import numpy as np
import math
import operator``````
In [2]:
``````a = set([1,2,3])
b = set([2,3,4])
a & b   # 交集``````
Out[2]:
``{2, 3}``

M次实验，每次实验选取不同的 k ( 0 ≤ k ≤ M  1 )和相同的随机数种子 seed ,进行 M 次实验就可以得到 M 个不同的训练集和测试集,然后分别进行实验,用 M 次实验的平均值作为最后的评测指标

In [7]:
``````# 将数据集分成训练集和测试集
def SplitData(data, M, k, seed):
test = {};  train = {}
random.seed(seed)
for item in data:
user = item[0]
mov = item[1]
rdm = random.randint(0, M)
if rdm == k:
if user not in test:
test[user] = set()
else:
if user not in train:
train[user] = set()
return train, test``````
In [5]:
``train, test = SplitData(dataSet, 8, 1,0.6)``
In [6]:
``len(test), len(train)``
Out[6]:
``(86, 314)``

Recall

In [9]:
``````# 召回率
def Recall(train, test, N):
hit = 0
all = 0
for user in train.keys():
tu = test[user]
rank = GetRecommendation(user,  N)
for item, pui in rank:
if item in tu:
hit += 1
all += len(tu)
return hit / (all * 1.0)``````
In [11]:
``````#  准确率
def Precision(train, test, N):
hit = 0
all = 0
for user in train.keys():
tu = test[user]
rank =  GetRecommendation(user,  N)
for item, pui in rank:
if item in tu:
hit +=1
all += N
return hit / (all * 1.0)``````

In [12]:
``````# 覆盖率
# 新颖度        ``````

In [54]:
``````# UserCF : User Based Collaborative Filtering 基于用户的协同过滤
def UserSimilarity(train):
# 构建倒排表 (物品 - 用户)
item_users = dict()
for u, items in train.items():
for i in items:
if i not  in item_users:
item_users[i] = set()
# calculate co-rated items between users （计算被共同评分了的物品）
C = dict()
N = dict()
for i, users in item_users.items(): # 使用两层循环，标记 一个物品 里的两两用户，即 C[u][v]
for u in users:
if u not in N:
N[u] = 0
N[u] += 1
for v in users:
if u == v:
continue
if u not in C:
C[u] = dict()
if u not in C[u]:
val = 1
C[u].update({v:val})
else:
val = C[u][v] + 1
C[u].update({v: val})
#calculate finial similarity matrix W
W = dict() #W是余弦相似度中的分子部分
for u, related_users in C.items():
if u not in W:
W[u] = dict()
for v, cuv in related_users.items():
if v not in W[u]:
val = cuv / math.sqrt(N[u] * N[v]) # N[u] 交 N[v]，在开平方，但是N[v]是哪里来的？
W[u].update({v:val})
return W``````
In [67]:
``````def UserSimilarity2(train):
# 构建倒排表 (物品 - 用户)
item_users = dict()
for u, items in train.items():
for i in items:
if i not  in item_users:
item_users[i] = set()
# calculate co-rated items between users （计算被共同评分了的物品）
C = dict()
N = dict()
for i, users in item_users.items(): # 使用两层循环，标记 一个物品 里的两两用户，即 C[u][v]
for u in users:
if u not in N:
N[u] = 0
N[u] += 1
for v in users:
if u == v:
continue
if u not in C:
C[u] = dict()
if u not in C[u]:
val = 1 / math.log(1 + len(users))
C[u].update({v:val})
else:
val = C[u][v] + 1 / math.log(1 + len(users))
C[u].update({v: val})
#calculate finial similarity matrix W
W = dict()
for u, related_users in C.items():
if u not in W:
W[u] = dict()
for v, cuv in related_users.items():
if v not in W[u]:
val = cuv / math.sqrt(N[u] * N[v]) # N[u] 交 N[v]，在开平方，但是N[v]是哪里来的？
W[u].update({v:val})
return W``````

In [68]:
``````def Recommend(user, train, W, K):
rank = dict() # 排名
interacted_items = train[user] # interacted 相相关的
'''
W 是一个矩阵（UserSimilarity计算的结果），矩阵的结构是 用户之间的相似度，以用户362为例，结构如下：

362: {206: 0.39464380868665966,
561: 0.22784770917926217,
434: 0.22784770917926217,
312: 0.19732190434332983,
570: 0.2944888920518062,
514: 0.2082350925539453},
其他用户:{...}

可见，W[user].items(), 是一个dict(用户，相似度)，而 key=operator.itemgetter(1) 是说按照 dict里的相似度进行排序，reverse=True 是逆序的意思
'''

for v, wuv in sorted(W[user].items(), key=operator.itemgetter(1), reverse=True)[0:K]:
for i in train[v]:
if i not in interacted_items:
if(i in rank):
rank[i] += wuv
else:
rank[i] = wuv
return rank``````
In [69]:
``````df = pd.read_csv('/home/jason/文档/MovieLens数据集/ml-latest-small/ratings.csv', sep=',', engine='python').sample(frac=0.01)
df.columns=['user', 'movie', 'rating','time']
df = df.drop(['rating','time'],axis=1)
dataSet = np.array(df)
dataSet
``````
Out[69]:
``````array([[   387,   1267],
[   482,   8949],
[   483, 102125],
...,
[   322,   3481],
[    68,   8464],
[    60,    527]])``````
In [59]:
``trn, tst = SplitData(dataSet, 10, 1, 10)``
In [77]:
``````print(len(trn))
trn
print(trn[525])

``````
```337 {1760, 2048, 53996, 1198, 86833, 7090, 96821, 2006, 117533, 223} ```
In [61]:
``````tst
len(tst)``````
Out[61]:
``61``
In [70]:
``````userW = UserSimilarity2(trn)
userW
``````
Out[70]:
``````{362: {206: 0.39464380868665966,
561: 0.22784770917926217,
434: 0.22784770917926217,
312: 0.19732190434332983,
570: 0.2944888920518062,
514: 0.2082350925539453},
206: {362: 0.39464380868665966,
561: 0.3222253204769533,
434: 0.3222253204769533,
312: 0.2790553132756236},
561: {362: 0.22784770917926217,
206: 0.3222253204769533,
434: 0.1860368755170824,
312: 0.16111266023847665,
196: 0.5255268625199613},
434: {362: 0.22784770917926217,
206: 0.3222253204769533,
561: 0.1860368755170824,
312: 0.16111266023847665,
509: 0.23502275771936396},
312: {362: 0.19732190434332983,
206: 0.2790553132756236,
561: 0.16111266023847665,
434: 0.16111266023847665,
51: 0.18580180409177757},
51: {312: 0.18580180409177757,
361: 0.26276343125998064,
415: 0.37160360818355515},
91: {525: 0.10879439650028087,
304: 0.13632186771065563,
219: 0.1030496457777831,
577: 0.23484253110304637,
356: 0.1355863985495169,
451: 0.23484253110304637,
288: 0.08344149309849433},
525: {91: 0.10879439650028087},
18: {483: 0.18580180409177757, 339: 0.18580180409177757},
483: {18: 0.18580180409177757},
339: {18: 0.18580180409177757,
279: 0.18580180409177757,
563: 0.37160360818355515},
380: {381: 0.17851259917722495,
64: 0.11550804670065219,
254: 0.10003290278428412},
381: {380: 0.17851259917722495, 115: 0.6436363296498353},
64: {380: 0.11550804670065219, 254: 0.2082350925539453},
254: {64: 0.2082350925539453,
380: 0.10003290278428412,
417: 0.31066746727980593,
21: 0.13893471504706753,
133: 0.31066746727980593,
111: 0.14392145858854952},
610: {390: 0.24327166614569326, 414: 0.06621016121484992},
390: {610: 0.24327166614569326, 308: 0.45511961331341866},
308: {390: 0.45511961331341866},
88: {68: 0.22811011491194377, 103: 0.5100697232983947},
68: {88: 0.22811011491194377, 103: 0.16129820911147802},
103: {88: 0.5100697232983947, 68: 0.16129820911147802},
24: {279: 0.21967507280760062,
551: 0.31066746727980593,
135: 0.21967507280760062},
279: {24: 0.21967507280760062,
551: 0.21967507280760062,
135: 0.15533373363990297,
339: 0.18580180409177757},
551: {24: 0.31066746727980593,
279: 0.21967507280760062,
135: 0.21967507280760062},
135: {24: 0.21967507280760062,
279: 0.15533373363990297,
551: 0.21967507280760062},
107: {11: 0.39464380868665966,
240: 0.3222253204769533,
374: 0.5581106265512472,
447: 0.2790553132756236},
11: {107: 0.39464380868665966,
240: 0.22784770917926217,
374: 0.39464380868665966,
447: 0.19732190434332983,
263: 0.24327166614569326},
240: {107: 0.3222253204769533,
11: 0.22784770917926217,
374: 0.3222253204769533,
447: 0.16111266023847665},
374: {107: 0.5581106265512472,
11: 0.39464380868665966,
240: 0.3222253204769533,
447: 0.2790553132756236},
447: {107: 0.2790553132756236,
11: 0.19732190434332983,
240: 0.16111266023847665,
374: 0.2790553132756236,
510: 0.32181816482491765},
263: {11: 0.24327166614569326,
301: 0.24327166614569326,
305: 0.17201904480216956,
515: 0.19423531615409825,
419: 0.13734510919847584,
549: 0.19423531615409825,
590: 0.09711765807704913,
540: 0.19423531615409825,
69: 0.27264373542131126,
182: 0.1113063388914321},
496: {306: 0.9102392266268373},
306: {496: 0.9102392266268373},
8: {202: 0.31066746727980593,
507: 0.6213349345596119,
559: 0.43935014561520125},
202: {8: 0.31066746727980593,
507: 0.31066746727980593,
559: 0.21967507280760062},
507: {8: 0.6213349345596119,
202: 0.31066746727980593,
559: 0.43935014561520125},
559: {8: 0.43935014561520125,
202: 0.21967507280760062,
507: 0.43935014561520125},
137: {273: 0.5581106265512472,
469: 0.2790553132756236,
122: 0.24959465998671754,
348: 0.5581106265512472},
273: {137: 0.5581106265512472,
469: 0.2790553132756236,
122: 0.24959465998671754,
348: 0.5581106265512472},
469: {137: 0.2790553132756236,
273: 0.2790553132756236,
122: 0.12479732999335877,
348: 0.2790553132756236},
122: {137: 0.24959465998671754,
273: 0.24959465998671754,
469: 0.12479732999335877,
348: 0.24959465998671754,
249: 0.14392145858854952},
348: {137: 0.5581106265512472,
273: 0.5581106265512472,
469: 0.2790553132756236,
122: 0.24959465998671754},
249: {122: 0.14392145858854952, 29: 0.22755980665670933},
304: {91: 0.13632186771065563,
219: 0.13632186771065563,
606: 0.16129820911147802,
599: 0.07213475204444816},
219: {304: 0.13632186771065563, 91: 0.1030496457777831},
577: {91: 0.23484253110304637,
356: 0.35872789172491043,
451: 0.6213349345596119},
356: {577: 0.35872789172491043,
91: 0.1355863985495169,
451: 0.35872789172491043},
451: {577: 0.6213349345596119,
91: 0.23484253110304637,
356: 0.35872789172491043},
288: {91: 0.08344149309849433,
414: 0.04248632869155395,
45: 0.11038271988126416,
177: 0.08344149309849433,
489: 0.071424047500042,
274: 0.05519135994063208},
414: {610: 0.06621016121484992,
323: 0.12386786939451837,
62: 0.07834091923978799,
305: 0.08758781041999354,
288: 0.04248632869155395},
323: {414: 0.12386786939451837},
62: {414: 0.07834091923978799,
184: 0.22811011491194377,
495: 0.18625112890063278},
305: {414: 0.08758781041999354, 263: 0.17201904480216956},
580: {599: 0.12872726592996706},
599: {580: 0.12872726592996706,
304: 0.07213475204444816,
606: 0.06451928364459121,
448: 0.02707234193000523,
111: 0.057568583435419804,
600: 0.042189197769058356,
252: 0.11162212531024944,
360: 0.18204784532536747,
603: 0.0814142714609778,
354: 0.07432072163671102},
220: {606: 0.18625112890063278, 52: 0.2404491734814939},
606: {220: 0.18625112890063278,
52: 0.18625112890063278,
156: 0.20353567865244448,
304: 0.16129820911147802,
599: 0.06451928364459121},
52: {220: 0.2404491734814939,
606: 0.18625112890063278,
517: 0.3034130755422791},
307: {108: 0.3440380896043391},
108: {307: 0.3440380896043391},
570: {514: 0.17002324109946493, 362: 0.2944888920518062},
514: {570: 0.17002324109946493, 362: 0.2082350925539453},
368: {84: 0.28784291717709903},
84: {368: 0.28784291717709903},
248: {182: 0.2082350925539453, 542: 0.2944888920518062},
182: {248: 0.2082350925539453,
542: 0.17002324109946493,
69: 0.2944888920518062,
263: 0.1113063388914321},
542: {248: 0.2944888920518062,
182: 0.17002324109946493,
425: 0.2145454432166118},
425: {542: 0.2145454432166118,
421: 0.2944888920518062,
343: 0.2944888920518062},
156: {606: 0.20353567865244448},
357: {39: 0.2145454432166118},
39: {357: 0.2145454432166118},
465: {474: 0.14045296193542475},
474: {465: 0.14045296193542475},
301: {263: 0.24327166614569326},
45: {288: 0.11038271988126416,
448: 0.11038271988126416,
350: 0.45511961331341866},
177: {288: 0.08344149309849433,
600: 0.1030496457777831,
75: 0.27264373542131126,
7: 0.3440380896043391},
489: {288: 0.071424047500042,
274: 0.07362222301295154,
33: 0.18580180409177757,
512: 0.17002324109946493,
436: 0.2082350925539453},
274: {288: 0.05519135994063208,
489: 0.07362222301295154,
534: 0.10176783932622224,
370: 0.16090908241245883},
448: {294: 0.06981216182982913,
17: 0.17495247173941134,
510: 0.12371007915228557,
45: 0.11038271988126416,
111: 0.04280513104684762,
599: 0.02707234193000523,
600: 0.051161917253500167,
252: 0.13536170965002617},
294: {448: 0.06981216182982913,
464: 0.13893471504706753,
322: 0.19648335830400424,
76: 0.13893471504706753,
186: 0.1316994362492872,
221: 0.16129820911147802},
17: {448: 0.17495247173941134, 510: 0.5100697232983947},
510: {448: 0.12371007915228557,
17: 0.5100697232983947,
447: 0.32181816482491765},
111: {448: 0.04280513104684762,
599: 0.057568583435419804,
600: 0.06670697880275943,
252: 0.17649007662455862,
490: 0.20353567865244448,
254: 0.14392145858854952},
600: {448: 0.051161917253500167,
111: 0.06670697880275943,
599: 0.042189197769058356,
252: 0.21094598884529175,
177: 0.1030496457777831,
75: 0.27264373542131126},
252: {448: 0.13536170965002617,
111: 0.17649007662455862,
599: 0.11162212531024944,
600: 0.21094598884529175},
515: {419: 0.3633810027301768,
549: 0.5138983423697507,
263: 0.19423531615409825,
590: 0.25694917118487537,
540: 0.5138983423697507},
419: {515: 0.3633810027301768,
549: 0.3633810027301768,
263: 0.13734510919847584,
590: 0.1816905013650884,
540: 0.3633810027301768},
549: {515: 0.5138983423697507,
419: 0.3633810027301768,
263: 0.19423531615409825,
590: 0.25694917118487537,
540: 0.5138983423697507},
590: {515: 0.25694917118487537,
419: 0.1816905013650884,
549: 0.25694917118487537,
263: 0.09711765807704913,
540: 0.25694917118487537},
540: {515: 0.5138983423697507,
419: 0.3633810027301768,
549: 0.5138983423697507,
263: 0.19423531615409825,
590: 0.25694917118487537},
69: {182: 0.2944888920518062, 263: 0.27264373542131126},
352: {326: 0.37160360818355515},
326: {352: 0.37160360818355515},
226: {12: 0.5255268625199613},
12: {226: 0.5255268625199613},
187: {318: 0.2744474531076814},
318: {187: 0.2744474531076814},
360: {599: 0.18204784532536747},
603: {599: 0.0814142714609778},
354: {599: 0.07432072163671102, 330: 0.26276343125998064},
534: {274: 0.10176783932622224},
370: {274: 0.16090908241245883, 527: 0.6436363296498353},
105: {522: 0.18033688011112042, 567: 0.25503486164919736},
522: {105: 0.18033688011112042,
567: 0.5100697232983947,
300: 0.6436363296498353},
567: {105: 0.25503486164919736, 522: 0.5100697232983947},
140: {4: 0.18580180409177757},
4: {140: 0.18580180409177757},
178: {239: 0.6436363296498353},
239: {178: 0.6436363296498353},
29: {249: 0.22755980665670933},
184: {62: 0.22811011491194377, 495: 0.2944888920518062},
495: {184: 0.2944888920518062, 62: 0.18625112890063278},
403: {28: 0.5255268625199613},
28: {403: 0.5255268625199613},
75: {600: 0.27264373542131126, 177: 0.27264373542131126},
7: {177: 0.3440380896043391},
393: {332: 0.45511961331341866},
332: {393: 0.45511961331341866},
344: {477: 0.32181816482491765,
432: 0.36067376022224085,
265: 0.36067376022224085},
477: {344: 0.32181816482491765},
330: {354: 0.26276343125998064, 160: 0.37160360818355515},
290: {387: 0.25245493881090375},
387: {290: 0.25245493881090375},
517: {52: 0.3034130755422791},
160: {330: 0.37160360818355515, 453: 0.26276343125998064},
453: {160: 0.26276343125998064,
521: 0.36067376022224085,
6: 0.25503486164919736},
350: {45: 0.45511961331341866},
382: {462: 0.37160360818355515},
462: {382: 0.37160360818355515},
180: {22: 0.45511961331341866},
22: {180: 0.45511961331341866},
421: {425: 0.2944888920518062, 343: 0.7213475204444817},
343: {425: 0.2944888920518062, 421: 0.7213475204444817},
521: {453: 0.36067376022224085, 6: 0.5100697232983947},
6: {521: 0.5100697232983947, 453: 0.25503486164919736},
331: {475: 0.45511961331341866},
475: {331: 0.45511961331341866},
149: {438: 0.45511961331341866},
438: {149: 0.45511961331341866},
104: {38: 0.37160360818355515},
38: {104: 0.37160360818355515},
130: {275: 0.24959465998671754,
443: 0.39464380868665966,
90: 0.5581106265512472,
27: 0.39464380868665966},
275: {130: 0.24959465998671754,
443: 0.17649007662455862,
90: 0.24959465998671754,
27: 0.17649007662455862},
443: {130: 0.39464380868665966,
275: 0.17649007662455862,
90: 0.39464380868665966,
27: 0.2790553132756236},
90: {130: 0.5581106265512472,
275: 0.24959465998671754,
443: 0.39464380868665966,
27: 0.39464380868665966},
27: {130: 0.39464380868665966,
275: 0.17649007662455862,
443: 0.2790553132756236,
90: 0.39464380868665966},
476: {391: 0.3034130755422791, 512: 0.3034130755422791},
391: {476: 0.3034130755422791},
512: {476: 0.3034130755422791,
489: 0.17002324109946493,
436: 0.2944888920518062},
361: {51: 0.26276343125998064},
415: {51: 0.37160360818355515},
481: {524: 0.6436363296498353, 129: 0.6436363296498353},
524: {481: 0.6436363296498353},
176: {586: 0.2944888920518062, 420: 0.5100697232983947},
586: {176: 0.2944888920518062, 420: 0.4164701851078906},
420: {176: 0.5100697232983947, 586: 0.4164701851078906},
264: {298: 0.32259641822295604, 302: 0.5100697232983947},
298: {264: 0.32259641822295604,
302: 0.22811011491194377,
467: 0.28784291717709903,
152: 0.22811011491194377,
385: 0.32259641822295604},
302: {264: 0.5100697232983947, 298: 0.22811011491194377},
467: {298: 0.28784291717709903},
152: {385: 0.5100697232983947, 298: 0.22811011491194377},
385: {152: 0.5100697232983947, 298: 0.32259641822295604},
282: {405: 0.5255268625199613},
405: {282: 0.5255268625199613},
464: {322: 0.43935014561520125,
76: 0.31066746727980593,
294: 0.13893471504706753},
322: {464: 0.43935014561520125,
76: 0.43935014561520125,
294: 0.19648335830400424},
76: {464: 0.31066746727980593,
322: 0.43935014561520125,
294: 0.13893471504706753},
186: {221: 0.2944888920518062, 294: 0.1316994362492872},
221: {186: 0.2944888920518062, 294: 0.16129820911147802},
33: {489: 0.18580180409177757},
417: {21: 0.27786943009413506,
133: 0.6213349345596119,
254: 0.31066746727980593},
21: {417: 0.27786943009413506,
133: 0.27786943009413506,
254: 0.13893471504706753,
230: 0.40707135730488897},
133: {417: 0.6213349345596119,
21: 0.27786943009413506,
254: 0.31066746727980593},
230: {21: 0.40707135730488897},
457: {93: 0.6436363296498353},
93: {457: 0.6436363296498353},
509: {434: 0.23502275771936396,
234: 0.20353567865244448,
445: 0.40707135730488897},
292: {199: 0.5255268625199613},
199: {292: 0.5255268625199613},
432: {19: 0.24327166614569326,
265: 0.36067376022224085,
344: 0.36067376022224085},
19: {432: 0.24327166614569326,
345: 0.3440380896043391,
216: 0.19278823416444013,
167: 0.27264373542131126},
265: {432: 0.36067376022224085, 344: 0.36067376022224085},
234: {509: 0.20353567865244448},
445: {509: 0.40707135730488897},
345: {19: 0.3440380896043391},
216: {19: 0.19278823416444013, 167: 0.5100697232983947},
167: {216: 0.5100697232983947, 19: 0.27264373542131126},
116: {166: 0.6436363296498353},
166: {116: 0.6436363296498353},
300: {522: 0.6436363296498353},
162: {141: 0.6436363296498353},
141: {162: 0.6436363296498353},
456: {43: 0.6436363296498353},
43: {456: 0.6436363296498353},
490: {111: 0.20353567865244448},
337: {597: 0.45511961331341866},
597: {337: 0.45511961331341866},
436: {512: 0.2944888920518062, 489: 0.2082350925539453},
541: {335: 0.9102392266268373},
335: {541: 0.9102392266268373},
576: {572: 0.5255268625199613},
572: {576: 0.5255268625199613, 585: 0.5255268625199613},
379: {470: 0.9102392266268373},
470: {379: 0.9102392266268373},
585: {572: 0.5255268625199613},
563: {339: 0.37160360818355515},
115: {381: 0.6436363296498353},
508: {278: 0.9102392266268373},
278: {508: 0.9102392266268373},
129: {481: 0.6436363296498353},
222: {286: 0.6436363296498353},
286: {222: 0.6436363296498353},
196: {561: 0.5255268625199613},
527: {370: 0.6436363296498353},
102: {46: 0.9102392266268373},
46: {102: 0.9102392266268373},
16: {82: 0.9102392266268373},
82: {16: 0.9102392266268373}}``````
In [73]:
``rk = Recommend(1, trn, userW, 2)``
```--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-73-efee44635e51> in <module> ----> 1 rk = Recommend(1, trn, userW, 2) <ipython-input-68-646543567d80> in Recommend(user, train, W, K) 2 rank = dict() # 排名 3 interacted_items = train[user] ----> 4 for v, wuv in sorted(W[user].items(), key=operator.itemgetter(1), reverse=True)[0:K]: 5 for i in train[v]: 6 if i not in interacted_items: KeyError: 1```
In [72]:
``rk``
Out[72]:
``````{1372: 0.26276343125998064,
1125: 0.26276343125998064,
2359: 0.26276343125998064,
105954: 0.2145454432166118,
2947: 0.2145454432166118,
54276: 0.2145454432166118,
117572: 0.2145454432166118,
8798: 0.2145454432166118}``````
In [ ]:
`` ``
In [ ]:
`` ``
In [ ]:
`` ``
In [ ]:
`` ``
In [ ]:
``````import jovian #[ˈdʒəʊvɪən] 木星
jovian.commit()
``````
```[jovian] Saving notebook.. ```
In [ ]:
`` ``