其他
Python 爬取李子柒辣椒酱 1794 条数据,有人嫌牛肉粒太小...... | 原力计划
The following article comes from 数据不吹牛 (Author: 小Z)
# Fragment of a review-tagging routine. The enclosing `def` line and the
# original indentation are not present in this excerpt, so the nesting
# described in the comments below is inferred from statement order
# -- TODO confirm against the original source.
#
# Builds `judges`, a zero-filled table with one row per row of `df` and 13
# flag columns (brand mention, plus positive/negative flags for logistics,
# packaging, ingredients, taste, date and value for money). Flags are set to 1
# by keyword substring tests on each text fragment, and the result returned is
# `df` and `judges` concatenated column-wise.
#
# NOTE(review): every flag is written with chained indexing,
# `judges.iloc[i][col] = 1`. pandas documents that chained assignment may
# write to a temporary object, and with Copy-on-Write enabled it never
# updates `judges`. A single indexer such as
# `judges.iloc[i, judges.columns.get_loc(col)] = 1` avoids the problem.
judges = pd.DataFrame(np.zeros(13 * len(df)).reshape(len(df),13),
columns = ['品牌','物流正面','物流负面','包装正面','包装负面','原料正面',
'原料负面','口感正面','口感负面','日期正面','日期负面',
'性价比正面','性价比负面'])
# Presumably result[i] holds the text fragments of the i-th review, aligned
# with row i of df -- verify against the caller.
for i in range(len(result)):
words = result[i]
for word in words:
# Li Ziqi's products carry a strong personal-brand (IP) attribute and the
# reviews are almost all positive, so sentiment is not scored here; only
# mentions of the brand are counted.
if '李子柒' in word or '子柒' in word or '小柒' in word or '李子七' in word\
or '小七' in word:
judges.iloc[i]['品牌'] = 1
# First check whether the fragment is about logistics/delivery.
if '物流' in word or '快递' in word or '配送' in word or '取货' in word:
# Then decide whether the sentiment is positive or negative.
# NOTE(review): these are substring tests and the positive list is tried
# first, so a fragment containing '不好' also satisfies `'好' in word` and is
# flagged as positive.
if '好' in word or '不错' in word or '棒' in word or '满意' in word or '迅速' in word:
judges.iloc[i]['物流正面'] = 1
elif '慢' in word or '龟速' in word or '暴力' in word or '差' in word:
judges.iloc[i]['物流负面'] = 1
# Check whether the fragment is about packaging.
# NOTE(review): the positive list again contains '好', so '不好' is flagged as
# positive here as well.
if '包装' in word or '盒子' in word or '袋子' in word or '外观' in word:
if '高端' in word or '大气' in word or '还行' in word or '完整' in word or '好' in word or\
'严实' in word or '紧' in word:
judges.iloc[i]['包装正面'] = 1
elif '破' in word or '破损' in word or '瘪' in word or '简陋' in word:
judges.iloc[i]['包装负面'] = 1
# Product
# The main ingredient is beef and most reviews mention it, so it is analysed
# separately.
if '肉' in word:
if '大' in word or '多' in word or '足' in word or '香' in word or '才' in word:
judges.iloc[i]['原料正面'] = 1
elif '小' in word or '少' in word or '没' in word:
judges.iloc[i]['原料负面'] = 1
# Taste sentiment
if '口味' in word or '味道' in word or '口感' in word or '吃起来' in word:
if '不错' in word or '好' in word or '棒' in word or '鲜' in word or\
'可以' in word or '喜欢' in word or '符合' in word:
judges.iloc[i]['口感正面'] = 1
# NOTE(review): '不好' and '不鲜' each contain a keyword ('好', '鲜') already
# matched by the preceding `if`, so only the '不行' and '太烂' tests can ever
# be true in this branch.
elif '不好' in word or '不行' in word or '不鲜' in word or\
'太烂' in word:
judges.iloc[i]['口感负面'] = 1
# For taste, some reviews need no leading topic word: customers directly say
# the product is tasty or awful. The negative phrases are tested first, so
# '不好吃' is not caught by the '好吃' test.
if '难吃' in word or '不好吃' in word:
judges.iloc[i]['口感负面'] = 1
elif '好吃' in word or '香' in word:
judges.iloc[i]['口感正面'] = 1
# Is the production date fresh?
if '日期' in word or '时间' in word or '保质期' in word:
if '新鲜' in word:
judges.iloc[i]['日期正面'] = 1
elif '久' in word or '长' in word:
judges.iloc[i]['日期负面'] = 1
# Presumably this `elif` pairs with the outer date-topic `if`, catching
# fragments that mention expiry ('过期') without any date word -- indentation
# is lost here, TODO confirm.
elif '过期' in word:
judges.iloc[i]['日期负面'] = 1
# Value for money
if '划算' in word or '便宜' in word or '赚了' in word or '囤货' in word or '超值' in word or \
'太值' in word or '物美价廉' in word or '实惠' in word or '性价比高' in word or '不贵' in word:
judges.iloc[i]['性价比正面'] = 1
# NOTE(review): '不划算' and '不便宜' contain '划算' / '便宜', which the preceding
# `if` already matches, so those two tests can never be true in this branch.
# '不贵' is handled correctly because it is listed in the positive branch.
elif '贵' in word or '不值' in word or '亏了' in word or '不划算' in word or '不便宜' in word:
judges.iloc[i]['性价比负面'] = 1
# Append the flag columns to the right of the original review columns.
final_result = pd.concat([df,judges],axis = 1)
return final_result
运行一下,结果毕现:
热 文 推 荐