88 lines
1.6 KiB
Python
88 lines
1.6 KiB
Python
import pandas as pd
|
|
import seaborn as sns
|
|
from matplotlib import pyplot as plt
|
|
import statistics as st
|
|
from scipy import stats
|
|
from statsmodels.stats.weightstats import ztest
|
|
import numpy as np
|
|
|
|
# Load the input table; the code below reads its 'view', 'video', 'fan'
# and 'isOffice' columns.
data = pd.read_csv("result.csv")

# viewRate is 'view' divided by 'video', then divided by 'fan'.
# NOTE(review): a 0 in 'video' or 'fan' would produce inf/NaN here --
# confirm whether the input can contain such rows.
data['viewRate'] = data['view'].div(data['video']).div(data['fan'])

# Split the rows on the isOffice flag: 0 -> ind, 1 -> office.
ind = data[data['isOffice'].eq(0)]
office = data[data['isOffice'].eq(1)]
|
|
|
|
# Question 1
# Run a pair of one-sided two-sample z-tests on the 'fan' column
# (ind vs. office): first with alternative='smaller', then 'larger'.
test11, test12 = (
    ztest(ind['fan'], office['fan'], alternative=direction)
    for direction in ('smaller', 'larger')
)

# One result per line, in the order the tests were run.
print(test11, test12, sep='\n')
|
|
|
|
# Question 2
# Trim each group before testing: order its rows by viewRate and keep
# only the positions from 5% (inclusive) to 95% (exclusive) of the
# group's row count, dropping both tails.
ind_rows = len(ind.index)
ind_sorted = ind.sort_values(by=['viewRate'])
ind = ind_sorted.iloc[ind_rows * 5 // 100:ind_rows * 95 // 100]

office_rows = len(office.index)
office_sorted = office.sort_values(by=['viewRate'])
office = office_sorted.iloc[office_rows * 5 // 100:office_rows * 95 // 100]

# One-sided z-tests on the trimmed viewRate values (ind vs. office),
# first with alternative='smaller', then 'larger'.
test21, test22 = (
    ztest(ind['viewRate'], office['viewRate'], alternative=direction)
    for direction in ('smaller', 'larger')
)

print(test21, test22, sep='\n')
|
|
|
|
|
|
# Question 3
# Split the table by row position into two halves. Positional slices
# are used (rather than comparing index labels against the midpoint) so
# the split does not depend on `data` carrying a default 0..n-1 index.
half = len(data.index) // 2

# Within each half, order the rows by viewRate and keep only the
# positions from 5% (inclusive) to 95% (exclusive) of that half's row
# count, dropping both tails.
firstHalf = data.iloc[:half]
firstHalf = firstHalf.sort_values(
    by=['viewRate']
).iloc[
    len(firstHalf.index) * 5 // 100:len(firstHalf.index) * 95 // 100
]

secondHalf = data.iloc[half:]
secondHalf = secondHalf.sort_values(
    by=['viewRate']
).iloc[
    len(secondHalf.index) * 5 // 100:len(secondHalf.index) * 95 // 100
]

# One-sided z-tests on the trimmed viewRate values (first half vs.
# second half), one per direction.
test31 = ztest(
    firstHalf['viewRate'],
    secondHalf['viewRate'],
    alternative='smaller'
)

test32 = ztest(
    firstHalf['viewRate'],
    secondHalf['viewRate'],
    alternative='larger'
)

print(test31)
print(test32)
|