Done analyzing

This commit is contained in:
Ian Shih
2023-06-15 12:33:33 +08:00
parent c746cdd016
commit 0e4280a1ae
15 changed files with 151 additions and 10 deletions

87
statistics-test.py Normal file
View File

@@ -0,0 +1,87 @@
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import statistics as st
from scipy import stats
from statsmodels.stats.weightstats import ztest
import numpy as np
data = pd.read_csv("result.csv")
data['viewRate'] = (data['view'] / data['video']) / data['fan']
ind = data.loc[data['isOffice'] == 0]
office = data.loc[data['isOffice'] == 1]
# Question 1
test11 = ztest(
ind['fan'],
office['fan'],
alternative='smaller'
)
test12 = ztest(
ind['fan'],
office['fan'],
alternative='larger'
)
print(test11)
print(test12)
# Question 2
ind = ind.sort_values(
by=['viewRate']
).iloc[
len(ind.index)*5//100:len(ind.index)*95//100
]
office = office.sort_values(
by=['viewRate']
).iloc[
len(office.index)*5//100:len(office.index)*95//100
]
test21 = ztest(
ind['viewRate'],
office['viewRate'],
alternative='smaller'
)
test22 = ztest(
ind['viewRate'],
office['viewRate'],
alternative='larger'
)
print(test21)
print(test22)
# Question 3
firstHalf = data.iloc[lambda x: x.index < len(data.index)//2]
firstHalf = firstHalf.sort_values(
by=['viewRate']
).iloc[
len(firstHalf.index)*5//100:len(firstHalf.index)*95//100
]
secondHalf = data.iloc[lambda x: x.index >= len(data.index)//2]
secondHalf = secondHalf.sort_values(
by=['viewRate']
).iloc[
len(secondHalf.index)*5//100:len(secondHalf.index)*95//100
]
test31 = ztest(
firstHalf['viewRate'],
secondHalf['viewRate'],
alternative='smaller'
)
test32 = ztest(
firstHalf['viewRate'],
secondHalf['viewRate'],
alternative='larger'
)
print(test31)
print(test32)