# Files
# statistics2023-final/statistics-test.py
# 2023-06-15 12:33:33 +08:00
#
# 88 lines
# 1.6 KiB
# Python

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import statistics as st
from scipy import stats
from statsmodels.stats.weightstats import ztest
import numpy as np
# Load the results table from the working directory.
data = pd.read_csv("result.csv")

# viewRate = (view / video) / fan, computed row by row.
# NOTE(review): a row with video == 0 or fan == 0 would give inf/NaN here
# and flow into the z-tests -- confirm result.csv cannot contain such rows.
data['viewRate'] = data['view'].div(data['video']).div(data['fan'])

# Split the rows on the isOffice flag.  Only the exact values 0 and 1 are
# matched, so a row carrying any other value ends up in neither group.
# (presumably ind = individual accounts, office = official accounts --
# verify against whatever produces result.csv)
ind = data[data['isOffice'] == 0]
office = data[data['isOffice'] == 1]
# Question 1
# Compare the mean `fan` value of the two groups with a pair of one-sided
# two-sample z-tests.  Per the statsmodels API, alternative='smaller' tests
# H1: mean(ind) < mean(office) and alternative='larger' tests
# H1: mean(ind) > mean(office).  Each call returns a (statistic, p-value)
# tuple, which is printed unchanged.
test11, test12 = [
    ztest(ind['fan'], office['fan'], alternative=side)
    for side in ('smaller', 'larger')
]
print(test11)
print(test12)
def _trim_tails(frame, column='viewRate', lower_pct=5, upper_pct=95):
    """Return *frame* sorted by *column* with both ends sliced off.

    Rows are sorted ascending by *column*; the result keeps positions
    ``n * lower_pct // 100`` up to (but not including)
    ``n * upper_pct // 100``, where ``n`` is the number of rows in *frame*.
    With the defaults this discards roughly the lowest 5% and the highest
    5% of rows, so a few extreme values cannot dominate the group mean.
    """
    n_rows = len(frame.index)
    start = n_rows * lower_pct // 100
    stop = n_rows * upper_pct // 100
    return frame.sort_values(by=[column]).iloc[start:stop]


# Question 2
# Trim the extreme viewRate rows inside each group, then compare the group
# means with a pair of one-sided two-sample z-tests ('smaller': H1 is
# mean(ind) < mean(office); 'larger': H1 is mean(ind) > mean(office)).
# Note that `ind` and `office` are rebound to their trimmed versions here.
ind = _trim_tails(ind)
office = _trim_tails(office)
test21 = ztest(ind['viewRate'], office['viewRate'], alternative='smaller')
test22 = ztest(ind['viewRate'], office['viewRate'], alternative='larger')
print(test21)
print(test22)

# Question 3
# Split the full table at its positional midpoint and compare viewRate
# between the two halves.  The split uses plain positional slices: `iloc`
# is position-based, so this does not rely on the index labels happening
# to equal the row positions.
midpoint = len(data.index) // 2
firstHalf = _trim_tails(data.iloc[:midpoint])
secondHalf = _trim_tails(data.iloc[midpoint:])
test31 = ztest(
    firstHalf['viewRate'],
    secondHalf['viewRate'],
    alternative='smaller',
)
test32 = ztest(
    firstHalf['viewRate'],
    secondHalf['viewRate'],
    alternative='larger',
)
print(test31)
print(test32)