108 lines
2.3 KiB
Python
108 lines
2.3 KiB
Python
def load_data(filename):
    """Read the survey CSV into a pandas DataFrame.

    Args:
        filename: Path of the CSV file (anything ``pandas.read_csv`` accepts).

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    # Imported at function scope, like every other helper in this file.
    import pandas as pd

    return pd.read_csv(filename)
|
|
|
|
|
|
|
|
def get_questions(
    data,
    exclude=("Timestamp", "Ваш пол", "Ваш возраст", "googlehui"),
):
    """Return the column names of *data* that are treated as questions.

    Args:
        data: Object exposing a ``columns`` iterable (a pandas DataFrame).
        exclude: Column names that are not questions. Defaults to the
            metadata columns this script has always skipped.

    Returns:
        A list of the remaining column names, in their original order.
    """
    excluded = frozenset(exclude)
    return [col for col in data.columns if col not in excluded]
|
|
|
|
|
|
|
|
|
|
def get_counts(data, questions):
    """Count the answers to each question separately for women and men.

    Rows are split on the "Ваш пол" column: the value "женский" selects
    women and "мужской" selects men. Rows with any other value in that
    column end up in neither group.

    Args:
        data: DataFrame containing the "Ваш пол" column and every column
            named in *questions*.
        questions: Iterable of column names to tally.

    Returns:
        A ``(women_answers, men_answers)`` tuple. Each element maps a
        question to a ``{answer: count}`` dict built from
        ``Series.value_counts()``.
    """
    sex = data["Ваш пол"]
    women = data[sex == "женский"]
    men = data[sex == "мужской"]

    women_answers = {q: women[q].value_counts().to_dict() for q in questions}
    men_answers = {q: men[q].value_counts().to_dict() for q in questions}

    return women_answers, men_answers
|
|
|
|
|
|
|
|
def fisher_for_question(data, question):
    """Run Fisher's exact test (sex vs. "gave this answer") per answer.

    For every distinct non-missing value in ``data[question]`` the column is
    binarised into "equals this answer" / "does not", cross-tabulated
    against the "Ваш пол" column and passed to ``scipy.stats.fisher_exact``.

    Args:
        data: DataFrame containing "Ваш пол" and the *question* column.
        question: Name of the column to analyse.

    Returns:
        A ``{answer: p_value}`` dict. An answer is left out when its
        contingency table is not 2x2, because it is never passed to
        ``fisher_exact`` in that case.
    """
    import pandas as pd
    from scipy.stats import fisher_exact

    # Loop-invariant column lookups, done once.
    sex = data["Ваш пол"]
    answers = data[question]

    results = {}
    for answer in answers.dropna().unique():
        # Binarisation: True where the respondent gave exactly this answer.
        # Missing answers compare unequal, so they land in the False column.
        binary = answers == answer

        table = pd.crosstab(sex, binary)

        if table.shape == (2, 2):
            _, p = fisher_exact(table)
            results[answer] = p

    return results
|
|
|
|
|
|
|
|
def chi2_for_question(data, question):
    """Return the chi-square test p-value for sex vs. *question*.

    Cross-tabulates the "Ваш пол" column against ``data[question]`` and
    passes the table to ``scipy.stats.chi2_contingency``.

    Args:
        data: DataFrame containing "Ваш пол" and the *question* column.
        question: Name of the column to analyse.

    Returns:
        The p-value reported by ``chi2_contingency``.
    """
    import pandas as pd
    from scipy.stats import chi2_contingency

    table = pd.crosstab(data["Ваш пол"], data[question])

    # Only the p-value is needed; statistic, dof and expected are discarded.
    _, p, _, _ = chi2_contingency(table)

    return p
|
|
|
|
|
|
|
|
def expand_counts(count_dict, sep=";"):
    """Split combined answers into single options and sum their counts.

    Each key is converted to ``str`` and split on *sep*. Every resulting
    fragment is stripped of surrounding whitespace and credited with the
    key's count. Empty fragments (for example from a trailing separator,
    as in ``"a;"``) are skipped so they do not create a bogus ``''`` answer.

    Args:
        count_dict: Mapping of (possibly combined) answer -> count.
        sep: Separator between options inside one key.

    Returns:
        A plain ``dict`` mapping each individual option to its total count.
    """
    from collections import Counter

    result = Counter()

    for key, value in count_dict.items():
        # Split the key on the separator; keys may be non-strings, hence str().
        for raw_item in str(key).split(sep):
            item = raw_item.strip()
            if item:
                result[item] += value

    return dict(result)
|
|
|
|
|
|
|
|
def expand_all_counts(data_dict):
    """Apply ``expand_counts`` to the answer counts of every question.

    Args:
        data_dict: Mapping of question -> ``{answer: count}`` dict.

    Returns:
        A new dict with the same questions, each mapped to the result of
        ``expand_counts`` (called with its default separator).
    """
    return {
        question: expand_counts(answers)
        for question, answers in data_dict.items()
    }
|
|
|
|
|
|
|
|
data = load_data("MEN.fixed.csv")
questions = get_questions(data)

# Just look at the answer frequencies, split by sex.
women_ans, men_ans = get_counts(data, questions)

# Break combined answers into individual options before printing.
women_ans = expand_all_counts(women_ans)
men_ans = expand_all_counts(men_ans)

print("women_ans: ", women_ans)
print("men_ans: ", men_ans)

# Deliberate early stop: only the frequencies are wanted for now.
# ``raise SystemExit`` replaces ``exit()``, which is a helper injected by the
# ``site`` module for interactive use and is not guaranteed to exist.
raise SystemExit(0)

# NOTE(review): everything below is unreachable while the early exit above is
# in place; remove that exit to run the significance tests.
# Statistics
for q in questions:
    fisher_res = fisher_for_question(data, q)
    chi2_p = chi2_for_question(data, q)

    print(f"\nВопрос: {q}")
    print("Фишер:", fisher_res)
    print("Хи-квадрат p:", chi2_p)