"""Compare survey answers between female and male respondents.

Loads a CSV of survey responses, counts how often each answer was given by
each gender, and can optionally run Fisher's exact test and a chi-square test
of independence for every question.
"""

from collections import Counter

import pandas as pd
from scipy.stats import chi2_contingency, fisher_exact

# Column that holds the respondent's gender ("Your gender") and its two values.
GENDER_COLUMN = "Ваш пол"
FEMALE = "женский"
MALE = "мужской"

# Columns that are metadata rather than survey questions.
NON_QUESTION_COLUMNS = ("Timestamp", GENDER_COLUMN, "Ваш возраст", "googlehui")

# File analysed when the module is run as a script.
DEFAULT_DATA_FILE = "MEN.fixed.csv"


def load_data(filename):
    """Read the survey CSV at *filename* into a DataFrame."""
    return pd.read_csv(filename)


def get_questions(data):
    """Return the column names of *data* that are survey questions.

    Every column not listed in ``NON_QUESTION_COLUMNS`` is treated as a
    question; the original column order is preserved.
    """
    return [col for col in data.columns if col not in NON_QUESTION_COLUMNS]


def get_counts(data, questions):
    """Count raw answers per question, separately for women and men.

    Args:
        data: DataFrame with a gender column and one column per question.
        questions: Names of the question columns to tabulate.

    Returns:
        A ``(women_answers, men_answers)`` tuple. Each element maps a question
        to a ``{answer: count}`` dict. Missing answers are not counted because
        ``value_counts`` drops NaN by default.
    """
    women = data[data[GENDER_COLUMN] == FEMALE]
    men = data[data[GENDER_COLUMN] == MALE]

    women_answers = {q: women[q].value_counts().to_dict() for q in questions}
    men_answers = {q: men[q].value_counts().to_dict() for q in questions}
    return women_answers, men_answers


def fisher_for_question(data, question):
    """Run Fisher's exact test of gender against each answer to *question*.

    Each distinct non-missing answer is binarized ("gave exactly this answer"
    versus "did not") and cross-tabulated against gender. Rows where the
    answer is missing compare unequal to every answer, so they fall on the
    "did not" side of the table.

    Returns:
        A dict mapping each answer to its p-value. Answers whose
        contingency table is not 2x2 are omitted.
    """
    results = {}
    for answer in data[question].dropna().unique():
        # Binarize: True where the respondent gave exactly this answer.
        binary = data[question] == answer
        table = pd.crosstab(data[GENDER_COLUMN], binary)

        # Only 2x2 tables are tested; other shapes are skipped.
        if table.shape == (2, 2):
            _, p = fisher_exact(table)
            results[answer] = p
    return results


def chi2_for_question(data, question):
    """Return the chi-square independence p-value of gender vs. *question*.

    Returns NaN when the contingency table is empty (no row has both a gender
    and an answer), because ``chi2_contingency`` raises ``ValueError`` on a
    zero-size table.
    """
    table = pd.crosstab(data[GENDER_COLUMN], data[question])
    if table.size == 0:
        return float("nan")

    _, p, _, _ = chi2_contingency(table)
    return p


def expand_counts(count_dict, sep=";"):
    """Split multi-choice answer keys and sum the counts per single option.

    Args:
        count_dict: Mapping of answer (possibly several options joined by
            *sep*) to how many times it occurred.
        sep: Separator between options inside one answer key.

    Returns:
        A plain dict mapping each individual, whitespace-stripped option to
        its total count over all keys that contain it.
    """
    result = Counter()
    for key, value in count_dict.items():
        # Split the key on the separator; each option inherits the key's count.
        for item in str(key).split(sep):
            result[item.strip()] += value
    return dict(result)


def expand_all_counts(data_dict):
    """Apply :func:`expand_counts` to the answer counts of every question."""
    return {
        question: expand_counts(answers)
        for question, answers in data_dict.items()
    }


def main(filename=DEFAULT_DATA_FILE, run_statistics=False):
    """Print per-gender answer frequencies and, optionally, test statistics.

    Args:
        filename: Path of the survey CSV to analyse.
        run_statistics: When true, also print Fisher and chi-square p-values
            for every question. Off by default: the statistics section of the
            script previously sat after an unconditional early exit and never
            ran, so the default output is the frequencies only.
    """
    data = load_data(filename)
    questions = get_questions(data)

    # Just look at the frequencies.
    women_ans, men_ans = get_counts(data, questions)
    women_ans = expand_all_counts(women_ans)
    men_ans = expand_all_counts(men_ans)
    print("women_ans: ", women_ans)
    print("men_ans: ", men_ans)

    if not run_statistics:
        return

    # Statistics.
    for q in questions:
        fisher_res = fisher_for_question(data, q)
        chi2_p = chi2_for_question(data, q)

        print(f"\nВопрос: {q}")
        print("Фишер:", fisher_res)
        print("Хи-квадрат p:", chi2_p)


if __name__ == "__main__":
    main()