From 87addcda63677bb6cc28b79e5626af592a54403a Mon Sep 17 00:00:00 2001
From: kris
Date: Sun, 22 Mar 2026 15:44:59 +0400
Subject: [PATCH] =?UTF-8?q?=D0=9F=D0=BE=D0=B4=D1=81=D1=87=D1=91=D1=82=20?=
 =?UTF-8?q?=D0=BE=D1=82=D0=B2=D0=B5=D1=82=D0=BE=D0=B2,=20=D1=81=D0=BB?=
 =?UTF-8?q?=D0=BE=D0=BC=D0=B0=D0=BD=D0=B0=20=D1=81=D1=82=D0=B0=D1=82=D0=B8?=
 =?UTF-8?q?=D1=81=D1=82=D0=B8=D0=BA=D0=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Note: fix.py and main.py in this patch were revised by hand after it was
generated, so their "index" lines still carry the original blob ids.

 .gitignore |   3 ++
 fix.py     |  26 ++++++++++
 flake.lock |  27 ++++++++++
 flake.nix  |  46 +++++++++++++++++
 main.py    | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 243 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 fix.py
 create mode 100644 flake.lock
 create mode 100644 flake.nix
 create mode 100644 main.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..463420e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.csv
+.venv
+.vscode
\ No newline at end of file
diff --git a/fix.py b/fix.py
new file mode 100644
index 0000000..a4a4fe6
--- /dev/null
+++ b/fix.py
@@ -0,0 +1,26 @@
+"""One-off cleanup of the survey export before analysis.
+
+Answers to one multi-select question that contain the "all of the above"
+phrase are replaced with the explicit ';'-joined list of options, the form
+in which main.py splits answers into single options.
+"""
+import pandas as pd
+
+# 1. Read the raw export.
+df = pd.read_csv("MEN.csv")
+
+# 2. Condition: rows whose answer contains the substring.
+substring = "все вышеперечисленное"
+
+# Column header, including its trailing space and newline.
+q = "Что для вас является наиболее привлекательной чертой в мужчине? \n"
+
+# regex=False matches the phrase as literal text rather than as a pattern;
+# na=False makes missing answers non-matching.
+mask = df[q].str.contains(substring, na=False, regex=False)
+
+# 3. Replace the matching answers.
+df.loc[mask, q] = "внешняя привлекательность (в том числе, физическая подготовка);ведение здорового образа жизни;эмоциональная открытость;инициативность;ум и интеллект;высокий заработок"
+
+# 4. Save to a new file.
+df.to_csv("MEN.fixed.csv", index=False)
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..d8b0cdc
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,27 @@
+{
+  "nodes": {
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1773734432,
+        "narHash": "sha256-IF5ppUWh6gHGHYDbtVUyhwy/i7D261P7fWD1bPefOsw=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "cda48547b432e8d3b18b4180ba07473762ec8558",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "nixpkgs": "nixpkgs"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..ca82b2b
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,46 @@
+{
+  description = "Python dev environment with pip, venv, and required C libraries";
+
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+  };
+
+  outputs = { self, nixpkgs }:
+    let
+      systems = [ "x86_64-linux" "aarch64-linux" ];
+      forAllSystems = f: nixpkgs.lib.genAttrs systems (system:
+        f { pkgs = import nixpkgs { inherit system; }; });
+    in
+    {
+      devShells = forAllSystems ({ pkgs }: {
+        default = pkgs.mkShell {
+          buildInputs = [
+            pkgs.python313
+            pkgs.gcc
+            pkgs.zlib
+            pkgs.libffi
+          ];
+
+          shellHook = ''
+            echo "Python dev environment ready 🐍"
+
+            # пробрасываем библиотеки для C-расширений
+            export LD_LIBRARY_PATH="${pkgs.stdenv.cc.cc.lib}/lib:${pkgs.zlib}/lib:${pkgs.libffi}/lib:$LD_LIBRARY_PATH"
+
+            if [ ! -d ".venv" ]; then
+              echo "Creating virtualenv in .venv..."
+              python -m venv .venv
+              echo "Activating virtualenv and installing numpy/pandas..."
+              . .venv/bin/activate
+              pip install --upgrade pip
+              pip install numpy pandas scipy
+            else
+              . .venv/bin/activate
+            fi
+
+            echo "Virtualenv activated!"
+          '';
+        };
+      });
+    };
+}
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..ccd05b8
--- /dev/null
+++ b/main.py
@@ -0,0 +1,141 @@
+"""Count survey answers per gender and run significance tests on them."""
+import sys
+from collections import Counter
+
+import pandas as pd
+
+GENDER_COLUMN = "Ваш пол"
+
+# Columns of the export that are not survey questions.
+NON_QUESTION_COLUMNS = frozenset(
+    ["Timestamp", GENDER_COLUMN, "Ваш возраст", "googlehui"]
+)
+
+# The original script exited right after printing the counts, so the
+# significance tests never ran (the commit subject calls the statistics
+# broken). Set to True once they are fixed.
+RUN_STATISTICS = False
+
+
+def load_data(filename):
+    """Read the survey CSV at *filename* into a DataFrame."""
+    return pd.read_csv(filename)
+
+
+def get_questions(data):
+    """Return the columns of *data* that hold survey questions."""
+    return [col for col in data.columns if col not in NON_QUESTION_COLUMNS]
+
+
+def get_counts(data, questions):
+    """Count the raw answers to each question, separately per gender.
+
+    Returns a ``(women_answers, men_answers)`` pair of dicts mapping
+    question -> {answer: number of respondents}.
+    """
+    women = data[data[GENDER_COLUMN] == "женский"]
+    men = data[data[GENDER_COLUMN] == "мужской"]
+
+    women_answers = {q: women[q].value_counts().to_dict() for q in questions}
+    men_answers = {q: men[q].value_counts().to_dict() for q in questions}
+
+    return women_answers, men_answers
+
+
+def fisher_for_question(data, question):
+    """Run Fisher's exact test of gender against each distinct answer.
+
+    Returns a dict mapping answer -> p-value. Answers whose gender-by-answer
+    table is not 2x2 are skipped.
+    """
+    # Imported here so that the counting path does not need SciPy.
+    from scipy.stats import fisher_exact
+
+    results = {}
+
+    for answer in data[question].dropna().unique():
+        # Binarize: did the respondent give exactly this answer?
+        binary = data[question] == answer
+
+        table = pd.crosstab(data[GENDER_COLUMN], binary)
+
+        if table.shape == (2, 2):
+            _, p = fisher_exact(table)
+            results[answer] = p
+
+    return results
+
+
+def chi2_for_question(data, question):
+    """Return the chi-square p-value for gender against *question*.
+
+    Returns NaN when the contingency table is empty, a case in which
+    ``chi2_contingency`` would raise ``ValueError``.
+    """
+    # Imported here so that the counting path does not need SciPy.
+    from scipy.stats import chi2_contingency
+
+    table = pd.crosstab(data[GENDER_COLUMN], data[question])
+
+    if table.size == 0:
+        return float("nan")
+
+    _, p, _, _ = chi2_contingency(table)
+
+    return p
+
+
+def expand_counts(count_dict, sep=";"):
+    """Split multi-select answers and sum the counts per single option.
+
+    Each key of *count_dict* is split on *sep*; every stripped piece is
+    credited with that key's count.
+    """
+    result = Counter()
+
+    for key, value in count_dict.items():
+        for item in str(key).split(sep):
+            result[item.strip()] += value
+
+    return dict(result)
+
+
+def expand_all_counts(data_dict):
+    """Apply expand_counts to the answers of every question."""
+    return {
+        question: expand_counts(answers)
+        for question, answers in data_dict.items()
+    }
+
+
+def main():
+    """Print per-gender answer counts and, if enabled, the test results."""
+    data = load_data("MEN.fixed.csv")
+    questions = get_questions(data)
+
+    # Frequencies, with multi-select answers split into single options.
+    women_ans, men_ans = get_counts(data, questions)
+
+    women_ans = expand_all_counts(women_ans)
+    men_ans = expand_all_counts(men_ans)
+
+    print("women_ans: ", women_ans)
+    print("men_ans: ", men_ans)
+
+    if not RUN_STATISTICS:
+        return 0
+
+    # Significance tests.
+    for q in questions:
+        fisher_res = fisher_for_question(data, q)
+        chi2_p = chi2_for_question(data, q)
+
+        print(f"\nВопрос: {q}")
+        print("Фишер:", fisher_res)
+        print("Хи-квадрат p:", chi2_p)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())