diff --git a/lib/mihainator/extractor.ex b/lib/mihainator/extractor.ex index 1dab693..e45f029 100644 --- a/lib/mihainator/extractor.ex +++ b/lib/mihainator/extractor.ex @@ -3,76 +3,72 @@ defmodule Mihainator.Extractor do This module will take Threema's CSV history file and parse dates / first interaction of day """ - # TODO: unit test - # TODO: refactor this method def extract(file) do raw_data = get_raw_data(file) last_date = - Enum.at(raw_data, -1) - |> elem(0) + Enum.at(raw_data, -1).date |> Date.end_of_month() first_date = Date.shift(last_date, year: -1) |> Date.beginning_of_month() - date_range = Date.range(first_date, last_date) + year_date_range = Date.range(first_date, last_date) - raw_data - |> Enum.filter(fn {date, _} -> Date.after?(date, first_date) end) + dates_to_check = + raw_data + |> Enum.filter(fn %{date: date} -> Date.after?(date, first_date) end) + |> Enum.group_by(&get_group_key/1) + + Map.new(year_date_range, fn x -> {x, []} end) + |> get_normalized_interaction_data(dates_to_check) |> Map.new() - |> get_normalized_interaction_data(date_range) - |> Enum.group_by(fn {date, _} -> - month = - Integer.to_string(date.month) - |> String.pad_leading(2, "0") - - "#{date.year}-#{month}" - end) end defp get_raw_data(file) do File.stream!(file) |> CSV.decode!(separator: ?,, headers: true, escape_max_lines: 100) - |> Stream.chunk_every(2, 1, :discard) - |> Stream.filter(&next_day?/1) |> Stream.map(&extract_data/1) |> Enum.to_list() end - defp next_day?([%{"posted_at" => current}, %{"posted_at" => next}]) do - current = - current - |> String.to_integer() - |> DateTime.from_unix!(:millisecond) + defp extract_data(%{"isoutbox" => is_out, "posted_at" => posted_at}) do + is_out = + case is_out do + "1" -> "out" + "0" -> "in" + end - next = - next - |> String.to_integer() - |> DateTime.from_unix!(:millisecond) - - Date.diff(next, current) == 1 - end - - defp extract_data([_, %{"isoutbox" => is_out, "posted_at" => posted_at}]) do - is_out = is_out == "1" - - posted_at = + date = posted_at |> String.to_integer() |> DateTime.from_unix!(:millisecond) |> DateTime.to_date() - {posted_at, is_out} + %{date: date, direction: is_out} end - defp get_normalized_interaction_data(date_to_postinfo, date_range) do - Enum.map(date_range, fn date -> - if Map.has_key?(date_to_postinfo, date) do - {date, Map.get(date_to_postinfo, date)} + defp get_group_key(%{date: date}) do + month = + Integer.to_string(date.month) + |> String.pad_leading(2, "0") + + day = + Integer.to_string(date.day) + |> String.pad_leading(2, "0") + + "#{date.year}-#{month}-#{day}" + end + + defp get_normalized_interaction_data(date_range, dates_to_check) do + Enum.map(date_range, fn {date, _} -> + date = Date.to_string(date) + + if Map.has_key?(dates_to_check, date) do + {date, Map.get(dates_to_check, date)} else - {date, nil} + {date, []} end end) end diff --git a/mix.exs b/mix.exs index 8c7bf61..87bdcbf 100644 --- a/mix.exs +++ b/mix.exs @@ -54,7 +54,8 @@ defmodule Mihainator.MixProject do {:dns_cluster, "~> 0.1.1"}, {:bandit, "~> 1.5"}, {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, - {:csv, "~> 3.2"} + {:csv, "~> 3.2"}, + {:timex, "~> 3.7", only: [:dev, :test], runtime: false} ] end diff --git a/mix.lock b/mix.lock index 8d736be..566d085 100644 --- a/mix.lock +++ b/mix.lock @@ -2,16 +2,25 @@ "bandit": {:hex, :bandit, "1.5.7", "6856b1e1df4f2b0cb3df1377eab7891bec2da6a7fd69dc78594ad3e152363a50", [:mix], [{:hpax, "~> 1.0.0", [hex: :hpax, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:thousand_island, "~> 1.0", [hex: :thousand_island, repo: "hexpm", optional: false]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "f2dd92ae87d2cbea2fa9aa1652db157b6cba6c405cb44d4f6dd87abba41371cd"}, "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, "castore": {:hex, :castore, "1.0.9", "5cc77474afadf02c7c017823f460a17daa7908e991b0cc917febc90e466a375c", [:mix], [], "hexpm", "5ea956504f1ba6f2b4eb707061d8e17870de2bee95fb59d512872c2ef06925e7"}, + "certifi": {:hex, :certifi, "2.12.0", "2d1cca2ec95f59643862af91f001478c9863c2ac9cb6e2f89780bfd8de987329", [:rebar3], [], "hexpm", "ee68d85df22e554040cdb4be100f33873ac6051387baf6a8f6ce82272340ff1c"}, + "combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm", "1b1dbc1790073076580d0d1d64e42eae2366583e7aecd455d1215b0d16f2451b"}, "credo": {:hex, :credo, "1.7.8", "9722ba1681e973025908d542ec3d95db5f9c549251ba5b028e251ad8c24ab8c5", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "cb9e87cc64f152f3ed1c6e325e7b894dea8f5ef2e41123bd864e3cd5ceb44968"}, "csv": {:hex, :csv, "3.2.1", "6d401f1ed33acb2627682a9ab6021e96d33ca6c1c6bccc243d8f7e2197d032f5", [:mix], [], "hexpm", "8f55a0524923ae49e97ff2642122a2ce7c61e159e7fe1184670b2ce847aee6c8"}, "dns_cluster": {:hex, :dns_cluster, "0.1.3", "0bc20a2c88ed6cc494f2964075c359f8c2d00e1bf25518a6a6c7fd277c9b0c66", [:mix], [], "hexpm", "46cb7c4a1b3e52c7ad4cbe33ca5079fbde4840dedeafca2baf77996c2da1bc33"}, "esbuild": {:hex, :esbuild, "0.8.1", "0cbf919f0eccb136d2eeef0df49c4acf55336de864e63594adcea3814f3edf41", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "25fc876a67c13cb0a776e7b5d7974851556baeda2085296c14ab48555ea7560f"}, + "expo": {:hex, :expo, "1.1.0", "f7b9ed7fb5745ebe1eeedf3d6f29226c5dd52897ac67c0f8af62a07e661e5c75", [:mix], [], "hexpm", "fbadf93f4700fb44c331362177bdca9eeb8097e8b0ef525c9cc501cb9917c960"}, "file_system": {:hex, :file_system, "1.0.1", "79e8ceaddb0416f8b8cd02a0127bdbababe7bf4a23d2a395b983c1f8b3f73edd", [:mix], [], "hexpm", "4414d1f38863ddf9120720cd976fce5bdde8e91d8283353f0e31850fa89feb9e"}, "floki": {:hex, :floki, "0.36.2", "a7da0193538c93f937714a6704369711998a51a6164a222d710ebd54020aa7a3", [:mix], [], "hexpm", "a8766c0bc92f074e5cb36c4f9961982eda84c5d2b8e979ca67f5c268ec8ed580"}, + "gettext": {:hex, :gettext, "0.26.1", "38e14ea5dcf962d1fc9f361b63ea07c0ce715a8ef1f9e82d3dfb8e67e0416715", [:mix], [{:expo, "~> 0.5.1 or ~> 1.0", [hex: :expo, repo: "hexpm", optional: false]}], "hexpm", "01ce56f188b9dc28780a52783d6529ad2bc7124f9744e571e1ee4ea88bf08734"}, + "hackney": {:hex, :hackney, "1.20.1", "8d97aec62ddddd757d128bfd1df6c5861093419f8f7a4223823537bad5d064e2", [:rebar3], [{:certifi, "~> 2.12.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~> 6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~> 1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~> 1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.4.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "fe9094e5f1a2a2c0a7d10918fee36bfec0ec2a979994cff8cfe8058cd9af38e3"}, "heroicons": {:git, "https://github.com/tailwindlabs/heroicons.git", "88ab3a0d790e6a47404cba02800a6b25d2afae50", [tag: "v2.1.1", sparse: "optimized", depth: 1]}, "hpax": {:hex, :hpax, "1.0.0", "28dcf54509fe2152a3d040e4e3df5b265dcb6cb532029ecbacf4ce52caea3fd2", [:mix], [], "hexpm", "7f1314731d711e2ca5fdc7fd361296593fc2542570b3105595bb0bc6d0fad601"}, + "idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"}, "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, + "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, "mime": {:hex, :mime, "2.0.6", "8f18486773d9b15f95f4f4f1e39b710045fa1de891fada4516559967276e4dc2", [:mix], [], "hexpm", "c9945363a6b26d747389aac3643f8e0e09d30499a138ad64fe8fd1d13d9b153e"}, + "mimerl": {:hex, :mimerl, "1.3.0", "d0cd9fc04b9061f82490f6581e0128379830e78535e017f7780f37fea7545726", [:rebar3], [], "hexpm", "a1e15a50d1887217de95f0b9b0793e32853f7c258a5cd227650889b38839fe9d"}, + "parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"}, "phoenix": {:hex, :phoenix, "1.7.14", "a7d0b3f1bc95987044ddada111e77bd7f75646a08518942c72a8440278ae7825", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.7", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:websock_adapter, "~> 0.5.3", [hex: :websock_adapter, repo: "hexpm", optional: false]}], "hexpm", "c7859bc56cc5dfef19ecfc240775dae358cbaa530231118a9e014df392ace61a"}, "phoenix_html": {:hex, :phoenix_html, "4.1.1", "4c064fd3873d12ebb1388425a8f2a19348cef56e7289e1998e2d2fa758aa982e", [:mix], [], "hexpm", "f2f2df5a72bc9a2f510b21497fd7d2b86d932ec0598f0210fed4114adc546c6f"}, "phoenix_live_dashboard": {:hex, :phoenix_live_dashboard, "0.8.4", "4508e481f791ce62ec6a096e13b061387158cbeefacca68c6c1928e1305e23ed", [:mix], [{:ecto, "~> 3.6.2 or ~> 3.7", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_mysql_extras, "~> 0.5", [hex: :ecto_mysql_extras, repo: "hexpm", optional: true]}, {:ecto_psql_extras, "~> 0.7", [hex: :ecto_psql_extras, repo: "hexpm", optional: true]}, {:ecto_sqlite3_extras, "~> 1.1.7 or ~> 1.2.0", [hex: :ecto_sqlite3_extras, repo: "hexpm", optional: true]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:phoenix_live_view, "~> 0.19 or ~> 1.0", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}, {:telemetry_metrics, "~> 0.6 or ~> 1.0", [hex: :telemetry_metrics, repo: "hexpm", optional: false]}], "hexpm", "2984aae96994fbc5c61795a73b8fb58153b41ff934019cfb522343d2d3817d59"}, @@ -21,11 +30,15 @@ "phoenix_template": {:hex, :phoenix_template, "1.0.4", "e2092c132f3b5e5b2d49c96695342eb36d0ed514c5b252a77048d5969330d639", [:mix], [{:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}], "hexpm", "2c0c81f0e5c6753faf5cca2f229c9709919aba34fab866d3bc05060c9c444206"}, "plug": {:hex, :plug, "1.16.1", "40c74619c12f82736d2214557dedec2e9762029b2438d6d175c5074c933edc9d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "a13ff6b9006b03d7e33874945b2755253841b238c34071ed85b0e86057f8cddc"}, "plug_crypto": {:hex, :plug_crypto, "2.1.0", "f44309c2b06d249c27c8d3f65cfe08158ade08418cf540fd4f72d4d6863abb7b", [:mix], [], "hexpm", "131216a4b030b8f8ce0f26038bc4421ae60e4bb95c5cf5395e1421437824c4fa"}, + "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"}, "tailwind": {:hex, :tailwind, "0.2.3", "277f08145d407de49650d0a4685dc062174bdd1ae7731c5f1da86163a24dfcdb", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}], "hexpm", "8e45e7a34a676a7747d04f7913a96c770c85e6be810a1d7f91e713d3a3655b5d"}, "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, "telemetry_metrics": {:hex, :telemetry_metrics, "1.0.0", "29f5f84991ca98b8eb02fc208b2e6de7c95f8bb2294ef244a176675adc7775df", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "f23713b3847286a534e005126d4c959ebcca68ae9582118ce436b521d1d47d5d"}, "telemetry_poller": {:hex, :telemetry_poller, "1.1.0", "58fa7c216257291caaf8d05678c8d01bd45f4bdbc1286838a28c4bb62ef32999", [:rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "9eb9d9cbfd81cbd7cdd24682f8711b6e2b691289a0de6826e58452f28c103c8f"}, "thousand_island": {:hex, :thousand_island, "1.3.5", "6022b6338f1635b3d32406ff98d68b843ba73b3aa95cfc27154223244f3a6ca5", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "2be6954916fdfe4756af3239fb6b6d75d0b8063b5df03ba76fd8a4c87849e180"}, + "timex": {:hex, :timex, "3.7.11", "bb95cb4eb1d06e27346325de506bcc6c30f9c6dea40d1ebe390b262fad1862d1", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.20", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 1.1", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "8b9024f7efbabaf9bd7aa04f65cf8dcd7c9818ca5737677c7b76acbc6a94d1aa"}, + "tzdata": {:hex, :tzdata, "1.1.2", "45e5f1fcf8729525ec27c65e163be5b3d247ab1702581a94674e008413eef50b", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "cec7b286e608371602318c414f344941d5eb0375e14cfdab605cca2fe66cba8b"}, + "unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"}, "websock": {:hex, :websock, "0.5.3", "2f69a6ebe810328555b6fe5c831a851f485e303a7c8ce6c5f675abeb20ebdadc", [:mix], [], "hexpm", "6105453d7fac22c712ad66fab1d45abdf049868f253cf719b625151460b8b453"}, "websock_adapter": {:hex, :websock_adapter, "0.5.7", "65fa74042530064ef0570b75b43f5c49bb8b235d6515671b3d250022cb8a1f9e", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "d0f478ee64deddfec64b800673fd6e0c8888b079d9f3444dd96d2a98383bdbd1"}, } diff --git a/test/mihainator/extractor_test.exs b/test/mihainator/extractor_test.exs index 50cbe2c..3c598f3 100644 --- a/test/mihainator/extractor_test.exs +++ b/test/mihainator/extractor_test.exs @@ -7,25 +7,52 @@ defmodule Mihainator.ExtractorTest do @test_file Path.join([File.cwd!(), "data", "test.csv"]) - test "should get a full year of sorted months" do - expected = [ - "2021-12", - "2022-01", - "2022-02", - "2022-03", - "2022-04", - "2022-05", - "2022-06", - "2022-07", - "2022-08", - "2022-09", - "2022-10", - "2022-11", - "2022-12" - ] - + setup_all do result = Extractor.extract(@test_file) - assert Map.keys(result) == expected + {:ok, result: result} + end + + test "should have a full year's list of days PLUS a month", state do + assert Enum.count(state[:result]) == 365 + 31 + end + + test "should subsequent list of days", state do + sorted_map_keys = + Map.keys(state[:result]) + |> Enum.sort() + + first_entry = Enum.at(sorted_map_keys, 0) |> Timex.parse!("{YYYY}-{0M}-{0D}") + second_entry = Enum.at(sorted_map_keys, 1) |> Timex.parse!("{YYYY}-{0M}-{0D}") + + assert NaiveDateTime.shift(first_entry, day: 1) == second_entry + end + + test "should find communication messages per day", state do + expected = [ + {"2024-07-07", + [ + %{date: ~D[2024-07-07], direction: "in"}, + %{date: ~D[2024-07-07], direction: "in"}, + %{date: ~D[2024-07-07], direction: "out"} + ]}, + {"2024-07-09", [%{date: ~D[2024-07-09], direction: "in"}]}, + {"2024-07-10", [%{date: ~D[2024-07-10], direction: "in"}]}, + {"2024-09-09", + [%{date: ~D[2024-09-09], direction: "out"}, %{date: ~D[2024-09-09], direction: "out"}]}, + {"2024-09-10", + [%{date: ~D[2024-09-10], direction: "out"}, %{date: ~D[2024-09-10], direction: "out"}]}, + {"2024-09-11", + [%{date: ~D[2024-09-11], direction: "in"}, %{date: ~D[2024-09-11], direction: "in"}]}, + {"2024-09-13", + [%{date: ~D[2024-09-13], direction: "in"}, %{date: ~D[2024-09-13], direction: "in"}]} + ] + + found_values = + state[:result] + |> Enum.filter(fn {_key, value} -> Enum.count(value) > 0 end) + |> Enum.sort() + + assert found_values == expected end end