Test extractor

This commit is contained in:
2024-10-25 21:14:59 +02:00
parent 040160c03a
commit a3af9b7817
4 changed files with 96 additions and 59 deletions

View File

@@ -3,76 +3,72 @@ defmodule Mihainator.Extractor do
Takes Threema's CSV history file and parses the dates / first interaction of each day.
"""
# TODO: unit test
# TODO: refactor this method
def extract(file) do
raw_data = get_raw_data(file)
last_date =
Enum.at(raw_data, -1)
|> elem(0)
Enum.at(raw_data, -1).date
|> Date.end_of_month()
first_date =
Date.shift(last_date, year: -1)
|> Date.beginning_of_month()
date_range = Date.range(first_date, last_date)
year_date_range = Date.range(first_date, last_date)
raw_data
|> Enum.filter(fn {date, _} -> Date.after?(date, first_date) end)
dates_to_check =
raw_data
|> Enum.filter(fn %{date: date} -> Date.after?(date, first_date) end)
|> Enum.group_by(&get_group_key/1)
Map.new(year_date_range, fn x -> {x, []} end)
|> get_normalized_interaction_data(dates_to_check)
|> Map.new()
|> get_normalized_interaction_data(date_range)
|> Enum.group_by(fn {date, _} ->
month =
Integer.to_string(date.month)
|> String.pad_leading(2, "0")
"#{date.year}-#{month}"
end)
end
# Reads the CSV history at `file` and returns the extracted rows as a list.
#
# The file is streamed lazily and only materialized by the final
# Enum.to_list/1 call. File.stream!/1 and CSV.decode!/2 are bang variants,
# so failures raise rather than returning error tuples.
#
# NOTE(review): this listing shows extract_data/1 heads for both a single
# row map and a two-element list of rows; confirm which one matches the
# pipeline stages below before relying on this description.
defp get_raw_data(file) do
File.stream!(file)
# Comma-separated with a header row (`headers: true`); presumably each row
# is decoded into a map keyed by column name — verify against the CSV library.
|> CSV.decode!(separator: ?,, headers: true, escape_max_lines: 100)
# Sliding window of two consecutive rows (size 2, step 1); a trailing
# incomplete window is dropped because of :discard.
|> Stream.chunk_every(2, 1, :discard)
# Keep only the windows for which next_day?/1 returns true.
|> Stream.filter(&next_day?/1)
|> Stream.map(&extract_data/1)
|> Enum.to_list()
end
defp next_day?([%{"posted_at" => current}, %{"posted_at" => next}]) do
current =
current
|> String.to_integer()
|> DateTime.from_unix!(:millisecond)
defp extract_data(%{"isoutbox" => is_out, "posted_at" => posted_at}) do
is_out =
case is_out do
"1" -> "out"
"0" -> "in"
end
next =
next
|> String.to_integer()
|> DateTime.from_unix!(:millisecond)
Date.diff(next, current) == 1
end
defp extract_data([_, %{"isoutbox" => is_out, "posted_at" => posted_at}]) do
is_out = is_out == "1"
posted_at =
date =
posted_at
|> String.to_integer()
|> DateTime.from_unix!(:millisecond)
|> DateTime.to_date()
{posted_at, is_out}
%{date: date, direction: is_out}
end
defp get_normalized_interaction_data(date_to_postinfo, date_range) do
Enum.map(date_range, fn date ->
if Map.has_key?(date_to_postinfo, date) do
{date, Map.get(date_to_postinfo, date)}
# Builds the per-day grouping key for a parsed row.
#
# Takes a map carrying a `:date` field and returns that date as a
# "YYYY-MM-DD" string.
#
# Date.to_string/1 is used instead of hand-padding month and day so the key
# is produced the same way as the lookup key in
# get_normalized_interaction_data/2, which also calls Date.to_string/1.
# The previous manual formatting left the year unpadded, so the two keys
# could disagree for years with fewer than four digits.
defp get_group_key(%{date: date}) do
  Date.to_string(date)
end
defp get_normalized_interaction_data(date_range, dates_to_check) do
Enum.map(date_range, fn {date, _} ->
date = Date.to_string(date)
if Map.has_key?(dates_to_check, date) do
{date, Map.get(dates_to_check, date)}
else
{date, nil}
{date, []}
end
end)
end