Test extractor
This commit is contained in:
@@ -3,76 +3,72 @@ defmodule Mihainator.Extractor do
|
||||
This module takes Threema's CSV history file and parses the dates and the first interaction of each day.
|
||||
"""
|
||||
|
||||
# TODO: unit test
|
||||
# TODO: refactor this method
|
||||
def extract(file) do
|
||||
raw_data = get_raw_data(file)
|
||||
|
||||
last_date =
|
||||
Enum.at(raw_data, -1)
|
||||
|> elem(0)
|
||||
Enum.at(raw_data, -1).date
|
||||
|> Date.end_of_month()
|
||||
|
||||
first_date =
|
||||
Date.shift(last_date, year: -1)
|
||||
|> Date.beginning_of_month()
|
||||
|
||||
date_range = Date.range(first_date, last_date)
|
||||
year_date_range = Date.range(first_date, last_date)
|
||||
|
||||
raw_data
|
||||
|> Enum.filter(fn {date, _} -> Date.after?(date, first_date) end)
|
||||
dates_to_check =
|
||||
raw_data
|
||||
|> Enum.filter(fn %{date: date} -> Date.after?(date, first_date) end)
|
||||
|> Enum.group_by(&get_group_key/1)
|
||||
|
||||
Map.new(year_date_range, fn x -> {x, []} end)
|
||||
|> get_normalized_interaction_data(dates_to_check)
|
||||
|> Map.new()
|
||||
|> get_normalized_interaction_data(date_range)
|
||||
|> Enum.group_by(fn {date, _} ->
|
||||
month =
|
||||
Integer.to_string(date.month)
|
||||
|> String.pad_leading(2, "0")
|
||||
|
||||
"#{date.year}-#{month}"
|
||||
end)
|
||||
end
|
||||
|
||||
# Streams `file`, decodes it as comma-separated CSV with a header row, and
# returns the result of extract_data/1 for every kept chunk as a list.
#
# Pipeline, in order:
#   * Stream.chunk_every(2, 1, :discard) pairs each decoded row with the row
#     that follows it (step 1) and drops a trailing incomplete chunk.
#   * next_day?/1 keeps only the pairs it returns a truthy value for.
#   * extract_data/1 is applied to each remaining pair.
#
# NOTE(review): this listing is a diff view in which removed and added lines are
# interleaved; the chunk_every/next_day? steps match the two-element-list clause
# of extract_data/1 and may belong to the pre-change version only — confirm
# against the actual file.
defp get_raw_data(file) do
|
||||
File.stream!(file)
|
||||
|> CSV.decode!(separator: ?,, headers: true, escape_max_lines: 100)
|
||||
|> Stream.chunk_every(2, 1, :discard)
|
||||
|> Stream.filter(&next_day?/1)
|
||||
|> Stream.map(&extract_data/1)
|
||||
|> Enum.to_list()
|
||||
end
|
||||
|
||||
defp next_day?([%{"posted_at" => current}, %{"posted_at" => next}]) do
|
||||
current =
|
||||
current
|
||||
|> String.to_integer()
|
||||
|> DateTime.from_unix!(:millisecond)
|
||||
defp extract_data(%{"isoutbox" => is_out, "posted_at" => posted_at}) do
|
||||
is_out =
|
||||
case is_out do
|
||||
"1" -> "out"
|
||||
"0" -> "in"
|
||||
end
|
||||
|
||||
next =
|
||||
next
|
||||
|> String.to_integer()
|
||||
|> DateTime.from_unix!(:millisecond)
|
||||
|
||||
Date.diff(next, current) == 1
|
||||
end
|
||||
|
||||
defp extract_data([_, %{"isoutbox" => is_out, "posted_at" => posted_at}]) do
|
||||
is_out = is_out == "1"
|
||||
|
||||
posted_at =
|
||||
date =
|
||||
posted_at
|
||||
|> String.to_integer()
|
||||
|> DateTime.from_unix!(:millisecond)
|
||||
|> DateTime.to_date()
|
||||
|
||||
{posted_at, is_out}
|
||||
%{date: date, direction: is_out}
|
||||
end
|
||||
|
||||
defp get_normalized_interaction_data(date_to_postinfo, date_range) do
|
||||
Enum.map(date_range, fn date ->
|
||||
if Map.has_key?(date_to_postinfo, date) do
|
||||
{date, Map.get(date_to_postinfo, date)}
|
||||
# Builds the grouping key for one extracted entry: the entry's `date` rendered
# as an ISO 8601 "YYYY-MM-DD" string.
#
# `Date.to_string/1` is used instead of hand-padding month and day so the key
# has exactly the same format as the lookup key that
# get_normalized_interaction_data/2 computes with `Date.to_string/1`
# (this also zero-pads the year to four digits, which the manual version did not).
defp get_group_key(%{date: date}), do: Date.to_string(date)
|
||||
|
||||
# Produces one `{date_string, entries}` tuple per element of `date_range`.
#
# `date_range` is enumerated as `{date, _}` pairs; only the date is used. Each
# date is converted with `Date.to_string/1` and looked up in `dates_to_check`,
# a map keyed by those date strings. Dates with no entry yield `[]`.
defp get_normalized_interaction_data(date_range, dates_to_check) do
  Enum.map(date_range, fn {date, _} ->
    key = Date.to_string(date)

    # Map.get/3 with a default replaces the has_key?/get pair (single lookup)
    # and drops the dead `{date, nil}` expression the else branch used to carry.
    {key, Map.get(dates_to_check, key, [])}
  end)
end
|
||||
|
||||
Reference in New Issue
Block a user