diff --git a/lib/mihainator/csv_parser.ex b/lib/mihainator/csv_parser.ex index a9a32d8..0503542 100644 --- a/lib/mihainator/csv_parser.ex +++ b/lib/mihainator/csv_parser.ex @@ -3,7 +3,35 @@ defmodule Mihainator.CSVParser do def start(file) do File.stream!(file) - |> Stream.map(&String.split(&1, ",")) - |> Enum.count() + |> CSV.decode!(separator: ?,, headers: true, escape_max_lines: 100) + |> Stream.chunk_every(2, 1, :discard) + |> Stream.filter(&next_day?/1) + |> Stream.map(&extract_data/1) + |> Enum.to_list() + end + + defp next_day?([%{"posted_at" => current}, %{"posted_at" => next}]) do + current = + current + |> String.to_integer() + |> DateTime.from_unix!(:millisecond) + + next = + next + |> String.to_integer() + |> DateTime.from_unix!(:millisecond) + + Date.diff(next, current) == 1 + end + + defp extract_data([_, %{"isoutbox" => is_out, "posted_at" => posted_at}]) do + is_out = is_out == "1" + + posted_at = + posted_at + |> String.to_integer() + |> DateTime.from_unix!(:millisecond) + + [is_out, posted_at] end end diff --git a/mix.exs b/mix.exs index 17c968a..8c7bf61 100644 --- a/mix.exs +++ b/mix.exs @@ -53,7 +53,8 @@ defmodule Mihainator.MixProject do {:jason, "~> 1.2"}, {:dns_cluster, "~> 0.1.1"}, {:bandit, "~> 1.5"}, - {:credo, "~> 1.7", only: [:dev, :test], runtime: false} + {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, + {:csv, "~> 3.2"} ] end diff --git a/mix.lock b/mix.lock index 3077ba7..8d736be 100644 --- a/mix.lock +++ b/mix.lock @@ -3,6 +3,7 @@ "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, "castore": {:hex, :castore, "1.0.9", "5cc77474afadf02c7c017823f460a17daa7908e991b0cc917febc90e466a375c", [:mix], [], "hexpm", "5ea956504f1ba6f2b4eb707061d8e17870de2bee95fb59d512872c2ef06925e7"}, "credo": {:hex, :credo, "1.7.8", "9722ba1681e973025908d542ec3d95db5f9c549251ba5b028e251ad8c24ab8c5", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "cb9e87cc64f152f3ed1c6e325e7b894dea8f5ef2e41123bd864e3cd5ceb44968"}, + "csv": {:hex, :csv, "3.2.1", "6d401f1ed33acb2627682a9ab6021e96d33ca6c1c6bccc243d8f7e2197d032f5", [:mix], [], "hexpm", "8f55a0524923ae49e97ff2642122a2ce7c61e159e7fe1184670b2ce847aee6c8"}, "dns_cluster": {:hex, :dns_cluster, "0.1.3", "0bc20a2c88ed6cc494f2964075c359f8c2d00e1bf25518a6a6c7fd277c9b0c66", [:mix], [], "hexpm", "46cb7c4a1b3e52c7ad4cbe33ca5079fbde4840dedeafca2baf77996c2da1bc33"}, "esbuild": {:hex, :esbuild, "0.8.1", "0cbf919f0eccb136d2eeef0df49c4acf55336de864e63594adcea3814f3edf41", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "25fc876a67c13cb0a776e7b5d7974851556baeda2085296c14ab48555ea7560f"}, "file_system": {:hex, :file_system, "1.0.1", "79e8ceaddb0416f8b8cd02a0127bdbababe7bf4a23d2a395b983c1f8b3f73edd", [:mix], [], "hexpm", "4414d1f38863ddf9120720cd976fce5bdde8e91d8283353f0e31850fa89feb9e"},