RSSPAPER/src/rsspaper/feeds.clj

105 lines
4.5 KiB
Clojure
Raw Normal View History

2021-06-02 01:35:26 -04:00
(ns rsspaper.feeds
(:require
[rsspaper.config :refer [config]]
2021-07-30 19:22:18 -04:00
[clj-http.client :as client]
2021-07-06 18:43:45 -04:00
[clj-time.core :as t]
2021-07-01 15:59:50 -04:00
[clj-time.coerce :as c]
[clj-time.format :as f]
2021-06-02 01:35:26 -04:00
[remus :refer [parse-url]]))
2021-07-01 15:59:50 -04:00
;; clj-time formatter built from the pattern "dd MM yyyy"; it is used by
;; add-datetimes-formatter to produce each article's :published-date-formatter value.
(def date-custom-formatter (f/formatter "dd MM yyyy"))
(defn datetimes-to-unixtime
  "Returns a lazy sequence of `articles` in which every :published-date has
  been passed through clj-time's `to-long` coercion. All other keys are left
  untouched."
  [articles]
  (for [entry articles]
    (update entry :published-date c/to-long)))
2021-07-01 15:59:50 -04:00
2021-07-06 18:43:45 -04:00
(defn filter-edition
  "Restricts `articles` according to the :edition value found in config.

  - daily  -> keeps articles whose :published-date is at or after now minus one day
  - weekly -> keeps articles whose :published-date is at or after now minus one week
  - anything else -> returns `articles` unchanged

  In the daily and weekly editions, articles with a nil :published-date are dropped.
  :published-date is compared numerically against a `to-long` value, so it is
  expected to already be a number."
  [articles]
  (let [now-minus     (fn [period] (c/to-long (t/minus (t/now) period)))
        daily-cutoff  (now-minus (t/days 1))
        weekly-cutoff (now-minus (t/weeks 1))
        ;; Builds the predicate shared by both editions.
        newer-than    (fn [cutoff]
                        (fn [{published :published-date}]
                          (and (some? published) (>= published cutoff))))
        edition       (:edition config)]
    (cond
      (= edition "daily")  (filter (newer-than daily-cutoff) articles)
      (= edition "weekly") (filter (newer-than weekly-cutoff) articles)
      :else                articles)))
2021-07-06 18:43:45 -04:00
2021-12-16 13:35:14 -05:00
(defn remove-future-editions
  "Keeps only the articles whose :published-date is non-nil and strictly
  earlier than the current time.

  The current time is read once, when the function is called, so every
  article is compared against the same cut-off instead of a fresh clock
  reading per element."
  [articles]
  (let [cutoff (c/to-long (t/now))]
    (filter (fn [{published :published-date}]
              (and (some? published) (< published cutoff)))
            articles)))
2021-07-01 15:59:50 -04:00
(defn add-datetimes-formatter
  "Returns a lazy sequence of `articles`, each extended with
  :published-date-formatter: its :published-date converted with `from-long`
  and rendered through date-custom-formatter."
  [articles]
  (let [render (fn [millis]
                 (f/unparse date-custom-formatter (c/from-long millis)))]
    (for [entry articles]
      (assoc entry :published-date-formatter (render (:published-date entry))))))
2021-07-01 15:59:50 -04:00
2021-07-02 15:29:55 -04:00
(defn zip-feeds-in-articles
  "Flattens `feeds` into a single lazy sequence of articles.

  Each element of `feeds` is a map holding :feed-url and a :feed map whose
  :entries are the articles. Every returned article is the original entry
  plus two keys:
    :feed     - the feed map without its :entries
    :feed-url - the :feed-url of the feed the article came from

  Articles keep the order of the feeds and of the entries inside each feed."
  [feeds]
  ;; mapcat concatenates once over all feeds; accumulating with concat inside
  ;; reduce nests one lazy seq per feed and can overflow the stack when realized.
  (mapcat (fn [feed]
            (let [feed-info (dissoc (:feed feed) :entries)
                  feed-url  (:feed-url feed)]
              (map (fn [article]
                     (assoc article :feed feed-info :feed-url feed-url))
                   (get-in feed [:feed :entries]))))
          feeds))
2021-10-28 03:58:34 -04:00
(defn add-domain-to-relative-path
  "Converts a root-relative path into a URL on the same host as `url-complete`.

    /foo/boo/ -> http://example.com/foo/boo/

  url-complete - URL whose scheme and host are reused (for example the feed URL)
  url-relative - the link to complete; may be nil

  Rules:
    - a value starting with // (protocol-relative) only receives the scheme of
      `url-complete`; it is returned unchanged when `url-complete` has no scheme
    - a value starting with a single / is prefixed with the scheme and host of
      `url-complete`
    - anything else (absolute URLs, nil, paths without a leading slash) is
      returned unchanged"
  [url-complete url-relative]
  (let [^String target (str url-relative)
        source (str url-complete)
        ;; Scheme only, e.g. https
        scheme (second (re-find #"^([A-Za-z][A-Za-z0-9+.-]*)://" source))
        ;; Optional scheme plus host, stopping at the first /, ? or # so that
        ;; URLs without a path and URLs carrying another URL in their query
        ;; string are both handled.
        origin (re-find #"^(?:[A-Za-z][A-Za-z0-9+.-]*://)?[^/?#]+" source)]
    (cond
      (.startsWith target "//") (if scheme (str scheme ":" target) url-relative)
      (.startsWith target "/")  (str origin target)
      :else                     url-relative)))
2021-07-02 15:29:55 -04:00
(defn- first-captured-group
  "Returns the first non-nil capture group of the first match of `pattern`
  in `text`, or nil when the pattern does not match."
  [pattern text]
  (let [match (re-find pattern text)]
    (when (vector? match)
      (some identity (rest match)))))

(defn- fetch-article-html
  "Requests `url` and returns the response :body, or an empty string when the
  response has no body or the request throws."
  [url]
  (try
    (or (:body (client/get url {:insecure? true :throw-exceptions false})) "")
    (catch Exception e
      ;; One unreachable article must not abort the whole run.
      (prn (str "Could not download '" url "': " (.getMessage e)))
      "")))

(defn add-cover-article
  "Returns a lazy sequence of `articles`, each extended with a :cover key.

  For every article the page at its :link (completed with the host of its
  :feed-url when the link is root-relative) is downloaded and searched for:
    1. an og:image / og:image:url meta tag, with property and content in
       either order
    2. otherwise an img src located after a main element, or after an
       element whose id or class is main

  The chosen URL is completed with add-domain-to-relative-path. :cover is nil
  when no image is found or the page cannot be downloaded."
  [articles]
  (map (fn [article]
         (let [url-article (add-domain-to-relative-path (:feed-url article) (:link article))]
           ;; User feedback
           (prn (str "Looking for cover image for article > " url-article))
           ;; Search cover image. Each pattern is an alternation with one
           ;; capture group per branch, so the first non-nil group is taken
           ;; rather than always group 1.
           (let [html (fetch-article-html url-article)
                 url-og-image (first-captured-group #"<meta[^>].*?property=\"og:image(?::url)?\".*?content=\"(.*?)\".*?>|<meta[^>].*?content=\"(.*?)\".*?property=\"og:image(?::url)?\".*?>" html)
                 ;; NOTE(review): [\s\S]+ is greedy, so this looks like it selects the
                 ;; last img after the main marker rather than the first - confirm intent.
                 url-first-image (first-captured-group #"<main.*>[\s\S]+<img[^>]+src=\"([^\">]+)\"|id=['\"] ?main ?['\"]>[\s\S]+<img[^>]+src=\"([^\">]+)\"|class=['\"] ?main ?[\'\"]>[\s\S]+<img[^>]+src=\"([^\">]+)\"" html)
                 ;; og:image has priority over the image found in the page body
                 url-valid (first (remove nil? [url-og-image url-first-image]))
                 url-final-image (add-domain-to-relative-path (:feed-url article) url-valid)]
             (assoc article :cover url-final-image))))
       articles))
2021-07-02 15:29:55 -04:00
2021-07-18 06:50:54 -04:00
(defn order-published
  "Returns `articles` ordered by :published-date from highest to lowest.
  The ascending sort is reversed as a whole, so articles with equal dates
  come out in the opposite order to the one they had in the input."
  [articles]
  (->> articles
       (sort-by :published-date)
       reverse))
2021-07-02 15:29:55 -04:00
(defn get-articles
  "Builds the list of articles from every URL listed under :feeds in config.

  Each URL is read with `parse-url`; a feed that parses to nil or throws is
  reported with `prn` and skipped, while the feeds collected so far are
  kept. The collected feeds are then passed, in order, through:
  zip-feeds-in-articles, datetimes-to-unixtime, filter-edition,
  remove-future-editions, order-published, add-cover-article and
  add-datetimes-formatter."
  []
  (->
   (reduce
    (fn [feeds feed-url]
      (try
        ;; Read feed
        (let [feed (parse-url feed-url {:insecure? true})]
          ;; User feedback
          (prn (str "Reading RSS > " feed-url))
          (if (some? feed)
            ;; Add feed together with the URL it was read from
            (conj feeds (assoc feed :feed-url feed-url))
            ;; Alert fail, and return the accumulator: prn returns nil, which
            ;; would otherwise replace the feeds gathered so far.
            (do
              (prn (str "Error with '" feed-url "'"))
              feeds)))
        (catch Throwable _
          (prn (str feed-url " has been ignored because of bad formatting."))
          ;; Keep the accumulator for the same reason as above.
          feeds)))
    [] (:feeds config))
   zip-feeds-in-articles
   datetimes-to-unixtime
   filter-edition
   remove-future-editions
   order-published
   add-cover-article
   add-datetimes-formatter))