105 lines
4.5 KiB
Clojure
105 lines
4.5 KiB
Clojure
(ns rsspaper.feeds
|
|
(:require
|
|
[rsspaper.config :refer [config]]
|
|
[clj-http.client :as client]
|
|
[clj-time.core :as t]
|
|
[clj-time.coerce :as c]
|
|
[clj-time.format :as f]
|
|
[remus :refer [parse-url]]))
|
|
|
|
;; clj-time formatter built from the pattern "dd MM yyyy"; it is used by
;; add-datetimes-formatter to render each article's :published-date-formatter.
(def date-custom-formatter (f/formatter "dd MM yyyy"))
|
|
|
|
(defn datetimes-to-unixtime
  "Returns a lazy sequence of `articles` where each :published-date has been
  replaced by the result of clj-time's `to-long` on the original value.
  A missing or nil date stays nil."
  [articles]
  (for [entry articles]
    (update entry :published-date c/to-long)))
|
|
|
|
(defn filter-edition
  "Restricts `articles` to the window selected by (:edition config).

  \"daily\"  keeps articles whose :published-date is within the last day,
  \"weekly\" keeps articles whose :published-date is within the last week,
  any other value returns `articles` untouched.
  Articles without a :published-date are dropped when a window applies."
  [articles]
  (let [cutoff (case (:edition config)
                 "daily"  (c/to-long (t/minus (t/now) (t/days 1)))
                 "weekly" (c/to-long (t/minus (t/now) (t/weeks 1)))
                 nil)
        recent? (fn [entry]
                  (when-some [published (:published-date entry)]
                    (>= published cutoff)))]
    (if (nil? cutoff)
      articles
      (filter recent? articles))))
|
|
|
|
(defn remove-future-editions
  "Keeps only the articles that have a :published-date strictly earlier than
  the current instant; undated articles are discarded as well."
  [articles]
  (letfn [(already-published? [entry]
            (when-some [published (:published-date entry)]
              ;; \"now\" is sampled for each article, when the lazy seq is realised
              (< published (c/to-long (t/now)))))]
    (filter already-published? articles)))
|
|
|
|
(defn add-datetimes-formatter
  "Adds :published-date-formatter to every article: the article's numeric
  :published-date rendered through `date-custom-formatter`."
  [articles]
  (for [entry articles
        :let [published (c/from-long (:published-date entry))]]
    (assoc entry
           :published-date-formatter
           (f/unparse date-custom-formatter published))))
|
|
|
|
(defn zip-feeds-in-articles
  "Flattens `feeds` into a single lazy sequence of articles.

  Each element of `feeds` is a map with a :feed-url and a :feed map that
  holds the articles under :entries. Every article is returned with two
  extra keys:
    :feed     - the feed map without its :entries
    :feed-url - the :feed-url of the feed the article came from

  Uses `mapcat` rather than reducing with `concat`: repeatedly wrapping the
  accumulator in `concat` builds one nested lazy sequence per feed and can
  overflow the stack when the result is realised for a long feed list."
  [feeds]
  (mapcat (fn [feed]
            (let [feed-info (dissoc (:feed feed) :entries)
                  feed-url  (:feed-url feed)]
              (map (fn [article]
                     (assoc article :feed feed-info :feed-url feed-url))
                   (get-in feed [:feed :entries]))))
          feeds))
|
|
|
|
(defn add-domain-to-relative-path
  "Turns a root-relative path into an absolute URL using the origin of
  `url-complete`.

    (add-domain-to-relative-path \"http://example.com/rss\" \"/foo/boo/\")
    ;; => \"http://example.com/foo/boo/\"

  url-complete - URL the origin is taken from; it must start with a scheme
                 followed by \"//\" or with \"www.\".
  url-relative - candidate path; may be nil.

  Returns `url-relative` unchanged when it does not start with \"/\" or when
  no origin can be extracted from `url-complete`. A protocol-relative value
  (\"//host/path\") only receives the scheme of `url-complete`."
  [url-complete url-relative]
  (let [relative? (= (str (first url-relative)) "/")
        ;; Anchored, with a non-greedy-by-construction prefix: the scheme part
        ;; may not contain \"/\", so a \"//\" later in the path or query string
        ;; can no longer be mistaken for the scheme separator. The host ends
        ;; at the first \"/\", \"?\" or \"#\", so a base URL without any path
        ;; still yields its origin.
        [_ prefix host] (when (and relative? (string? url-complete))
                          (re-find #"^([^/]+//|www\.)([^/?#]+)" url-complete))]
    (cond
      (not relative?) url-relative
      (nil? prefix)   url-relative
      ;; Protocol-relative link: it already names its own host.
      (= (str (second url-relative)) "/")
      (if (= prefix "www.")
        url-relative
        ;; drop the trailing \"//\" of the prefix to keep only \"scheme:\"
        (str (subs prefix 0 (- (count prefix) 2)) url-relative))
      :else (str prefix host url-relative))))
|
|
|
|
(defn- first-captured-group
  "Applies `pattern` to `text` with re-find and returns the first non-nil
  capture group, or nil when `text` is nil or nothing matches."
  [pattern text]
  (when text
    (let [match (re-find pattern text)]
      (when (vector? match)
        (some identity (rest match))))))

(defn- fetch-article-html
  "Downloads `url` and returns the response body, or nil when the request
  itself fails (the failure is printed, not rethrown)."
  [url]
  (try
    (:body (client/get url {:insecure? true :throw-exceptions false}))
    (catch Exception e
      (prn (str "Could not download article > " url " (" (.getMessage e) ")"))
      nil)))

(defn add-cover-article
  "Adds a :cover key to every article.

  The article page is downloaded and searched, in this order, for
    1. an og:image / og:image:url meta tag (either attribute order),
    2. the first <img> that follows a <main> element, an id=\"main\" or a
       class=\"main\" attribute.
  The first hit is made absolute against the article's :feed-url and stored
  under :cover; :cover is nil when nothing is found or the page cannot be
  downloaded.

  Returns a lazy sequence; the HTTP request and the progress message happen
  when each element is realised."
  [articles]
  (map (fn [article]
         (let [url-article (add-domain-to-relative-path (:feed-url article) (:link article))]
           ; User feedback
           (prn (str "Looking for cover image for article > " url-article))
           ; Search cover image
           (let [html (fetch-article-html url-article)
                 ;; Both patterns are alternations with one capture group per
                 ;; alternative, so the URL may sit in any group, not only
                 ;; in the first one.
                 url-og-image (first-captured-group #"<meta[^>].*?property=\"og:image(?::url)?\".*?content=\"(.*?)\".*?>|<meta[^>].*?content=\"(.*?)\".*?property=\"og:image(?::url)?\".*?>" html)
                 url-first-image (first-captured-group #"<main.*>[\s\S]+<img[^>]+src=\"([^\">]+)\"|id=['\"] ?main ?['\"]>[\s\S]+<img[^>]+src=\"([^\">]+)\"|class=['\"] ?main ?[\'\"]>[\s\S]+<img[^>]+src=\"([^\">]+)\"" html)
                 url-valid (first (remove nil? [url-og-image url-first-image]))
                 url-final-image (add-domain-to-relative-path (:feed-url article) url-valid)]
             (assoc article :cover url-final-image))))
       articles))
|
|
|
|
(defn order-published
  "Returns `articles` ordered from the highest :published-date to the lowest.
  The ascending sort is reversed as a whole, so articles sharing the same
  date come out in the opposite order to the input."
  [articles]
  (->> articles
       (sort-by :published-date)
       reverse))
|
|
|
|
(defn get-articles
  "Reads every feed URL listed under (:feeds config) and returns the
  resulting articles after the whole pipeline has been applied: flattening,
  date conversion, edition filtering, removal of future-dated articles,
  ordering by date, cover lookup and date formatting.

  A feed that cannot be read or parsed is reported on stdout and skipped;
  the feeds collected so far are kept."
  []
  (->
   ;; Get all feeds from config -> feeds
   (reduce
    (fn [feeds feed-url]
      (try
        ; Read feed
        (let [feed (parse-url feed-url {:insecure? true})]
          ; User feedback
          (prn (str "Reading RSS > " feed-url))
          (if (some? feed)
            ; Add feed and remember the URL it was read from
            (conj feeds (assoc feed :feed-url feed-url))
            ; Alert fail, but hand the accumulator back: returning the
            ; result of prn (nil) would discard every feed read so far.
            (do
              (prn (str "Error with '" feed-url "'"))
              feeds)))
        (catch Throwable _
          (prn (str feed-url " has been ignored because of bad formatting."))
          ; Same as above: keep what has already been collected.
          feeds)))
    [] (:feeds config))
   zip-feeds-in-articles
   datetimes-to-unixtime
   filter-edition
   remove-future-editions
   order-published
   add-cover-article
   add-datetimes-formatter))
|