种植日期:
上次照料:
2026-04-08
Douban 元数据抓取
文献管理 的一种实践。
(defun my/set-book-metadata-by-douban (search-text)
(interactive (list (read-string "Query: ")))
(request
"https://search.douban.com/book/subject_search"
:headers (list (cons "User-Agent" "Mozilla/5.0 X11; Linux x86_64; rv:149.0) Gecko/20100101 Firefox/149.0"))
:params `(("cat" . "1001") ("search_text" . ,search-text))
:sync t
:success (cl-function
(lambda (&key data &allow-other-keys)
(when (string-match "window\\.__DATA__ = \\({.+?}\\);$" data)
(let* ((json-str (match-string 1 data))
(items (plist-get (json-parse-string json-str :object-type 'plist) :items))
(sorted-items (sort items
(lambda (a b)
(let ((r1 (or (thread-first a (plist-get :rating) (plist-get :value)) 0))
(r2 (or (thread-first b (plist-get :rating) (plist-get :value)) 0)))
(> r1 r2)))))
(candidates (mapcar
(lambda (cand)
(let ((rating (thread-first cand (plist-get :rating) (plist-get :value))))
(propertize (string-join
(list (or (plist-get cand :title) "No Title")
(or (plist-get cand :abstract) "")
(if rating (number-to-string rating) "N/A"))
" | ")
:url (plist-get cand :url))))
sorted-items))
(book-url (consult--read
candidates
:sort nil
:lookup (apply-partially #'consult--lookup-prop :url)))
(book-info (my/douban--get-book-info book-url)))
(if (derived-mode-p 'org-mode)
(progn
(org-back-to-heading t)
(org-set-property "ISBN" (cdr (assoc "ISBN" book-info)))
(org-set-property "AUTHOR" (cdr (assoc "作者" book-info)))
(org-set-property "PUBLISH_DATE" (cdr (assoc "出版年" book-info)))
(org-set-property "PAGES" (cdr (assoc "页数" book-info)))
(org-set-property "DOUBAN_SUBJECT" book-url))
(prin1 (cons (cons "book-url" book-url) book-info)))
))))))
(defun my/douban--get-book-info (book-url)
(let ((html-data))
(request
book-url
:headers (list (cons "User-Agent" "Mozilla/5.0 X11; Linux x86_64; rv:149.0) Gecko/20100101 Firefox/149.0"))
:sync t
:success (cl-function
(lambda (&key data &allow-other-keys)
(setq html-data data))))
(let* ((info-node
(with-temp-buffer
(insert html-data)
(car (dom-by-id (libxml-parse-html-region (point-min) (point-max)) "^info$"))))
(children (dom-children info-node))
(results '())
(i 0))
(while (< i (length children))
(let* ((node (nth i children))
(pl-node (when (listp node)
(if (equal (dom-attr node 'class) "pl")
node
(car (dom-by-class node "pl"))))))
(when pl-node
(let* ((key (string-remove-suffix ":" (string-trim (dom-texts pl-node))))
(full-node-text (dom-texts node))
(internal-val (string-trim
(replace-regexp-in-string
(concat "^.*" (regexp-quote key) "[ \t\n\r::]*") ""
full-node-text)))
(value nil))
(if (not (string-empty-p internal-val))
(setq value internal-val)
(let ((next-node (nth (1+ i) children)))
(when next-node
(setq value (string-trim (if (stringp next-node) next-node (dom-texts next-node)) "[ \t\n\r::\u00a0]+"))
(setq i (1+ i)))))
(when (and key (not (string-empty-p value)))
(push (cons key value) results)))))
(setq i (1+ i)))
results)))