;-*- coding: utf-8 -*-
;; emacs lisp. emacs 22.
;; started: 2008-01-03.
;; generate a report of wikipedia links.
;; This program traverses a given dir, visits every html file, finds links to Wikipedia in those files, collects them, generates a nice html report of these links and the files they are from, and writes it to a given file.
;; Xah Lee
;; ∑ http://xahlee.org/
;;;; user-level global parameters
(defconst dirpath (expand-file-name "../")
  "Root directory whose html files are processed.")
(defconst root-path-char-count (length dirpath)
  "Number of leading chars to drop from a file's full path to get its relative link url.
For example, if the file path is “/Users/xah/web/emacs/emacs.html” and
this count is 15, the url used in the link is “emacs/emacs.html”.
This number need not be the length of `dirpath'; it can be smaller
for flexibility.")
(defconst output-file
  (expand-file-name "wikipedia_links.html" (expand-file-name "../"))
  "File the generated report is saved to.  (An existing file is backed up as ~.)")
;;;; loading package. global vars.
;; Name of the scratch buffer used by the rest of the script; a random
;; number is appended to the fixed prefix.
(setq tmpBufName (format " xahtemp%d" (random t)))
(require 'find-lisp)
;; Hash table of collected links.
;; Each key is a Wikipedia url; its value is a list of file paths,
;; i.e. conceptually ("Wikipedia url" ("file1" "file2" ...)).
(setq wpdata-hash (make-hash-table :size 4000 :test #'equal))
;; List form of the hash, used for sorting and for the report.
(setq wpdata-list nil)
;; Header text inserted at the top of the generated HTML file.
;; NOTE(review): the value is currently just two newlines; the intended
;; HTML header appears to be missing -- confirm against the original file.
(setq header-text "
\n")
)
)
(defun wikipedia-url-to-link (url)
  "Return URL as an html link string.

The link text is the last “/”-separated segment of URL after
percent-decoding, with “&” escaped as “&amp;” and underscores
replaced by spaces.  URL itself is used unchanged as the href value.

Example:
 http://en.wikipedia.org/wiki/Emacs
becomes
 <a href=\"http://en.wikipedia.org/wiki/Emacs\">Emacs</a>."
  (require 'gnus-util)
  (let* ((decoded (gnus-url-unhex-string url nil))
         ;; `or' guards against a nil segment so the string functions
         ;; below always receive a string.
         (segment (or (car (last (split-string decoded "/"))) ""))
         ;; Escape “&” so the link text is valid html.  FIXEDCASE and
         ;; LITERAL are set so the replacement is inserted verbatim.
         (escaped (replace-regexp-in-string "&" "&amp;" segment t t))
         (linktext (replace-regexp-in-string "_" " " escaped t t)))
    (concat "<a href=\"" url "\">" linktext "</a>")))
(defun get-html-file-title (fname)
  "Return the text of the <title> tag of the html file FNAME.
Assumes that the file contains the string “<title>...</title>”.
The searches are done without a NOERROR argument, so an error is
signaled when either tag is not found."
  (with-temp-buffer
    (insert-file-contents fname)
    ;; Search from the start of the file contents.
    (goto-char (point-min))
    ;; `search-forward' returns the position just after the match,
    ;; i.e. where the title text begins.
    (let ((title-start (search-forward "<title>")))
      (search-forward "</title>")
      ;; The title text ends where the closing tag begins.
      (buffer-substring-no-properties title-start (match-beginning 0)))))
;;;; main
;; Back up an existing report as “<output-file>~”, then delete the original.
(let ((backup-file (concat output-file "~")))
  (if (file-exists-p output-file)
      (progn
        (copy-file output-file backup-file t)
        (delete-file output-file))))
;; Gather links from the files into the hash, then build the sorted list.
(with-current-buffer (get-buffer-create tmpBufName)
  (let (filePaths)
    ;; Keep files ending in “.html” whose path does not contain “/xx”.
    (dolist (path (find-lisp-find-files dirpath "\\.html$"))
      (unless (string-match "/xx" path)
        (push path filePaths)))
    (mapc 'add-wplink-to-hash filePaths))
  ;; Convert the hash to a list and order it by each entry's car,
  ;; compared case-insensitively.
  (setq wpdata-list
        (sort (hash-to-list wpdata-hash)
              (lambda (a b)
                (string< (downcase (car a)) (downcase (car b)))))))
;; print it out in a temp buffer and save to file
(switch-to-buffer tmpBufName)
(erase-buffer)
(insert header-text)
(insert "
This page contains all existing links from XahLee.org to Wikipedia, as of ")
(insert-date)
(insert ". There are a total of " (number-to-string (length wpdata-list)) " links.