Xah Talk Show 2025-04-17 Ep644 emacs lisp, text processing tutorial, validate HTML annotation markup

xah talk show 2025-04-17 1b08f
xah talk show 2025-04-17 1b08f

;; -*- coding: utf-8; lexical-binding: t; -*-

;; for each html file in a dir, for each html marker
;; <span class="xn-marker240">duenna</span>
;; find the target
;; <b class="xn-target561">duenna</b>
;; with matching innertext.
;; they must always come in pairs.
;; report if not

;; traverse dir
;; open each html file
;; find all the marker markup, put them in a hash table or association list
;; find all the target markup, put them in a hash table or association list
;; then pair them up, check if any is missing
;; if so, report, e.g. print.

;; c:/Users/xah/web/xahlee_org/wordy/arabian_nights/an3.html

(require 'subr-x)

(defun my-process-file--collect-ids (Regex Kind FPath)
  "Return a hash table of the ids matched by Regex in the current buffer.
Regex must capture the id as group 1.  The search starts at the
beginning of the buffer.  Each distinct id becomes a key, with value t.
When an id is found more than once, emit a warning naming Kind (a
string such as marker or target) and FPath, and keep the first one."
  (let ((xtable (make-hash-table :test 'equal)))
    (goto-char (point-min))
    (while (re-search-forward Regex nil t)
      (let ((xid (match-string 1)))
        (if (gethash xid xtable)
            (warn "duplicate %s %s in file %s" Kind xid FPath)
          (puthash xid t xtable))))
    xtable))

(defun my-process-file--missing-ids (TableA TableB)
  "Return the sorted list of keys that are in TableA but not in TableB.
Both arguments are hash tables whose keys are strings."
  (let (xmissing)
    (dolist (xid (hash-table-keys TableA))
      (unless (gethash xid TableB)
        (push xid xmissing)))
    (sort xmissing #'string<)))

(defun my-process-file (FPath)
  "Check that markers and targets in the html file at FPath come in pairs.
A marker is the markup
<span class=\"xn-marker240\">ID</span>
and a target is the markup
<b class=\"xn-target561\">ID</b>
A marker and a target are a pair when their ID text is the same.

Warn about any ID that occurs more than once as a marker or more than
once as a target.  Warn when some marker has no target or some target
has no marker, listing the unpaired IDs.

Return t when every marker has a target and every target has a
marker, else nil.
Created: 2025-04-17
Version: 2025-04-17"
  (let (xmarker-table xtarget-table)
    ;; Read the file into a scratch buffer; both scans run on that buffer.
    (with-temp-buffer
      (insert-file-contents FPath)
      (setq xmarker-table
            (my-process-file--collect-ids
             "<span class=\"xn-marker240\">\\([^<]+\\)</span>" "marker" FPath))
      (setq xtarget-table
            (my-process-file--collect-ids
             "<b class=\"xn-target561\">\\([^<]+\\)</b>" "target" FPath)))

    ;; Print the ids themselves, not the hash table object.
    (message "this is all marker id found [%s]"
             (sort (hash-table-keys xmarker-table) #'string<))

    ;; Compare by key lookup in both directions.  This avoids the
    ;; one-argument form of `sort', which only exists in Emacs 30 and later,
    ;; and tells us exactly which ids are unpaired.
    (let ((xno-target (my-process-file--missing-ids xmarker-table xtarget-table))
          (xno-marker (my-process-file--missing-ids xtarget-table xmarker-table)))
      (if (or xno-target xno-marker)
          (progn
            (warn "you got a problem, in file %s: marker without target %S, target without marker %S"
                  FPath xno-target xno-marker)
            nil)
        (message "fantastic")
        t))))

;; HHHH------------------------------


;; (my-process-file "c:/Users/xah/web/xahlee_org/wordy/arabian_nights/an3.html" )


;; HHHH------------------------------

;; List the directory and all of its subdirectories,
;; keeping only files with extension .html.
;; Ignore dirs whose name starts with a dot, e.g. .git

(defvar xfilelist nil
  "List of full paths of the html files to validate.")

(setq xfilelist
      (directory-files-recursively
       "c:/Users/xah/web/xahlee_org/wordy/"
       ;; \\' anchors at end of string; $ would also match before a newline.
       "\\.html\\'"
       nil
       ;; Decide by the directory's own name only, so that a dot directory
       ;; somewhere above the root does not exclude the whole tree.
       (lambda (x)
         (not (string-prefix-p
               "."
               (file-name-nondirectory (directory-file-name x)))))))

;; Validate every file found.
(mapc #'my-process-file xfilelist)