Xah Talk Show 2023-01-09 Golang, Python, Emacs Lisp, WolframLang, Speed Comparison for Find/Replace

Xah Talk Show 2023-01-09 Golang, Python, Emacs Lisp, WolframLang, Speed Comparison for Find/Replace

problem spec

Text pairs to find and replace in every file.
Matching is case-sensitive.

"Previous:" → "prev_ykzFQ"
"that" → "that_hM8xd"
"lambda" → "lam_cnj3G"
"number" → "num_j2CWg"
# 2023-01-09 generate test dir
# file name: x_gen_test_dir.ps1
# Rebuilds the scratch directory $dxx as a fresh recursive copy of $fromDir.

$fromDir = "c:/Users/xah/web/xahlee_info/emacs/emacs_manual/elisp/"
$dxx = "c:/Users/xah/xx/"

# Drop any previous scratch directory so the copy starts from nothing.
if (Test-Path -Path $dxx) {
    Remove-Item -Path $dxx -Recurse
}

# Create the directory; the returned DirectoryInfo object is discarded.
New-Item -ItemType Directory -Path $dxx | Out-Null

# Copy everything under the source directory into the scratch directory.
Copy-Item -Path "${fromDir}*" -Destination $dxx -Recurse
# 2023-01-09 generate one big test file
# file name: x_gen_big_file.ps1
# Rebuilds the scratch directory $dxx so that it holds a single file, $bigName,
# made by joining the files of $fromDir selected by the *html filter.

$fromDir = "c:/Users/xah/web/xahlee_info/emacs/emacs_manual/elisp/"
$dxx = "c:/Users/xah/xx/"
$bigName = "xxbig.html"

# Drop any previous scratch directory, then create an empty one.
if (Test-Path -Path $dxx) {
    Remove-Item -Path $dxx -Recurse
}
New-Item -ItemType Directory -Path $dxx | Out-Null

# join all files in a dir into single file
# -Raw reads each file as one string; -NoNewline stops Set-Content from
# adding a newline after each of them.
Get-Content -Path "${fromDir}*" -Filter *html -Raw |
    Set-Content -Path "${dxx}${bigName}" -NoNewline

# Time each implementation with Measure-Command, one after another.
# The Go program is timed twice: through "go run" and as the prebuilt xgo.exe.
$xRuns = @(
    { emacs --script c:/Users/xah/xelisp.el },
    { wolframscript.exe -file c:/Users/xah/xWolframLang.wl },
    { python.exe c:/Users/xah/xpython3.py },
    { ruby.exe c:/Users/xah/xruby.rb },
    { go run c:/Users/xah/xgo.go },
    { c:/Users/xah/xgo.exe }
)

foreach ($xRun in $xRuns) {
    Measure-Command -Expression $xRun
}

Timing Result

first run result, single file
(first run may include compile time, or loading executable to memory)

python3     : 0.17
go_compiled : 0.19
elisp       : 0.39
go          : 0.6
ruby        : 1.28
WolframLang : 2.1

second run result, single file

go compiled : 0.04
python3     : 0.17
ruby        : 0.18
elisp       : 0.29
go          : 0.57
WolframLang : 1.8

first run result, 1k files

go compiled : 3.5
python3     : 3.2
go          : 3.7
ruby        : 3.9
elisp       : 5.4
WolframLang : 5.7

second run result, 1k files

go compiled : 0.5
ruby        : 0.6
go          : 1.0
elisp       : 1.8
WolframLang : 2.6
python3     : 3.0

WolframLang

(* 2023-01-09 find replace multiple pairs in a dir *)
(* For every file matching "*html" directly inside inputDir: read the whole
   file as a string, apply all rules in xpairs, and write the result back
   only if the text changed. *)
(* The file is written through an explicitly opened stream that is closed
   afterwards; WriteString given a bare file name leaves the stream open.
   The stream is opened with BinaryFormat -> True so that no newline
   translation is done on write. This is intended to fix the earlier
   behavior of adding line returns to the file -- TODO confirm on Windows. *)

inputDir = "c:/Users/xah/xx/";

xpairs = {
 "Previous:" -> "prev_ykzFQ",
 "that" -> "that_hM8xd",
 "lambda" -> "lam_cnj3G",
 "number" -> "num_j2CWg"
 };

Scan[
Module[{xold = ReadString[#], xnew, xstream},
  xnew = StringReplace[xold, xpairs];
  If[xold =!= xnew,
   (
    xstream = OpenWrite[#, BinaryFormat -> True];
    WriteString[xstream, xnew];
    Close[xstream]
   )];
 ] &
,
FileNames["*html", inputDir, {1}] ]

Emacs Lisp

 ;; 2023-01-09 find and replace string of all files in a dir
;; Directory whose files are processed.
(setq xinputDir "c:/Users/xah/xx/")

;; Vector of find/replace pairs. Each pair is itself a 2-element vector:
;; element 0 is the string to find, element 1 is its replacement.
(setq xFindReplacePairs
      (vector
       (vector "Previous:" "prev_ykzFQ")
       (vector "that" "that_hM8xd")
       (vector "lambda" "lam_cnj3G")
       (vector "number" "num_j2CWg")))

(defun my-do-file (fPath)
  "Apply every pair in `xFindReplacePairs' to the file at FPATH.
Each pair is a vector [FIND REPLACEMENT]. Every literal, case-sensitive
occurrence of FIND is replaced by REPLACEMENT, pair by pair in order.
FPATH is written back only when at least one replacement was made."
  (let (($changed-p nil))
    (with-temp-buffer
      ;; `search-forward' ignores letter case while `case-fold-search' is
      ;; non-nil, which is its default. Bind it to nil here, inside the
      ;; temp buffer, so that e.g. \"That\" is not treated as \"that\".
      (let ((case-fold-search nil))
        (insert-file-contents fPath)
        (mapc
         (lambda (x)
           (let (($find (aref x 0)) ($rep (aref x 1)))
             ;; each pair scans the whole buffer from the top
             (goto-char (point-min))
             (while (search-forward $find nil t)
               ;; FIXEDCASE and LITERAL are both t: insert $rep verbatim
               (replace-match $rep t t)
               (setq $changed-p t))))
         xFindReplacePairs)

        (when $changed-p
          (write-region (point-min) (point-max) fPath))))))

;; Run `my-do-file' on every entry of xinputDir whose name matches "html$".
;; Entries are requested as full paths, and the listing is left unsorted.
(dolist (xfile (directory-files xinputDir t "html$" t))
  (my-do-file xfile))

Python

# Python 3
# find replace multiple pairs in a dir. 2023-01-08
import sys, os, re

# Root directory of the tree to process.
xinput = "c:/Users/xah/xx/"

# Find/replace pairs as a list of (find, replacement) tuples,
# kept in the order they are written here.
xpairs = list(
    {
        "Previous:": "prev_ykzFQ",
        "that": "that_hM8xd",
        "lambda": "lam_cnj3G",
        "number": "num_j2CWg",
    }.items()
)

def doFile(xpath):
    """Apply every (find, replacement) pair in xpairs to the file at xpath.

    The file is read as UTF-8, each pair is applied in order with str.replace,
    and the file is rewritten only when the text actually changed.

    Line endings are preserved: the file is opened with newline="" for both
    reading and writing, which turns off newline translation. Without it,
    reading turns "\\r\\n" and "\\r" into "\\n", and writing turns "\\n" into
    os.linesep, so a changed file would also get all its line endings rewritten.
    """
    # "with" closes the file even if read() raises.
    with open(xpath, "r", encoding="utf-8", newline="") as inputFile:
        xMeat = inputFile.read()

    xNew = xMeat
    for xfind, xrep in xpairs:
        xNew = xNew.replace(xfind, xrep)

    # Leave untouched files alone.
    if xNew != xMeat:
        with open(xpath, "w", encoding="utf-8", newline="") as xoutFile:
            xoutFile.write(xNew)

# Walk the whole tree under xinput and hand every file whose name
# matches \.html$ to doFile.
for dirPath, subdirList, fileList in os.walk(xinput):
    for fName in fileList:
        if not re.search(r"\.html$", fName, re.U):
            continue
        doFile(dirPath + os.sep + fName)

Ruby

# 2023-01-10
# find replace multiple pairs in a dir.
# code by george
#
# Every *.html file under input_dir (searched recursively) has each
# [pattern, replacement] pair applied in order. A file is rewritten only
# when its content changed.

input_dir = "c:/Users/xah/xx/"

replacements = [
  ["Previous:", "prev_ykzFQ"],
  ["that", "that_hM8xd"],
  ["lambda", "lam_cnj3G"],
  ["number", "num_j2CWg"],
]

replace = lambda do |file|
  # Read and write in binary mode so the bytes outside the replaced spans
  # are written back exactly as they were read. The default text mode on
  # Windows converts between LF and CRLF and treats 0x1A as end of file.
  # All patterns are ASCII, so gsub works on the binary string.
  content = File.binread(file)
  result = replacements.reduce(content) do |string, (pattern, replacement)|
    string.gsub(pattern, replacement)
  end
  File.binwrite(file, result) if content != result
end

Dir.glob(File.join(input_dir, "**", "*.html"), &replace)

Golang

// 2023-01-09 find replace multiple pairs in a dir
package main

import (
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"regexp"
	"strings"
)

var (
	// inDir is the root directory handed to filepath.WalkDir.
	inDir = "c:/Users/xah/xx/"

	// fnameRegex is the pattern tested against the base name of each
	// walked path; only matching paths are processed.
	fnameRegex = `\.html$`
)

// frPair is one find/replace rule: findStr is the literal text to look
// for and replaceStr is the text substituted for it.
type frPair struct {
	findStr, replaceStr string
}

// frPairs holds the find/replace rules, in the order they are applied.
var frPairs = []frPair{
	{findStr: "Previous:", replaceStr: "prev_ykzFQ"},
	{findStr: "that", replaceStr: "that_hM8xd"},
	{findStr: "lambda", replaceStr: "lam_cnj3G"},
	{findStr: "number", replaceStr: "num_j2CWg"},
}

func doFile(path string) error {
	contentBytes, er := os.ReadFile(path)
	if er != nil {
		fmt.Printf("processing %v\n", path)
		panic(er)
	}
	var content = string(contentBytes)
	var changed = false
	for _, pair := range frPairs {
		var found = strings.Index(content, pair.findStr)
		if found != -1 {
			content = strings.Replace(content, pair.findStr, pair.replaceStr, -1)
			changed = true
		}
	}
	if changed {
		err2 := os.WriteFile(path, []byte(content), 0644)
		if err2 != nil {
			panic("write file problem")
		}
	}
	return nil
}

var pWalker = func(xpath string, xinfo fs.DirEntry, xerr error) error {
	if xerr != nil {
		fmt.Printf("error [%v] at a path [%q]\n", xerr, xpath)
		return xerr
	}
	{
		var x, err = regexp.MatchString(fnameRegex, filepath.Base(xpath))
		if err != nil {
			panic("stupid MatchString error 59767")
		}
		if x {
			doFile(xpath)
		}
	}
	return nil
}

// main walks inDir, calling pWalker for every entry. If the walk stops with
// an error, the error is reported on stderr and the process exits with
// status 1, so a failed run is not mistaken for a successful one.
func main() {
	if err := filepath.WalkDir(inDir, pWalker); err != nil {
		fmt.Fprintf(os.Stderr, "error walking the path %q: %v\n", inDir, err)
		os.Exit(1)
	}
}

Find/Replace Speed Comparison