安装插件

npm install tesseract.js

保存文件为 txt 文件
const { createWorker } = require('tesseract.js');
const fs = require('fs');

/**
 * Runs Tesseract OCR on a single image and returns the recognized text.
 *
 * The worker is set up for English ('eng') and is always terminated before
 * this function settles, whether recognition succeeds or fails.
 *
 * @param {string} imagePath - Image to recognize; passed unchanged to
 *   `worker.recognize`.
 * @returns {Promise<string>} The `data.text` field of the recognition result.
 */
async function ocrImage(imagePath) {
  // Awaiting is harmless when createWorker() returns the worker directly and
  // required when it returns a Promise.
  // NOTE(review): newer tesseract.js releases are believed to return a
  // Promise here — confirm against the installed version.
  const worker = await createWorker();

  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');

    const { data: { text } } = await worker.recognize(imagePath);
    return text;
  } finally {
    // Release the worker even if setup or recognition threw, so a failed run
    // does not leave it alive.
    await worker.terminate();
  }
}

/**
 * Writes the recognized text to a file synchronously and logs the
 * destination path.
 *
 * @param {string} text - Content to write.
 * @param {string} outputPath - File to write to; handed as-is to
 *   `fs.writeFileSync`.
 * @returns {void}
 */
function saveToTxt(text, outputPath) {
  const savedLabel = 'OCR 结果已保存到:';

  fs.writeFileSync(outputPath, text);
  console.log(savedLabel, outputPath);
}

// Path of the image to run OCR on.
const imagePath = 'path/to/image.jpg';

// Path of the txt file that receives the recognized text.
const outputPath = 'path/to/output.txt';

// Recognize the image, then persist the text. Failures are reported on
// stderr and reflected in a non-zero exit code so callers (shells, CI) can
// detect them; process.exitCode lets pending I/O flush before exiting.
ocrImage(imagePath)
  .then((text) => saveToTxt(text, outputPath))
  .catch((error) => {
    console.error('发生错误:', error);
    process.exitCode = 1;
  });