如何在 C# 中使用 Tesseract 3.02 的训练数据?

我可以在命令提示符下使用新训练的 tessdata(版本 3.02)获得正确的 OCR 输出,但我希望在 C# 代码中通过 DLL 引用得到相同的输出。我已尝试引用 tessnet2_32.dll,但它会抛出异常。那么,如何在 C# 代码中通过 DLL 引用来使用 Tesseract 3.02 版本训练的 tessdata?

tessnet2 是针对 Tesseract 2.04 的。您需要一个兼容 3.02 版本的 .NET 包装器。

 // To access or use Tesseract 3.02 trained data we have to create a separate
 // wrapper class like the one below.
 using System;
 using System.IO;
 using System.Diagnostics;
 using System.Drawing;

 namespace tesseractThree
 {
     /// <summary>
     /// Thin wrapper that performs OCR by launching the tesseract command-line
     /// executable and reading back the text file it produces.
     /// </summary>
     public class TesseractOCR
     {
         private string commandpath;
         private string outpath;
         private string tmppath;

         /// <summary>
         /// Creates an instance without an executable path. Such an instance cannot
         /// run OCR; use <see cref="TesseractOCR(string)"/> instead.
         /// </summary>
         public TesseractOCR()
         {
             //
             // TODO: Add constructor logic here
             //
         }

         /// <summary>
         /// Creates a wrapper around the given tesseract executable.
         /// </summary>
         /// <param name="commandpath">Full path of the tesseract executable.</param>
         public TesseractOCR(string commandpath)
         {
             this.commandpath = commandpath;

             // Scratch files live in the current user's ApplicationData folder.
             string appData = System.Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData);
             tmppath = Path.Combine(appData, "out.tif");
             outpath = Path.Combine(appData, "out.txt");
         }

         /// <summary>
         /// Runs tesseract on an image file and returns the recognised text.
         /// </summary>
         /// <param name="filename">Path of the image file to recognise.</param>
         /// <param name="lang">Language / traineddata name passed to <c>-l</c>.</param>
         /// <param name="noLine">When true, <c>-psm 6</c> is appended to the command line.</param>
         /// <returns>The content of the text file written by tesseract.</returns>
         /// <exception cref="InvalidOperationException">The instance has no executable path.</exception>
         /// <exception cref="ArgumentException"><paramref name="filename"/> or <paramref name="lang"/> is null or empty.</exception>
         /// <exception cref="FileNotFoundException">tesseract did not produce an output file.</exception>
         public string analyze(string filename, string lang, bool noLine)
         {
             if (string.IsNullOrEmpty(commandpath) || string.IsNullOrEmpty(outpath))
             {
                 throw new InvalidOperationException(
                     "No tesseract executable configured; construct TesseractOCR with the executable path.");
             }
             if (string.IsNullOrEmpty(filename))
             {
                 throw new ArgumentException("An image file name is required.", "filename");
             }
             if (string.IsNullOrEmpty(lang))
             {
                 throw new ArgumentException("A language name is required.", "lang");
             }

             // tesseract takes an output *base* name and appends ".txt" itself, so strip
             // only the extension (a plain Replace would also hit ".txt" elsewhere in the path).
             string outbase = Path.Combine(
                 Path.GetDirectoryName(outpath),
                 Path.GetFileNameWithoutExtension(outpath));

             // Quote both paths: profile and install directories frequently contain spaces.
             string args = "\"" + filename + "\" \"" + outbase + "\" -l " + lang;
             if (noLine)
             {
                 args += " -psm 6";
             }

             ProcessStartInfo startinfo = new ProcessStartInfo(commandpath, args);
             startinfo.CreateNoWindow = true;
             startinfo.UseShellExecute = false;

             // Remove any leftover result so a failed run cannot return stale text.
             File.Delete(outpath);

             int exitCode;
             using (Process proc = Process.Start(startinfo))
             {
                 if (proc == null)
                 {
                     throw new InvalidOperationException("Could not start '" + commandpath + "'.");
                 }
                 proc.WaitForExit();
                 exitCode = proc.ExitCode;
             }

             if (!File.Exists(outpath))
             {
                 throw new FileNotFoundException(
                     "tesseract produced no output (exit code " + exitCode + ").", outpath);
             }

             try
             {
                 return File.ReadAllText(outpath);
             }
             finally
             {
                 // Always clean up the result file, even if reading it failed.
                 File.Delete(outpath);
             }
         }

         /// <summary>
         /// Saves the bitmap to a temporary TIFF file, runs OCR on it and returns the text.
         /// </summary>
         /// <param name="bmp">Image to recognise.</param>
         /// <param name="lang">Language / traineddata name passed to <c>-l</c>.</param>
         /// <param name="noLine">When true, <c>-psm 6</c> is appended to the command line.</param>
         /// <returns>The recognised text.</returns>
         /// <exception cref="ArgumentNullException"><paramref name="bmp"/> is null.</exception>
         public string OCRFromBitmap(Bitmap bmp, string lang, bool noLine)
         {
             if (bmp == null)
             {
                 throw new ArgumentNullException("bmp");
             }
             if (string.IsNullOrEmpty(tmppath))
             {
                 throw new InvalidOperationException(
                     "No tesseract executable configured; construct TesseractOCR with the executable path.");
             }

             try
             {
                 bmp.Save(tmppath, System.Drawing.Imaging.ImageFormat.Tiff);
                 return analyze(tmppath, lang, noLine);
             }
             finally
             {
                 // Remove the temporary image whether or not OCR succeeded.
                 File.Delete(tmppath);
             }
         }

         /*
         public string OCRFromFile(string filename)
         {
             return analyze(filename);
         }*/
     }
 }

 // Usage of this class (this snippet belongs in the calling code, e.g. a page event handler):
 string lang = "enc";
 string result;
 using (Bitmap b = new Bitmap(@"D:\Image\enc.test_font.exp0.tif"))
 {
     TesseractOCR ocr = new TesseractOCR(@"C:\Program Files\Tesseract-OCR\tesseract.exe");
     result = ocr.OCRFromBitmap(b, lang, true);
 }
 Label1.Text = result;

 // OR refer to the link below for more details.
 // https://gist.github.com/yatt/915443

借助 tesseractengine3.dll,我们也可以使用 Tesseract v3.02 的训练数据,如下所示。

 using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Web;
 using System.Web.UI;
 using System.Web.UI.WebControls;
 using tesseract;
 using System.Drawing;
 using System.IO;

 /// <summary>
 /// OCR engine modes passed to <c>TesseractProcessor.Init</c>.
 /// </summary>
 public enum TesseractEngineMode : int
 {
     /// <summary>
     /// Run Tesseract only - fastest
     /// </summary>
     TESSERACT_ONLY = 0,

     /// <summary>
     /// Run Cube only - better accuracy, but slower
     /// </summary>
     CUBE_ONLY = 1,

     /// <summary>
     /// Run both and combine results - best accuracy
     /// </summary>
     TESSERACT_CUBE_COMBINED = 2,

     /// <summary>
     /// Specify this mode when calling init_*(),
     /// to indicate that any of the above modes
     /// should be automatically inferred from the
     /// variables in the language-specific config,
     /// command-line configs, or if not specified
     /// in any of the above should be set to the
     /// default OEM_TESSERACT_ONLY.
     /// </summary>
     DEFAULT = 3
 }

 /// <summary>
 /// Page segmentation modes written to the <c>tessedit_pageseg_mode</c> variable.
 /// </summary>
 /// <remarks>
 /// NOTE(review): these numeric values look like the Tesseract 3.00 numbering; later
 /// 3.x releases appear to number the modes differently (single line = 7 on the 3.02
 /// command line). Confirm which numbering the referenced engine DLL expects.
 /// </remarks>
 public enum TesseractPageSegMode : int
 {
     /// <summary>
     /// Fully automatic page segmentation
     /// </summary>
     PSM_AUTO = 0,

     /// <summary>
     /// Assume a single column of text of variable sizes
     /// </summary>
     PSM_SINGLE_COLUMN = 1,

     /// <summary>
     /// Assume a single uniform block of text (Default)
     /// </summary>
     PSM_SINGLE_BLOCK = 2,

     /// <summary>
     /// Treat the image as a single text line
     /// </summary>
     PSM_SINGLE_LINE = 3,

     /// <summary>
     /// Treat the image as a single word
     /// </summary>
     PSM_SINGLE_WORD = 4,

     /// <summary>
     /// Treat the image as a single character
     /// </summary>
     PSM_SINGLE_CHAR = 5
 }

 /// <summary>
 /// Sample Web Forms page that runs OCR on a fixed image through the
 /// <c>TesseractProcessor</c> wrapper and writes the recognised text to the response.
 /// </summary>
 public partial class importDLL : System.Web.UI.Page
 {
     private TesseractProcessor m_tesseract = null;
     //private const string m_path = @"..\..\data\";
     private const string m_path = @"D:\tessdata-3.02\";
     private const string m_lang = "eng";

     /// <summary>
     /// Initialises the engine, recognises the sample image and emits the text.
     /// </summary>
     /// <param name="sender">Event source.</param>
     /// <param name="e">Event data.</param>
     /// <exception cref="InvalidOperationException">The engine could not be initialised.</exception>
     protected void Page_Load(object sender, EventArgs e)
     {
         // Dispose the image when done so the underlying file is not left locked.
         using (var image = System.Drawing.Image.FromFile(@"D:\Image\Capture1T.tif"))
         {
             m_tesseract = new TesseractProcessor();

             bool succeed = m_tesseract.Init(m_path, m_lang, (int)TesseractEngineMode.DEFAULT);
             if (!succeed)
             {
                 // Fail loudly instead of continuing with an uninitialised engine.
                 throw new InvalidOperationException(
                     "Tesseract initialisation failed (data path '" + m_path + "', language '" + m_lang + "').");
             }

             m_tesseract.SetVariable(
                 "tessedit_pageseg_mode",
                 ((int)TesseractPageSegMode.PSM_SINGLE_LINE).ToString());
             m_tesseract.Clear();
             m_tesseract.ClearAdaptiveClassifier();

             string outValue = m_tesseract.Apply(image);

             // Recognised text may contain '<' or '&'; encode it so it renders as text.
             Response.Write(HttpUtility.HtmlEncode(outValue));
         }
     }
 }