I am new to reading PDF files using C#, and I have the function below in my project. How can I recognize the position of text in a PDF file?
Here, _dblRect contains the values 151.0, 696.0, 400.0, 500.0.
_strFileName is the name of a .txt file.
Can someone explain how this function works?
/// <summary>
/// Extracts the text inside a rectangular region of one page of a PDF by driving
/// Acrobat's JavaScript object through late-bound COM calls.
/// </summary>
/// <remarks>
/// Steps performed:
/// 1. <c>extractPages</c> copies the requested page into a new temporary document.
/// 2. <c>setPageBoxes("Crop", 0, 0, rect)</c> sets the crop box of that single page
///    to <paramref name="_dblRect"/>.
/// 3. <c>saveAs(path, "com.adobe.acrobat.plain-text")</c> exports the cropped page
///    as plain text to <paramref name="_strFileName"/>.
/// 4. <c>closeDoc(true)</c> closes the temporary document.
/// 5. The text file is read back, deleted, and its content returned.
/// </remarks>
/// <param name="_strFileName">Path of the scratch text file to export to; it is deleted before returning.</param>
/// <param name="_dblRect">
/// Crop rectangle handed to <c>setPageBoxes</c>.
/// NOTE(review): the Acrobat JavaScript API describes rectangles as
/// [left, top, right, bottom] in points with the origin at the page's bottom-left
/// corner — confirm against the API reference.
/// </param>
/// <param name="_docOriginal">The open Acrobat document to extract from.</param>
/// <param name="_intPage">
/// Page index handed to <c>extractPages</c>.
/// NOTE(review): presumably 0-based per the Acrobat JavaScript API — confirm.
/// </param>
/// <returns>The exported text, or an empty string if any step fails.</returns>
private string fcnTextFromCrop(string _strFileName, double[] _dblRect, CAcroPDDoc _docOriginal, int _intPage)
{
    const BindingFlags invokeFlags = BindingFlags.InvokeMethod | BindingFlags.Public | BindingFlags.Instance;

    string str = "";
    try
    {
        object jsObject = _docOriginal.GetJSObject();
        System.Type jsType = jsObject.GetType();

        // Copy the requested page into its own temporary document so that the
        // original document's page boxes are never modified.
        object extractedDoc = jsType.InvokeMember("extractPages", invokeFlags, null, jsObject, new object[] { _intPage });
        if (extractedDoc == null)
        {
            return str;
        }

        try
        {
            // The temporary document holds a single page, hence the 0..0 page range.
            jsType.InvokeMember("setPageBoxes", invokeFlags, null, extractedDoc, new object[] { "Crop", 0, 0, _dblRect });
            jsType.InvokeMember("saveAs", invokeFlags, null, extractedDoc, new object[] { _strFileName, "com.adobe.acrobat.plain-text" });
        }
        finally
        {
            // Always close the temporary document, even when cropping or exporting
            // failed; otherwise it would stay open inside Acrobat.
            jsType.InvokeMember("closeDoc", invokeFlags, null, extractedDoc, new object[] { true });
        }

        try
        {
            str = System.IO.File.ReadAllText(_strFileName);
        }
        finally
        {
            // Remove the scratch file whether or not it could be read.
            System.IO.File.Delete(_strFileName);
        }
    }
    catch (System.Exception ex)
    {
        // Best-effort contract: callers receive "" on any failure, but the cause is
        // recorded instead of being silently discarded.
        System.Diagnostics.Trace.TraceWarning("fcnTextFromCrop failed: " + ex);
        str = "";
    }
    return str;
}