123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698 |
- /*
- * Copyright 2011, Google Inc.
- * Copyright 2011, Robert Theis
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
#include <stdio.h>
#include <malloc.h>

#include <new>

#include "android/bitmap.h"
#include "common.h"
#include "baseapi.h"
#include "ocrclass.h"
#include "allheaders.h"
#include "renderer.h"
- static jmethodID method_onProgressValues;
- struct native_data_t {
- tesseract::TessBaseAPI api;
- PIX *pix;
- void *data;
- bool debug;
- Box* currentTextBox = NULL;
- l_int32 lastProgress;
- bool cancel_ocr;
- JNIEnv *cachedEnv;
- jobject* cachedObject;
- bool isStateValid() {
- if (cancel_ocr == false && cachedEnv != NULL && cachedObject != NULL) {
- return true;
- } else {
- LOGI("state is cancelled");
- return false;
- }
- }
- void setTextBoundaries(l_uint32 x, l_uint32 y, l_uint32 width, l_uint32 height) {
- boxSetGeometry(currentTextBox, x, y, width, height);
- }
- void initStateVariables(JNIEnv* env, jobject *object) {
- cancel_ocr = false;
- cachedEnv = env;
- cachedObject = object;
- lastProgress = 0;
- }
- void resetStateVariables() {
- cancel_ocr = false;
- cachedEnv = NULL;
- cachedObject = NULL;
- lastProgress = 0;
- boxSetGeometry(currentTextBox, 0, 0, 0, 0);
- }
- native_data_t() {
- currentTextBox = boxCreate(0, 0, 0, 0);
- lastProgress = 0;
- pix = NULL;
- data = NULL;
- debug = false;
- cachedEnv = NULL;
- cachedObject = NULL;
- cancel_ocr = false;
- }
- ~native_data_t() {
- boxDestroy(¤tTextBox);
- }
- };
- /**
- * Callback for Tesseract's monitor to cancel recognition.
- */
- bool cancelFunc(void* cancel_this, int words) {
- native_data_t *nat = (native_data_t*)cancel_this;
- return nat->cancel_ocr;
- }
- /**
- * Callback for Tesseract's monitor to update progress.
- */
- bool progressJavaCallback(void* progress_this, int progress, int left, int right,
- int top, int bottom) {
- native_data_t *nat = (native_data_t*)progress_this;
- if (nat->isStateValid() && nat->currentTextBox != NULL) {
- if (progress > nat->lastProgress || left != 0 || right != 0 || top != 0 || bottom != 0) {
- int x, y, width, height;
- boxGetGeometry(nat->currentTextBox, &x, &y, &width, &height);
- nat->cachedEnv->CallVoidMethod(*(nat->cachedObject), method_onProgressValues, progress,
- (jint) left, (jint) right, (jint) top, (jint) bottom,
- (jint) x, (jint) (x + width), (jint) (y + height), (jint) y);
- nat->lastProgress = progress;
- }
- }
- return true;
- }
- #ifdef __cplusplus
- extern "C" {
- #endif
- jint JNI_OnLoad(JavaVM* vm, void* reserved) {
- JNIEnv *env;
- if (vm->GetEnv((void**) &env, JNI_VERSION_1_6) != JNI_OK) {
- LOGE("Failed to get the environment using GetEnv()");
- return -1;
- }
- return JNI_VERSION_1_6;
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeClassInit(JNIEnv* env,
- jclass clazz) {
- method_onProgressValues = env->GetMethodID(clazz, "onProgressValues", "(IIIIIIIII)V");
- }
- jlong Java_com_googlecode_tesseract_android_TessBaseAPI_nativeConstruct(JNIEnv* env,
- jobject object) {
- native_data_t *nat = new native_data_t;
- if (nat == NULL) {
- LOGE("%s: out of memory!", __FUNCTION__);
- return 0;
- }
- return (jlong) nat;
- }
- jboolean Java_com_googlecode_tesseract_android_TessBaseAPI_nativeInit(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jstring dir,
- jstring lang) {
- native_data_t *nat = (native_data_t*) mNativeData;
- const char *c_dir = env->GetStringUTFChars(dir, NULL);
- const char *c_lang = env->GetStringUTFChars(lang, NULL);
- jboolean res = JNI_TRUE;
- if (nat->api.Init(c_dir, c_lang)) {
- LOGE("Could not initialize Tesseract API with language=%s!", c_lang);
- res = JNI_FALSE;
- } else {
- LOGI("Initialized Tesseract API with language=%s", c_lang);
- }
- env->ReleaseStringUTFChars(dir, c_dir);
- env->ReleaseStringUTFChars(lang, c_lang);
- return res;
- }
- jboolean Java_com_googlecode_tesseract_android_TessBaseAPI_nativeInitOem(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jstring dir,
- jstring lang,
- jint mode) {
- native_data_t *nat = (native_data_t*) mNativeData;
- const char *c_dir = env->GetStringUTFChars(dir, NULL);
- const char *c_lang = env->GetStringUTFChars(lang, NULL);
- jboolean res = JNI_TRUE;
- if (nat->api.Init(c_dir, c_lang, (tesseract::OcrEngineMode) mode)) {
- LOGE("Could not initialize Tesseract API with language=%s!", c_lang);
- res = JNI_FALSE;
- } else {
- LOGI("Initialized Tesseract API with language=%s", c_lang);
- }
- env->ReleaseStringUTFChars(dir, c_dir);
- env->ReleaseStringUTFChars(lang, c_lang);
- return res;
- }
- jstring Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetInitLanguagesAsString(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- const char *text = nat->api.GetInitLanguagesAsString();
- jstring result = env->NewStringUTF(text);
- return result;
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeSetImageBytes(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jbyteArray data,
- jint width,
- jint height,
- jint bpp,
- jint bpl) {
- jbyte *data_array = env->GetByteArrayElements(data, NULL);
- int count = env->GetArrayLength(data);
- unsigned char* imagedata = (unsigned char *) malloc(count * sizeof(unsigned char));
- // This is painfully slow, but necessary because we don't know
- // how many bits the JVM might be using to represent a byte
- for (int i = 0; i < count; i++) {
- imagedata[i] = (unsigned char) data_array[i];
- }
- env->ReleaseByteArrayElements(data, data_array, JNI_ABORT);
- native_data_t *nat = (native_data_t*) mNativeData;
- nat->api.SetImage(imagedata, (int) width, (int) height, (int) bpp, (int) bpl);
- // Since Tesseract doesn't take ownership of the memory, we keep a pointer in the native
- // code struct. We need to free that pointer when we release our instance of Tesseract or
- // attempt to set a new image using one of the nativeSet* methods.
- if (nat->data != NULL)
- free(nat->data);
- else if (nat->pix != NULL)
- pixDestroy(&nat->pix);
- nat->data = imagedata;
- nat->pix = NULL;
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeSetImagePix(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jlong nativePix) {
- PIX *pixs = (PIX *) nativePix;
- PIX *pixd = pixClone(pixs);
- native_data_t *nat = (native_data_t*) mNativeData;
- if (pixd) {
- l_int32 width = pixGetWidth(pixd);
- l_int32 height = pixGetHeight(pixd);
- nat->setTextBoundaries(0, 0, width, height);
- }
- nat->api.SetImage(pixd);
- // Since Tesseract doesn't take ownership of the memory, we keep a pointer in the native
- // code struct. We need to free that pointer when we release our instance of Tesseract or
- // attempt to set a new image using one of the nativeSet* methods.
- if (nat->data != NULL)
- free(nat->data);
- else if (nat->pix != NULL)
- pixDestroy(&nat->pix);
- nat->data = NULL;
- nat->pix = pixd;
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeSetRectangle(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jint left,
- jint top,
- jint width,
- jint height) {
- native_data_t *nat = (native_data_t*) mNativeData;
- nat->setTextBoundaries(left, top, width, height);
- nat->api.SetRectangle(left, top, width, height);
- }
- jstring Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetUTF8Text(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- nat->initStateVariables(env, &thiz);
- ETEXT_DESC monitor;
- monitor.progress_callback = progressJavaCallback;
- monitor.cancel = cancelFunc;
- monitor.cancel_this = nat;
- monitor.progress_this = nat;
- char *text = nat->api.GetUTF8Text();
- jstring result = env->NewStringUTF(text);
- free(text);
- nat->resetStateVariables();
- return result;
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeStop(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- // Stop by setting a flag that's used by the monitor
- nat->resetStateVariables();
- nat->cancel_ocr = true;
- }
- jint Java_com_googlecode_tesseract_android_TessBaseAPI_nativeMeanConfidence(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- return (jint) nat->api.MeanTextConf();
- }
- jintArray Java_com_googlecode_tesseract_android_TessBaseAPI_nativeWordConfidences(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- int *confs = nat->api.AllWordConfidences();
- if (confs == NULL) {
- LOGE("Could not get word-confidence values!");
- return NULL;
- }
- int len, *trav;
- for (len = 0, trav = confs; *trav != -1; trav++, len++)
- ;
- LOG_ASSERT((confs != NULL), "Confidence array has %d elements", len);
- jintArray ret = env->NewIntArray(len);
- LOG_ASSERT((ret != NULL), "Could not create Java confidence array!");
- env->SetIntArrayRegion(ret, 0, len, confs);
- delete[] confs;
- return ret;
- }
- jboolean Java_com_googlecode_tesseract_android_TessBaseAPI_nativeSetVariable(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jstring var,
- jstring value) {
- native_data_t *nat = (native_data_t*) mNativeData;
- const char *c_var = env->GetStringUTFChars(var, NULL);
- const char *c_value = env->GetStringUTFChars(value, NULL);
- jboolean set = nat->api.SetVariable(c_var, c_value) ? JNI_TRUE : JNI_FALSE;
- env->ReleaseStringUTFChars(var, c_var);
- env->ReleaseStringUTFChars(value, c_value);
- return set;
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeClear(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- nat->api.Clear();
- // Call between pages or documents etc to free up memory and forget adaptive data.
- nat->api.ClearAdaptiveClassifier();
- // Since Tesseract doesn't take ownership of the memory, we keep a pointer in the native
- // code struct. We need to free that pointer when we release our instance of Tesseract or
- // attempt to set a new image using one of the nativeSet* methods.
- if (nat->data != NULL)
- free(nat->data);
- else if (nat->pix != NULL)
- pixDestroy(&nat->pix);
- nat->data = NULL;
- nat->pix = NULL;
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeEnd(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- nat->api.End();
- // Since Tesseract doesn't take ownership of the memory, we keep a pointer in the native
- // code struct. We need to free that pointer when we release our instance of Tesseract or
- // attempt to set a new image using one of the nativeSet* methods.
- if (nat->data != NULL)
- free(nat->data);
- else if (nat->pix != NULL)
- pixDestroy(&nat->pix);
- nat->data = NULL;
- nat->pix = NULL;
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeSetDebug(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jboolean debug) {
- native_data_t *nat = (native_data_t*) mNativeData;
- nat->debug = (debug == JNI_TRUE) ? TRUE : FALSE;
- }
- jint Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetPageSegMode(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- return nat->api.GetPageSegMode();
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeSetPageSegMode(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jint mode) {
- native_data_t *nat = (native_data_t*) mNativeData;
- nat->api.SetPageSegMode((tesseract::PageSegMode) mode);
- }
- jlong Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetThresholdedImage(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- PIX *pix = nat->api.GetThresholdedImage();
- return (jlong) pix;
- }
- jlong Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetRegions(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- PIXA *pixa = NULL;
- BOXA *boxa;
- boxa = nat->api.GetRegions(&pixa);
- boxaDestroy(&boxa);
- return reinterpret_cast<jlong>(pixa);
- }
- jlong Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetTextlines(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- PIXA *pixa = NULL;
- BOXA *boxa;
- boxa = nat->api.GetTextlines(&pixa, NULL);
- boxaDestroy(&boxa);
- return reinterpret_cast<jlong>(pixa);
- }
- jlong Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetStrips(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- PIXA *pixa = NULL;
- BOXA *boxa;
- boxa = nat->api.GetStrips(&pixa, NULL);
- boxaDestroy(&boxa);
- return reinterpret_cast<jlong>(pixa);
- }
- jlong Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetWords(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- PIXA *pixa = NULL;
- BOXA *boxa;
- boxa = nat->api.GetWords(&pixa);
- boxaDestroy(&boxa);
- return reinterpret_cast<jlong>(pixa);
- }
- jlong Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetConnectedComponents(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- PIXA *pixa = NULL;
- BOXA *boxa;
- boxa = nat->api.GetConnectedComponents(&pixa);
- boxaDestroy(&boxa);
- return reinterpret_cast<jlong>(pixa);
- }
- jlong Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetResultIterator(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- return (jlong) nat->api.GetIterator();
- }
- jstring Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetHOCRText(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jint page) {
- native_data_t *nat = (native_data_t*) mNativeData;
- nat->initStateVariables(env, &thiz);
- ETEXT_DESC monitor;
- monitor.progress_callback = progressJavaCallback;
- monitor.cancel = cancelFunc;
- monitor.cancel_this = nat;
- monitor.progress_this = nat;
- char *text = nat->api.GetHOCRText(&monitor, page);
- jstring result = env->NewStringUTF(text);
- free(text);
- nat->resetStateVariables();
- return result;
- }
- jstring Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetBoxText(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jint page) {
- native_data_t *nat = (native_data_t*) mNativeData;
- char *text = nat->api.GetBoxText(page);
- jstring result = env->NewStringUTF(text);
- free(text);
- return result;
- }
- jstring Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetVersion(JNIEnv *env,
- jobject thiz,
- jlong mNativeData) {
- native_data_t *nat = (native_data_t*) mNativeData;
- const char *text = nat->api.Version();
- jstring result = env->NewStringUTF(text);
- return result;
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeSetInputName(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jstring name) {
- native_data_t *nat = (native_data_t*) mNativeData;
- const char *c_name = env->GetStringUTFChars(name, NULL);
- nat->api.SetInputName(c_name);
- env->ReleaseStringUTFChars(name, c_name);
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeSetOutputName(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jstring name) {
- native_data_t *nat = (native_data_t*) mNativeData;
- const char *c_name = env->GetStringUTFChars(name, NULL);
- nat->api.SetOutputName(c_name);
- env->ReleaseStringUTFChars(name, c_name);
- }
- void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeReadConfigFile(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jstring fileName) {
- native_data_t *nat = (native_data_t*) mNativeData;
- const char *c_file_name = env->GetStringUTFChars(fileName, NULL);
- nat->api.ReadConfigFile(c_file_name);
- env->ReleaseStringUTFChars(fileName, c_file_name);
- }
- jlong Java_com_googlecode_tesseract_android_TessPdfRenderer_nativeCreate(JNIEnv *env,
- jobject thiz,
- jlong jTessBaseApi,
- jstring outputPath) {
- native_data_t *nat = (native_data_t*) jTessBaseApi;
- const char *c_output_path = env->GetStringUTFChars(outputPath, NULL);
- tesseract::TessPDFRenderer* result = new tesseract::TessPDFRenderer(c_output_path, nat->api.GetDatapath());
- env->ReleaseStringUTFChars(outputPath, c_output_path);
- return (jlong) result;
- }
- void Java_com_googlecode_tesseract_android_TessPdfRenderer_nativeRecycle(JNIEnv *env,
- jobject thiz,
- jlong jPointer) {
- tesseract::TessPDFRenderer* renderer = (tesseract::TessPDFRenderer*) jPointer;
- delete renderer;
- }
- jboolean Java_com_googlecode_tesseract_android_TessBaseAPI_nativeBeginDocument(JNIEnv *env,
- jobject thiz,
- jlong jRenderer,
- jstring title) {
- const char *c_title = env->GetStringUTFChars(title, NULL);
- tesseract::TessPDFRenderer* pdfRenderer = (tesseract::TessPDFRenderer*) jRenderer;
- jboolean res = JNI_TRUE;
- if (pdfRenderer->BeginDocument(c_title)) {
- res = JNI_FALSE;
- }
- env->ReleaseStringUTFChars(title, c_title);
- return res;
- }
- jboolean Java_com_googlecode_tesseract_android_TessBaseAPI_nativeEndDocument(JNIEnv *env,
- jobject thiz,
- jlong jRenderer) {
- tesseract::TessPDFRenderer* pdfRenderer = (tesseract::TessPDFRenderer*) jRenderer;
- return pdfRenderer->EndDocument();
- }
- jboolean Java_com_googlecode_tesseract_android_TessBaseAPI_nativeAddPageToDocument(JNIEnv *env,
- jobject thiz,
- jlong mNativeData,
- jlong jPix,
- jstring jPath,
- jlong jRenderer) {
- tesseract::TessPDFRenderer* pdfRenderer = (tesseract::TessPDFRenderer*) jRenderer;
- native_data_t *nat = (native_data_t*) mNativeData;
- PIX* pix = (PIX*) jPix;
- const char *inputImage = env->GetStringUTFChars(jPath, NULL);
- nat->api.ProcessPage(pix, 0, inputImage, NULL, 0, pdfRenderer);
- env->ReleaseStringUTFChars(jPath, inputImage);
- return true;
- }
- #ifdef __cplusplus
- }
- #endif
|