/*
 * Decompiled with CFR 0.152.
 */
package org.nuxeo.labs.aws.textract;

import com.amazonaws.services.textract.model.DetectDocumentTextResult;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.nuxeo.ecm.automation.core.annotations.Context;
import org.nuxeo.ecm.automation.core.annotations.Operation;
import org.nuxeo.ecm.automation.core.annotations.OperationMethod;
import org.nuxeo.ecm.automation.core.annotations.Param;
import org.nuxeo.ecm.automation.core.util.BlobList;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.CoreSession;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.labs.aws.textract.TextractService;
import org.nuxeo.labs.aws.textract.TextractUtils;

/**
 * Automation operation that runs Textract text detection on a blob of the input document and
 * stores the outcome in a field of that same document.
 *
 * <p>The blob is read from {@code blobXPath} and handed to
 * {@code TextractUtils.splitPDFIfMoreThanOnePage}. When that yields several blobs, each one is sent
 * to Textract separately and the per-blob results are merged:
 * <ul>
 * <li>with {@code returnRawJson}, into the string form of a JSON array holding one object per
 * blob;</li>
 * <li>otherwise, into a single linefeed-separated text passed through
 * {@code TextractUtils.removeDuplicates}.</li>
 * </ul>
 * The result is written to {@code resultXPath}; the document is saved only if {@code saveDocument}
 * is true.
 */
@Operation(id="Textract.DetectDocumentText", category="Document", label="Textract.DocumentText", description="Call the DocumentText synchone Textract API. (see limitation in this case). Use the blob at blobXPath (default file:content), and save the result in the resultXPath field. If savbeDocument is true (false by default) the input document is saved. Granularity is WORD, LINE. If returnRawJson is true, granularity is ignored and the operation saves the JSON String as returned by Textract. You can get this string and JSON.Parse() it? WORD and LINE set the values to a String, with a linefeed as separator. It does not return duplicates. WARNING; This is POC and using AWS synchrnous calls => it analyses only the first page. Pull requests welcome.")
public class DetectDocumentTextOp {
    public static final String ID = "Textract.DetectDocumentText";

    /** Separator used inside a page's text, between pages, and when removing duplicates. */
    private static final String TEXT_SEPARATOR = "\n";

    @Context
    protected CoreSession session;

    /** XPath of the blob to analyze. */
    @Param(name="blobXPath", required=false)
    protected String blobXPath = "file:content";

    /** XPath of the field receiving the result. */
    @Param(name="resultXPath", required=true)
    protected String resultXPath;

    /** Name of a {@code TextractUtils.Granularity} constant; only used when text is extracted. */
    @Param(name="granularity", widget="Option", values={"WORD", "LINE"}, required=false)
    protected String granularity = "WORD";

    /** When true, the result is JSON instead of extracted text. */
    @Param(name="returnRawJson", required=false)
    protected Boolean returnRawJson = false;

    /** When true, the document is saved through the session after the result is set. */
    @Param(name="saveDocument", required=false)
    protected Boolean saveDocument = false;

    // bucket, bucketPrefix and region are only used together: the custom service instance is
    // requested when all three are non-blank, otherwise the default instance is used.
    @Param(name="bucket", required=false, description="Used when unit testing, mainly")
    protected String bucket = null;
    @Param(name="bucketPrefix", required=false, description="Used when unit testing, mainly")
    protected String bucketPrefix = null;
    @Param(name="region", required=false, description="Used when unit testing, mainly")
    protected String region = null;

    /**
     * Runs text detection on the blob at {@code blobXPath} and stores the result at
     * {@code resultXPath}.
     *
     * @param doc the document holding the blob and receiving the result
     * @return the input document, or the document returned by the session if it was saved
     * @throws IllegalArgumentException if text is requested and {@code granularity} does not name
     *         a {@code TextractUtils.Granularity} constant
     */
    @OperationMethod
    public DocumentModel run(DocumentModel doc) {
        Blob blob = (Blob) doc.getPropertyValue(this.blobXPath);

        // NOTE(review): a null list is treated as "nothing was split"; confirm against
        // TextractUtils.splitPDFIfMoreThanOnePage.
        BlobList pageBlobs = TextractUtils.splitPDFIfMoreThanOnePage(blob);
        int pages = pageBlobs == null ? 1 : pageBlobs.size();

        TextractService service = StringUtils.isNoneBlank(this.bucket, this.bucketPrefix, this.region)
                ? TextractService.getInstance(this.bucket, this.bucketPrefix, this.region)
                : TextractService.getInstance();

        // Null-safe: a null parameter value is handled like the default (false).
        boolean rawJson = Boolean.TRUE.equals(this.returnRawJson);

        Object result;
        if (pages == 1) {
            result = rawJson
                    ? service.detectDocumentTextGetRawResultJsonString(blob)
                    : service.detectDocumentTextGetText(resolveGranularity(), blob);
        } else {
            try {
                result = rawJson
                        ? detectAllPagesAsJson(service, pageBlobs)
                        : detectAllPagesAsText(service, pageBlobs, resolveGranularity());
            } finally {
                // Always release the split blobs, whichever branch ran and even if a Textract
                // call failed.
                TextractUtils.deleteFilesSilently(pageBlobs);
            }
        }

        doc.setPropertyValue(this.resultXPath, (Serializable) result);
        if (Boolean.TRUE.equals(this.saveDocument)) {
            doc = this.session.saveDocument(doc);
        }
        return doc;
    }

    /**
     * Converts the {@code granularity} parameter to its enum constant. Called only when text is
     * extracted, so an unusable value cannot fail a raw JSON request, where it is ignored.
     */
    private TextractUtils.Granularity resolveGranularity() {
        return TextractUtils.Granularity.valueOf(this.granularity);
    }

    /** Calls Textract once per blob and returns the results as a JSON array string. */
    private String detectAllPagesAsJson(TextractService service, BlobList pageBlobs) {
        JSONArray allPages = new JSONArray();
        for (Blob pageBlob : pageBlobs) {
            DetectDocumentTextResult pageResult = service.detectDocumentText(pageBlob);
            allPages.put(new JSONObject(pageResult));
        }
        return allPages.toString();
    }

    /**
     * Calls Textract once per blob, joins the per-blob texts with a linefeed and removes
     * duplicates from the joined text.
     */
    private String detectAllPagesAsText(TextractService service, BlobList pageBlobs,
            TextractUtils.Granularity pageGranularity) {
        StringBuilder joined = new StringBuilder();
        for (Blob pageBlob : pageBlobs) {
            DetectDocumentTextResult pageResult = service.detectDocumentText(pageBlob);
            String pageText = TextractUtils.getAllTextJoined(() -> pageResult.getBlocks(), pageGranularity,
                    TEXT_SEPARATOR);
            // Pages must be separated by the same linefeed used within a page, otherwise the
            // duplicate removal cannot split the text at page boundaries.
            if (joined.length() > 0) {
                joined.append(TEXT_SEPARATOR);
            }
            joined.append(pageText);
        }
        return TextractUtils.removeDuplicates(joined.toString(), TEXT_SEPARATOR);
    }
}

