/*
 * Decompiled with CFR 0.152.
 */
package org.nuxeo.ecm.platform.semanticentities.extraction;

import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.scheme.SocketFactory;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpParams;
import org.nuxeo.common.utils.StringUtils;
import org.nuxeo.ecm.automation.core.annotations.Context;
import org.nuxeo.ecm.automation.core.annotations.Operation;
import org.nuxeo.ecm.automation.core.annotations.OperationMethod;
import org.nuxeo.ecm.automation.core.annotations.Param;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.ClientException;
import org.nuxeo.ecm.core.api.CoreSession;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.DocumentModelList;
import org.nuxeo.ecm.core.api.DocumentRef;
import org.nuxeo.ecm.core.api.DocumentRefList;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
import org.nuxeo.ecm.core.api.impl.DocumentModelListImpl;
import org.nuxeo.ecm.core.api.impl.blob.StreamingBlob;
import org.nuxeo.ecm.core.api.model.PropertyException;
import org.nuxeo.ecm.core.api.pathsegment.PathSegmentService;
import org.nuxeo.ecm.core.convert.api.ConversionService;
import org.nuxeo.ecm.core.schema.SchemaManager;
import org.nuxeo.ecm.core.utils.BlobsExtractor;
import org.nuxeo.ecm.platform.semanticentities.EntitySuggestion;
import org.nuxeo.ecm.platform.semanticentities.LocalEntityService;
import org.nuxeo.ecm.platform.semanticentities.adapter.OccurrenceGroup;
import org.nuxeo.ecm.platform.semanticentities.adapter.OccurrenceInfo;
import org.nuxeo.runtime.api.Framework;

/**
 * Automation operation that extracts the text of a document, posts it to a
 * remote semantic engine, parses the RDF answer and links the detected entity
 * mentions to local entity documents through the {@link LocalEntityService}.
 *
 * <p>NOTE(review): each instance builds its own pooled HTTP client in
 * {@link #initHttpClient()} and nothing in this class shuts the connection
 * manager down — confirm that instances are short-lived or add an explicit
 * shutdown hook.
 */
@Operation(id=OccurrenceExtractionOperation.ID, category="Document", label="Extract occurrences", description="Extract the text and launch an use a semantic engine to extract and link occurrences of semantic entities. Returns back the analyzed document.")
public class OccurrenceExtractionOperation {
    private static final Log log = LogFactory.getLog(OccurrenceExtractionOperation.class);

    public static final String ID = "Document.ExtractSemanticEntitiesOccurrences";

    /** Name of the converter used to turn blobs and HTML notes into plain text. */
    private static final String ANY2TEXT = "any2text";

    /** Charset used to decode the engine response (the request is sent as UTF-8 too). */
    private static final String ENGINE_RESPONSE_CHARSET = "UTF-8";

    /** Contexts longer than this are replaced by the bare mention. */
    private static final int MAX_CONTEXT_LENGTH = 500;

    /** Maximum number of entity suggestions requested per occurrence group. */
    private static final int MAX_SUGGESTIONS = 3;

    protected static final String DEFAULT_ENGINE_URL = "https://stanbol.demo.nuxeo.com/engines";

    /** Runtime property consulted when no {@code engineURL} parameter is set. */
    protected static final String ENGINE_URL_PROPERTY = "org.nuxeo.ecm.platform.semanticentities.stanbolUrl";

    protected static final String DEFAULT_SPARQL_QUERY = "SELECT ?label ?type ?context ";

    protected static final String DEFAULT_SOURCE_NAME = "dbpedia";

    protected static final String DEFAULT_ENGINE_OUTPUT_FORMAT = "application/rdf+xml";

    /** Maps the entity type URIs found in the engine output to local document type names. */
    protected static final Map<String, String> localTypes = new HashMap<String, String>();

    static {
        localTypes.put("http://dbpedia.org/ontology/Place", "Place");
        localTypes.put("http://dbpedia.org/ontology/Person", "Person");
        localTypes.put("http://dbpedia.org/ontology/Organisation", "Organization");
    }

    protected ConversionService conversionService = Framework.getService(ConversionService.class);

    protected HttpClient httpClient;

    @Context
    protected CoreSession session;

    @Param(name="engineURL", required=true, values={DEFAULT_ENGINE_URL})
    protected String engineURL = null;

    @Param(name="sparqlQuery", required=true, values={DEFAULT_SPARQL_QUERY})
    protected String sparqlQuery = DEFAULT_SPARQL_QUERY;

    @Param(name="sourceName", required=true, values={DEFAULT_SOURCE_NAME})
    protected String sourceName = DEFAULT_SOURCE_NAME;

    @Param(name="engineOutputFormat", required=true, values={DEFAULT_ENGINE_OUTPUT_FORMAT})
    protected String outputFormat = DEFAULT_ENGINE_OUTPUT_FORMAT;

    /** When true, a mention without any matching entity gets a newly created local entity. */
    @Param(name="linkToUnrecognizedEntities", required=true, values={"true"})
    protected boolean linkToUnrecognizedEntities = true;

    /** When true, a mention with several candidate entities is linked to the first candidate. */
    @Param(name="linkToAmbiguousEntities", required=true, values={"true"})
    protected boolean linkToAmbiguousEntities = true;

    /**
     * Creates the operation and its HTTP client.
     *
     * @throws Exception if a required service lookup fails
     */
    public OccurrenceExtractionOperation() throws Exception {
        this.initHttpClient();
    }

    /**
     * Creates the operation bound to the given session.
     *
     * @param session the core session used to read and create documents
     * @throws Exception if a required service lookup fails
     */
    public OccurrenceExtractionOperation(CoreSession session) throws Exception {
        this();
        this.session = session;
    }

    /**
     * Builds the HTTP client, registering the {@code http} (port 80) and
     * {@code https} (port 443) schemes on a thread-safe connection manager.
     */
    protected void initHttpClient() {
        SchemeRegistry schemeRegistry = new SchemeRegistry();
        schemeRegistry.register(new Scheme("http", PlainSocketFactory.getSocketFactory(), 80));
        schemeRegistry.register(new Scheme("https", SSLSocketFactory.getSocketFactory(), 443));
        HttpParams params = new BasicHttpParams();
        ClientConnectionManager connectionManager = new ThreadSafeClientConnManager(params, schemeRegistry);
        this.httpClient = new DefaultHttpClient(connectionManager, params);
    }

    /**
     * Runs the extraction on the document designated by {@code docRef}.
     *
     * @param docRef reference of the document to analyze
     * @return the reference of the analyzed document
     * @throws Exception if fetching or analyzing the document fails
     */
    @OperationMethod
    public DocumentRef run(DocumentRef docRef) throws Exception {
        DocumentModel analyzed = this.run(this.session.getDocument(docRef));
        return analyzed.getRef();
    }

    /**
     * Extracts the text of {@code doc}, sends it to the semantic engine and
     * records the occurrences of the entities found in the answer.
     *
     * <p>Documents whose type extends {@code Entity} or {@code Occurrence} are
     * returned untouched. For every occurrence group:
     * <ul>
     * <li>no suggestion: a new local entity is created and linked when
     * {@link #linkToUnrecognizedEntities} is set, otherwise the group is
     * skipped;</li>
     * <li>several suggestions: the group is skipped unless
     * {@link #linkToAmbiguousEntities} is set;</li>
     * <li>otherwise the first suggestion is linked.</li>
     * </ul>
     *
     * @param doc the document to analyze
     * @return the same document
     * @throws Exception if text extraction, the engine call or the linking fails
     */
    @OperationMethod
    public DocumentModel run(DocumentModel doc) throws Exception {
        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
        String docType = doc.getType();
        if (schemaManager.getDocumentTypeNamesExtending("Entity").contains(docType)
                || schemaManager.getDocumentTypeNamesExtending("Occurrence").contains(docType)) {
            return doc;
        }

        String textContent = this.extractText(doc);
        String output = this.callSemanticEngine(textContent, this.outputFormat);
        Model model = ModelFactory.createDefaultModel().read(new StringReader(output), null);
        List<OccurrenceGroup> groups = this.findStanbolEntityOccurrences(model);
        if (groups.isEmpty()) {
            return doc;
        }

        LocalEntityService leService = Framework.getService(LocalEntityService.class);
        DocumentModel entityContainer = leService.getEntityContainer(this.session);
        for (OccurrenceGroup group : groups) {
            List<?> suggestions = leService.suggestEntity(this.session, group.name, group.type, MAX_SUGGESTIONS);
            if (suggestions.isEmpty()) {
                // Nothing to pick a best guess from: either create a fresh
                // entity or skip the group. Falling through to get(0) here
                // would throw IndexOutOfBoundsException.
                if (this.linkToUnrecognizedEntities) {
                    DocumentModel localEntity = this.createLocalEntity(entityContainer, group);
                    leService.addOccurrences(this.session, doc.getRef(), localEntity.getRef(), group.occurrences);
                }
                continue;
            }
            if (suggestions.size() > 1 && !this.linkToAmbiguousEntities) {
                continue;
            }
            EntitySuggestion bestGuess = (EntitySuggestion) suggestions.get(0);
            leService.addOccurrences(this.session, doc.getRef(), bestGuess, group.occurrences);
        }
        return doc;
    }

    /**
     * Creates, under {@code entityContainer}, a new entity document typed and
     * titled after {@code group}, then saves the session.
     */
    private DocumentModel createLocalEntity(DocumentModel entityContainer, OccurrenceGroup group) throws Exception {
        PathSegmentService pathService = Framework.getService(PathSegmentService.class);
        DocumentModel localEntity = this.session.createDocumentModel(group.type);
        localEntity.setPropertyValue("dc:title", group.name);
        String pathSegment = pathService.generatePathSegment(localEntity);
        localEntity.setPathInfo(entityContainer.getPathAsString(), pathSegment);
        localEntity = this.session.createDocument(localEntity);
        this.session.save();
        return localEntity;
    }

    /**
     * Collects the text annotations of {@code model} into occurrence groups.
     *
     * <p>A group is started by each annotation that has no
     * {@code dcterms:relation}, carries a {@code dcterms:type} URI known to
     * {@link #localTypes} and yields a usable occurrence. Annotations that
     * point to it through {@code dcterms:relation} are added to the same group.
     *
     * @param model the RDF model returned by the engine
     * @return the groups found, possibly empty
     */
    public List<OccurrenceGroup> findStanbolEntityOccurrences(Model model) {
        Property rdfType = model.getProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
        Property entityType = model.getProperty("http://purl.org/dc/terms/type");
        Property dcRelation = model.getProperty("http://purl.org/dc/terms/relation");
        Resource textAnnotationType = model.getResource("http://fise.iks-project.eu/ontology/TextAnnotation");

        List<OccurrenceGroup> groups = new ArrayList<OccurrenceGroup>();
        ResIterator annotations = model.listSubjectsWithProperty(rdfType, (RDFNode) textAnnotationType);
        while (annotations.hasNext()) {
            Resource annotation = annotations.nextResource();
            if (model.listObjectsOfProperty(annotation, dcRelation).hasNext()) {
                // Related annotations are picked up from the annotation they point to.
                continue;
            }
            Statement typeStmt = annotation.getProperty(entityType);
            if (typeStmt == null || !typeStmt.getObject().isURIResource()) {
                continue;
            }
            Resource typeResource = (Resource) typeStmt.getObject().as(Resource.class);
            String localType = localTypes.get(typeResource.getURI());
            if (localType == null) {
                continue;
            }
            OccurrenceInfo occInfo = this.getOccurrenceInfo(model, annotation);
            if (occInfo == null) {
                continue;
            }

            OccurrenceGroup group = new OccurrenceGroup(occInfo.mention, localType);
            group.occurrences.add(occInfo);
            ResIterator related = model.listSubjectsWithProperty(dcRelation, (RDFNode) annotation);
            while (related.hasNext()) {
                OccurrenceInfo subMention = this.getOccurrenceInfo(model, related.nextResource());
                if (subMention != null) {
                    group.occurrences.add(subMention);
                }
            }
            groups.add(group);
        }
        return groups;
    }

    /**
     * Builds the occurrence described by {@code annotation}.
     *
     * @param model the model the annotation belongs to
     * @param annotation the text annotation resource
     * @return the occurrence, or {@code null} when the selected text or the
     *         selection context is missing or is not a literal
     */
    protected OccurrenceInfo getOccurrenceInfo(Model model, Resource annotation) {
        String mention = trimmedLiteral(annotation,
                model.getProperty("http://fise.iks-project.eu/ontology/selected-text"));
        if (mention == null) {
            return null;
        }
        String context = trimmedLiteral(annotation,
                model.getProperty("http://fise.iks-project.eu/ontology/selection-context"));
        if (context == null) {
            return null;
        }
        // Fall back to the mention itself when the context is unusable.
        if (!context.contains(mention) || context.length() > MAX_CONTEXT_LENGTH) {
            context = mention;
        }
        return new OccurrenceInfo(mention, context);
    }

    /**
     * Returns the trimmed literal value of {@code property} on {@code subject},
     * or {@code null} when the statement is absent or its object is not a literal.
     */
    private static String trimmedLiteral(Resource subject, Property property) {
        Statement stmt = subject.getProperty(property);
        if (stmt == null || !stmt.getObject().isLiteral()) {
            return null;
        }
        Literal literal = (Literal) stmt.getObject().as(Literal.class);
        return literal.getString().trim();
    }

    /**
     * Runs the extraction on every document of {@code docs}.
     *
     * @param docs the documents to analyze
     * @return a list holding the result of {@link #run(DocumentModel)} for each input
     * @throws Exception if analyzing one of the documents fails
     */
    @OperationMethod
    public DocumentModelList run(DocumentModelList docs) throws Exception {
        DocumentModelListImpl result = new DocumentModelListImpl((int) docs.totalSize());
        for (DocumentModel doc : docs) {
            result.add(this.run(doc));
        }
        return result;
    }

    /**
     * Runs the extraction on every referenced document.
     *
     * @param docRefs references of the documents to analyze
     * @return the analyzed documents, fetched again from the session
     * @throws Exception if fetching or analyzing one of the documents fails
     */
    @OperationMethod
    public DocumentModelList run(DocumentRefList docRefs) throws Exception {
        DocumentModelListImpl result = new DocumentModelListImpl((int) docRefs.totalSize());
        for (DocumentRef docRef : docRefs) {
            DocumentRef analyzedRef = this.run(docRef);
            result.add(this.session.getDocument(analyzedRef));
        }
        return result;
    }

    /**
     * Posts {@code textContent} as UTF-8 plain text to the semantic engine and
     * returns the response body.
     *
     * <p>The engine URL is the {@code engineURL} parameter when set, otherwise
     * the {@link #ENGINE_URL_PROPERTY} runtime property, otherwise
     * {@link #DEFAULT_ENGINE_URL}.
     *
     * @param textContent the text to analyze
     * @param outputFormat the MIME type sent in the {@code Accept} header
     * @return the response body, decoded as UTF-8
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException on an I/O failure or when the status code is not 200
     */
    protected String callSemanticEngine(String textContent, String outputFormat) throws ClientProtocolException, IOException {
        String effectiveEngineUrl = this.engineURL;
        if (effectiveEngineUrl == null) {
            effectiveEngineUrl = Framework.getProperty(ENGINE_URL_PROPERTY, DEFAULT_ENGINE_URL);
            if (effectiveEngineUrl.trim().isEmpty()) {
                effectiveEngineUrl = DEFAULT_ENGINE_URL;
            }
        }
        HttpPost post = new HttpPost(effectiveEngineUrl);
        try {
            post.setHeader("Accept", outputFormat);
            post.setHeader("Content-Type", "text/plain");
            post.setEntity(new ByteArrayEntity(textContent.getBytes("utf-8")));
            HttpResponse response = this.httpClient.execute(post);
            String body = readBody(response.getEntity());
            if (response.getStatusLine().getStatusCode() == 200) {
                return body;
            }
            String errorMsg = response.getStatusLine().toString();
            log.error(errorMsg + ":\n" + body);
            throw new IOException(errorMsg);
        } catch (IOException e) {
            // Also covers ClientProtocolException, which is an IOException.
            post.abort();
            throw e;
        }
    }

    /**
     * Reads the whole entity as a UTF-8 string, always closing the stream.
     * Returns an empty string when the response carries no entity.
     */
    private static String readBody(HttpEntity entity) throws IOException {
        if (entity == null) {
            return "";
        }
        InputStream content = entity.getContent();
        try {
            // Decode explicitly: the platform default charset is not reliable.
            return IOUtils.toString(content, ENGINE_RESPONSE_CHARSET);
        } finally {
            content.close();
        }
    }

    /**
     * Assembles the text sent to the engine: the title, the description when
     * present, the {@code note:note} content converted to text when present,
     * and the text of every blob of the document.
     *
     * @param doc the document to read
     * @return the concatenated text, parts separated by blank lines
     * @throws ClientException if reading or converting the content fails
     */
    protected String extractText(DocumentModel doc) throws ClientException {
        StringBuilder sb = new StringBuilder();
        sb.append(doc.getTitle());
        sb.append("\n\n");
        Serializable description = doc.getPropertyValue("dc:description");
        if (description != null) {
            sb.append(description);
            sb.append("\n\n");
        }
        try {
            String noteContent = (String) doc.getPropertyValue("note:note");
            if (noteContent != null) {
                StreamingBlob noteBlob = StreamingBlob.createFromString(noteContent);
                noteBlob.setMimeType("text/html");
                BlobHolder converted = this.conversionService.convert(ANY2TEXT, new SimpleBlobHolder(noteBlob), null);
                // Same guard as blobsToText: a converter may yield nothing.
                if (converted != null && converted.getBlob() != null) {
                    sb.append(converted.getBlob().getString());
                    sb.append("\n\n");
                }
            }
        } catch (PropertyException ignored) {
            // Best effort: presumably the document simply has no note:note
            // property, in which case there is no note text to add.
        } catch (IOException e) {
            throw new ClientException(e);
        }
        BlobsExtractor extractor = new BlobsExtractor();
        sb.append(this.blobsToText(extractor.getBlobs(doc)));
        return sb.toString();
    }

    /**
     * Converts each blob to text and joins the results with blank lines.
     * A blob whose conversion fails or yields nothing is logged and/or
     * skipped; NUL characters in the text are replaced by spaces.
     *
     * @param blobs the blobs to convert
     * @return the joined text of the blobs that could be converted
     */
    protected String blobsToText(List<Blob> blobs) {
        List<String> texts = new ArrayList<String>();
        for (Blob blob : blobs) {
            try {
                BlobHolder result = this.conversionService.convert(ANY2TEXT, new SimpleBlobHolder(blob), null);
                if (result == null) {
                    continue;
                }
                Blob textBlob = result.getBlob();
                if (textBlob == null) {
                    continue;
                }
                String text = new String(textBlob.getByteArray(), "UTF-8");
                if (text.indexOf(0) >= 0) {
                    text = text.replace("\u0000", " ");
                }
                texts.add(text);
            } catch (Exception e) {
                // Best effort: one unreadable blob must not abort the extraction.
                log.error(e.getMessage(), e);
            }
        }
        return StringUtils.join(texts, "\n\n");
    }
}

