/*
 * Decompiled with CFR 0.152.
 */
package dummy.name.utils;

import dummy.name.math.PageRank;
import dummy.name.modularity.Community;
import dummy.name.modularity.Graph;
import dummy.name.nlputils.Index;
import dummy.name.nlputils.NLPUtils;
import dummy.name.nlputils.SparseDocument;
import dummy.name.nlputils.SparseKeyphrase;
import dummy.name.stemmer.PorterStemmer;
import dummy.name.utils.PCA;
import java.awt.Color;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;

public class ReducedMemVTM {
    /** Threshold used when the caller supplies none; equal to {@code Integer.MAX_VALUE}. */
    private static final int DEFAULT_THRESHOLD = Integer.MAX_VALUE;
    /** NOTE(review): setThreshold clamps to the literal 10, presumably this constant before decompilation. */
    private static final int MAX_THRESHOLD = 10;
    /** When true, processKeyphraseInput does not add previously unseen phrases to the vocabulary. */
    private boolean stopVocabGrowth;
    /** Rank cut-off used by calculateSimilarity and getTopKeyphrases; see setThreshold. */
    private int threshold;
    /** Number of documents; set from keyphrasesToDocs.size() in the constructor and incremented by addDoc. */
    private int totalDocumentsSize;
    /** Community id looked up by document id (see getTopKeyphrases); not populated in the visible part of the file. */
    private int[] communities;
    /** Only its length is read in the visible code (WF_IWF scoring); presumably one entry per community. */
    private int[] communitySizes;
    /** Not referenced in the visible part of the file. */
    private double[][] chiSquare;
    /** PageRank scores stored by calculateOverlaps when it is asked to compute them. */
    private double[] pageRankVals;
    /** Result of NLPUtils.stableSort over the dates; entry i is the document id whose date is documentDates[i]. */
    private int[] dateBasedOrdering;
    /** Document dates as epoch milliseconds, sorted ascending by the constructor. */
    private Long[] documentDates;
    /** Exposed through getDocsToCommunities; not populated in the visible part of the file. */
    private Map<Integer, List<Integer>> docsToCommunities;
    /** Despite the name, holds one SparseDocument per document, indexed by document id. */
    private List<SparseDocument> keyphrasesToDocs;
    /** Despite the name, holds one SparseKeyphrase per vocabulary entry, indexed by feature id. */
    private List<SparseKeyphrase> docsToKeyphrases;
    /** Maps a normalized phrase (first column of a keyphrase line) to its feature id. */
    private Map<String, Integer> vocabularyToIds;
    /** Maps a feature id to the surface form chosen when the phrase was first seen. */
    private Map<Integer, String> idsToOriginalForm;
    /** Maps a document title to the file name taken from the first column of its header line. */
    private Map<String, String> titlesToFiles;
    /** How per-phrase values of two documents are combined in calculateSimilarity. */
    private Calculation calc;
    /** Which per-phrase value of a document is used; see getDocValuesToCountWith. */
    private Metric metric;
    /** Scoring scheme used by getTopKeyphrases for groups of documents. */
    private KeyphraseStrategy keyPhraseStrat;
    /** Stemmer used by getDocIdsWithKeyphrase to normalize a query phrase. */
    private PorterStemmer ps = new PorterStemmer();
    /** The keyphrases array most recently passed to (and filled by) getTopKeyphrases. */
    public String[] docKeyPhrases;
    /** Distinct document dates formatted as "yyyy.MM.dd."; filled by the constructor. */
    public Set<String> mySet = new HashSet<String>();
    /** The entries of mySet in ascending string order. */
    public List<String> sortedDate = new ArrayList<String>();
    /** Topic labels; determineEtalonCommunities indexes this array with (topicNumber - 1). */
    private static final String[] WS_TOPICS_MAPPING = new String[]{"egzotikus nyelvek NLP-je", "MT", "NL generation", "annotation", "parsing", "discourse, speech", "transliteration", "BioNLP, medical NLP", "textual entailment, paraphraseing", "OM, SA", "MWE", "semantics", "IR", "education", "WordNet, ontologies", "graph-based methods", "morphology and phonology", "text summarization", "negation and speculation", "unsupervised", "QA", "word senses and multilinguality???", "cultural heritage, \u00e9s egy\u00e9b b\u00f6lcs\u00e9szed\u00e9sek", "software engineering", "NLP and XML", "IE", "cognitive science", "MISC"};
    /** Random source with the fixed seed 15; not used in the visible part of the file. */
    private Random rand = new Random(15L);

    public ReducedMemVTM(ReducedMemVTM v) {
        this.vocabularyToIds = v.vocabularyToIds;
        this.idsToOriginalForm = v.idsToOriginalForm;
        this.keyphrasesToDocs = new ArrayList<SparseDocument>(v.getTotalDocumentSize());
        this.docsToKeyphrases = new ArrayList<SparseKeyphrase>(v.docsToKeyphrases.size());
        for (SparseKeyphrase sk : v.docsToKeyphrases) {
            this.docsToKeyphrases.add((SparseKeyphrase)sk.clone());
        }
        this.titlesToFiles = new HashMap<String, String>();
        this.calc = v.getCalculation();
        this.metric = v.getMetric();
        this.keyPhraseStrat = v.getKeyphraseStrat();
        this.threshold = v.getThreshold();
    }

    /**
     * Builds a model from the given input with the largest possible requested threshold
     * and an empty "not OK documents" map.
     *
     * @param inputLoc the keyphrase input stream(s), forwarded unchanged
     */
    public ReducedMemVTM(InputStream ... inputLoc) {
        this(
                Integer.MAX_VALUE,
                new HashMap<String, String>(),
                inputLoc);
    }

    /**
     * Returns the first entry of the sorted date array.
     *
     * @return the earliest document date in epoch milliseconds
     */
    public long getFirstDate() {
        Long earliest = documentDates[0];
        return earliest;
    }

    /**
     * Returns the last entry of the sorted date array.
     *
     * @return the latest document date in epoch milliseconds
     */
    public long getLastDate() {
        int lastPosition = documentDates.length - 1;
        return documentDates[lastPosition];
    }

    /**
     * Returns the internal community array itself (no copy is made).
     *
     * @return the community assignment array, possibly {@code null}
     */
    public int[] getCommunities() {
        return communities;
    }

    /**
     * Returns the number of documents currently counted by this model.
     *
     * @return the document count
     */
    public int getTotalDocumentSize() {
        return totalDocumentsSize;
    }

    /**
     * Enables or disables freezing of the vocabulary.
     *
     * @param sg {@code true} to stop unseen phrases from being added to the vocabulary
     */
    public void setStopGrowth(boolean sg) {
        stopVocabGrowth = sg;
    }

    /**
     * Returns the rank threshold currently in effect.
     *
     * @return the threshold
     */
    public int getThreshold() {
        return threshold;
    }

    /**
     * Returns the configured way of combining per-phrase values.
     *
     * @return the calculation, or {@code null} if none was set
     */
    public Calculation getCalculation() {
        return calc;
    }

    /**
     * Returns the configured per-phrase metric.
     *
     * @return the metric, or {@code null} if none was set
     */
    public Metric getMetric() {
        return metric;
    }

    /**
     * Returns the configured keyphrase scoring strategy.
     *
     * @return the strategy, or {@code null} if none was set
     */
    public KeyphraseStrategy getKeyphraseStrat() {
        return keyPhraseStrat;
    }

    /**
     * Returns the internal PageRank array itself (no copy is made).
     *
     * @return the PageRank values, or {@code null} if they were never computed
     */
    public double[] getPageRanks() {
        return pageRankVals;
    }

    /**
     * Returns the internal map from feature id to surface form (no copy is made).
     *
     * @return the id-to-original-form map
     */
    public Map<Integer, String> getIdsToVocab() {
        return idsToOriginalForm;
    }

    /**
     * Returns the internal documents-to-communities map (no copy is made).
     *
     * @return the map, possibly {@code null}
     */
    public Map<Integer, List<Integer>> getDocsToCommunities() {
        return docsToCommunities;
    }

    /**
     * Looks up the date of a document by scanning the date ordering linearly.
     *
     * @param docId the document id to look for
     * @return the date of the document, or {@code null} if the id does not occur within the
     *         first {@code totalDocumentsSize} entries of the ordering
     */
    public Date getDocDate(int docId) {
        for (int position = 0; position < totalDocumentsSize; position++) {
            if (dateBasedOrdering[position] != docId) {
                continue;
            }
            // Position in the ordering doubles as the index into the sorted dates.
            return new Date(documentDates[position]);
        }
        return null;
    }

    /**
     * Appends a document to the model and registers it with each of its phrases.
     *
     * <p>The new document receives the next free document id (the current size of the
     * document list). The date arrays are not touched by this method.
     *
     * @param doc the document to append; its indices must be valid feature ids
     */
    public void addDoc(SparseDocument doc) {
        int[] featureIds = doc.getIndices();
        int[] ranks = doc.getRanking();
        // The list only grows after the loop, so this id is stable throughout it.
        int newDocId = keyphrasesToDocs.size();
        for (int k = 0; k < doc.getNonzeroElements(); k++) {
            SparseKeyphrase phrase = docsToKeyphrases.get(featureIds[k]);
            phrase.setFeatureValAt(newDocId, ranks[k]);
        }
        totalDocumentsSize++;
        keyphrasesToDocs.add(doc);
    }

    /**
     * Reads the tab-separated keyphrase listing from {@code input[0]} and fills the document
     * list, the vocabulary and the title-to-file map.
     *
     * <p>Two kinds of lines are recognized:
     * <ul>
     *   <li><b>Document header</b> (3 to 6 fields). With 5 or more fields the layout is
     *       file, document size, (unused), date, title and an optional id; with fewer it is
     *       file, date, title. A header closes the previous document: it is stored if it
     *       received at least one keyphrase, otherwise it is dropped together with its date.</li>
     *   <li><b>Keyphrase line</b> (7 or more fields): normalized phrase, two numeric values
     *       passed through to the document, a probability in field 3, and in field 6 a
     *       bracketed, comma-separated list of {@code form=count} pairs. At most 10 keyphrases
     *       are accepted per document.</li>
     * </ul>
     * All other lines are ignored. Dates that cannot be parsed with the pattern
     * {@code "d MMM yyyy"} are replaced by the current time and a warning is printed once at
     * the end. An {@code IOException} is printed and ends reading early; what was read so far
     * is kept.
     *
     * <p>The last document of the input is stored even if it has no keyphrases. Input without
     * any document header yields an empty result instead of a {@code NullPointerException}.
     *
     * @param notOKDocs currently unused
     * @param input     the streams to read from; only {@code input[0]} is consumed (and closed)
     * @return the document dates in epoch milliseconds, in input order, one per stored document
     */
    private List<Long> processKeyphraseInput(Map<String, String> notOKDocs, InputStream ... input) {
        SimpleDateFormat formatter = new SimpleDateFormat("d MMM yyyy", Locale.ENGLISH);
        LinkedList<Long> unorderedDates = new LinkedList<Long>();
        SparseDocument sd = null;
        boolean dummyDateAdded = false;
        // Number of keyphrases accepted so far for the current document; doubles as the rank.
        int i = 0;
        int lastDocSize = -1;
        try (BufferedReader br = new BufferedReader(new InputStreamReader(input[0], "UTF-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] parts = line.split("\t");
                if (line.length() > 0 && parts.length >= 3 && parts.length < 7) {
                    // ---- document header line ----
                    boolean withDocSize = parts.length >= 5;
                    if (sd != null && i > 0) {
                        sd.finalizeData(lastDocSize, Integer.MAX_VALUE);
                        this.keyphrasesToDocs.add(sd);
                    } else if (sd != null) {
                        // The previous document had no keyphrases: drop it and its date.
                        unorderedDates.removeLast();
                    }
                    String dateField = parts[withDocSize ? 3 : 1];
                    try {
                        long d = formatter.parse(dateField.length() == 0 ? "1 Jan 1970" : dateField).getTime();
                        unorderedDates.add(d);
                    }
                    catch (ParseException e) {
                        dummyDateAdded = true;
                        unorderedDates.add(new Date().getTime());
                    }
                    String title = parts[withDocSize ? 4 : 2];
                    lastDocSize = withDocSize ? Integer.parseInt(parts[1]) : 0;
                    String id = line.contains("aac") ? line.replaceAll(".*(W\\d{2}-\\d{4}).*", "$1") : "n/a";
                    sd = new SparseDocument(title, parts.length > 5 ? parts[5] : id);
                    this.titlesToFiles.put(title, parts[0].replaceAll(".pdf_$", ".pdf"));
                    i = 0;
                    continue;
                }
                if (parts.length < 7 || i >= 10) {
                    continue;
                }

                // ---- keyphrase line ----
                String pp = parts[0];
                Integer featureId = this.vocabularyToIds.get(pp);
                double probability = Double.parseDouble(parts[3]);
                String[] originalForms = parts[6].substring(1, parts[6].length() - 1).split(", ");
                // Pick the most frequent surface form that does not contain a function word.
                String originalForm = null;
                int totalOccurrences = 0;
                int maxOccurrence = 0;
                for (String formAndCount : originalForms) {
                    String[] originalFormParts = formAndCount.split("=");
                    String of = originalFormParts[0];
                    int occurrence = Integer.parseInt(originalFormParts[1]);
                    totalOccurrences += occurrence;
                    if (occurrence > maxOccurrence && !of.matches(".* (is|are|do(es)?|w(as|ere|ith)|the|of|an?|ha(ve|s)) .*")) {
                        originalForm = of;
                        maxOccurrence = occurrence;
                    }
                }
                if (featureId == null) {
                    if (this.stopVocabGrowth) {
                        continue;
                    }
                    // New vocabulary entry: ids are assigned densely in order of first appearance.
                    featureId = this.vocabularyToIds.size();
                    this.vocabularyToIds.put(pp, featureId);
                    if (originalForm == null) {
                        originalForm = originalForms[0].split("=")[0];
                    }
                    this.idsToOriginalForm.put(featureId, originalForm);
                    this.docsToKeyphrases.add(new SparseKeyphrase(pp));
                } else if (this.stopVocabGrowth && this.docsToKeyphrases.get(featureId).getNonzeroElements() == 0) {
                    // Known phrase that no document uses yet: treated like an unseen one.
                    continue;
                }
                // The current document is not in the list yet, so size() is its future id.
                this.docsToKeyphrases.get(featureId).setFeatureValAt(this.keyphrasesToDocs.size(), ++i);
                sd.setFeatureValAt(featureId, i, Double.parseDouble(parts[1]), Double.parseDouble(parts[2]), probability, totalOccurrences);
            }
        }
        catch (IOException io) {
            io.printStackTrace();
        }
        if (dummyDateAdded) {
            System.err.println("Dummy date(s) had to be added unfortunatelly.");
        }
        if (sd != null) {
            sd.finalizeData(lastDocSize, Integer.MAX_VALUE);
            this.keyphrasesToDocs.add(sd);
        }
        return unorderedDates;
    }

    /**
     * Resets every container of the model to an empty one and applies the threshold.
     *
     * @param th the requested threshold, passed on to {@code setThreshold}
     */
    private void init(int th) {
        final int initialCapacity = 5000;
        titlesToFiles = new HashMap<String, String>();
        setThreshold(th);
        keyphrasesToDocs = new ArrayList<SparseDocument>(initialCapacity);
        docsToKeyphrases = new ArrayList<SparseKeyphrase>(initialCapacity);
        vocabularyToIds = new HashMap<String, Integer>();
        idsToOriginalForm = new HashMap<Integer, String>();
    }

    /**
     * Builds a model from a single keyphrase input stream.
     *
     * <p>After the input is read, the document dates are sorted ascending, the permutation
     * that sorts them is remembered in {@code dateBasedOrdering}, and the distinct dates
     * (formatted as {@code yyyy.MM.dd.}) are collected into {@code mySet} and
     * {@code sortedDate}.
     *
     * @param th        the requested threshold; see {@code setThreshold}
     * @param notOKDocs forwarded to the input processing
     * @param inputLoc  exactly one input stream
     * @throws IllegalArgumentException if {@code inputLoc} does not hold exactly one stream
     *         (previously this surfaced later as a {@code NullPointerException})
     */
    public ReducedMemVTM(int th, Map<String, String> notOKDocs, InputStream ... inputLoc) {
        if (inputLoc.length != 1) {
            throw new IllegalArgumentException("Exactly one input stream is expected, but got " + inputLoc.length);
        }
        this.init(th);
        List<Long> unorderedDates = this.processKeyphraseInput(notOKDocs, inputLoc);
        this.totalDocumentsSize = this.keyphrasesToDocs.size();
        this.documentDates = unorderedDates.toArray(new Long[unorderedDates.size()]);
        // The ordering must be taken before the dates themselves are sorted in place.
        this.dateBasedOrdering = NLPUtils.stableSort((Comparable[])this.documentDates);
        Arrays.sort((Object[])this.documentDates);
        SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd.", Locale.ENGLISH);
        for (int docId = 0; docId < this.totalDocumentsSize; ++docId) {
            this.mySet.add(df.format(this.getDocDate(docId)));
        }
        this.sortedDate = new ArrayList<String>(this.mySet);
        Collections.sort(this.sortedDate);
    }

    /**
     * Sets the rank threshold, capped at 10.
     *
     * @param th the requested threshold; values above 10 are reduced to 10
     */
    public void setThreshold(int th) {
        threshold = (th < 10) ? th : 10;
    }

    /**
     * Chooses how per-phrase values of two documents are combined.
     *
     * @param c the calculation to use
     */
    public void setCalculation(Calculation c) {
        calc = c;
    }

    /**
     * Chooses which per-phrase value of a document is used in similarity computations.
     *
     * @param m the metric to use
     */
    public void setMetric(Metric m) {
        metric = m;
    }

    /**
     * Chooses the scoring scheme used when keyphrases are selected for a group of documents.
     *
     * @param k the strategy to use
     */
    public void setKeyphraseStrategy(KeyphraseStrategy k) {
        keyPhraseStrat = k;
    }

    /**
     * Returns the document dates re-arranged so that they are indexed by document id
     * instead of by chronological position.
     *
     * @return a new array in which entry {@code d} is the date of document {@code d} in
     *         epoch milliseconds
     */
    public long[] getDocumentDates() {
        long[] datesByDocId = new long[totalDocumentsSize];
        for (int position = 0; position < totalDocumentsSize; position++) {
            int docId = dateBasedOrdering[position];
            datesByDocId[docId] = documentDates[position];
        }
        return datesByDocId;
    }

    /**
     * Returns the internal document list itself (no copy is made).
     *
     * @return the documents, indexed by document id
     */
    public List<SparseDocument> getKeyphrasesToDocs() {
        return keyphrasesToDocs;
    }

    /**
     * Returns the ids of all documents whose date lies in the closed interval
     * {@code [start, end]}.
     *
     * <p>If {@code start} exceeds {@code end} a warning is printed and the two are swapped.
     * A binary-search hit at index 0 is handled like any other hit; previously it was
     * mistaken for "not found", which made queries starting at the earliest date fail with
     * an {@code ArrayIndexOutOfBoundsException} or {@code NegativeArraySizeException}.
     *
     * @param start lower bound in epoch milliseconds, inclusive
     * @param end   upper bound in epoch milliseconds, inclusive
     * @return the matching document ids in ascending order; empty if there are none
     */
    public int[] filterForDates(long start, long end) {
        if (start > end) {
            System.err.println("Start date should not exceed end date. Dates will be automatically swapped.");
            long swapper = start;
            start = end;
            end = swapper;
        }
        // Inclusive lower bound: the first position whose date is >= start.
        int discountedStart = Arrays.binarySearch((Object[])this.documentDates, Long.valueOf(start));
        if (discountedStart >= 0) {
            // binarySearch may land anywhere inside a run of equal dates; walk to its beginning.
            while (discountedStart > 0 && this.documentDates[discountedStart - 1].longValue() == start) {
                --discountedStart;
            }
        } else {
            // Not found: the result encodes the insertion point as -(insertionPoint) - 1.
            discountedStart = -discountedStart - 1;
        }
        // Exclusive upper bound: the first position whose date is > end.
        int endIndex = Arrays.binarySearch((Object[])this.documentDates, discountedStart, this.documentDates.length, Long.valueOf(end));
        if (endIndex >= 0) {
            do {
                ++endIndex;
            } while (endIndex < this.documentDates.length && this.documentDates[endIndex].longValue() == end);
        } else {
            endIndex = -endIndex - 1;
        }
        int[] filteredDocIds = new int[endIndex - discountedStart];
        for (int i = discountedStart; i < endIndex; ++i) {
            filteredDocIds[i - discountedStart] = this.dateBasedOrdering[i];
        }
        Arrays.sort(filteredDocIds);
        return filteredDocIds;
    }

    /**
     * Convenience overload that selects keyphrases for a group with an empty group id.
     *
     * @param keyphrases output array that receives the selected keyphrases
     * @param docIds     the ids of the documents forming the group
     * @return the prototype document produced by the three-argument overload
     */
    public SparseDocument getTopKeyphrases(String[] keyphrases, int ... docIds) {
        final String anonymousGroup = "";
        return getTopKeyphrases(anonymousGroup, keyphrases, docIds);
    }

    /**
     * Selects the most characteristic keyphrases of a group of documents.
     *
     * <p>For a single document the phrases are taken directly from that document and the
     * returned prototype stays empty. For any other group size every phrase occurring in the
     * group is scored according to the configured strategy:
     * <ul>
     *   <li>{@code CHI_SQUARE} / {@code INFO_GAIN}: computed from the 2x2 table of
     *       "document is in the group" versus "document contains the phrase"; phrases that
     *       are not over-represented in the group score negative infinity.</li>
     *   <li>{@code FREQ_BASED}: occurrences of the phrase in the group divided by the summed
     *       length of the group documents containing it.</li>
     *   <li>{@code WF_IWF}: the frequency score multiplied by the logarithm of (number of
     *       communities / number of communities in which the phrase occurs). The ratio is
     *       now computed in floating point; it used to be truncated by integer division.</li>
     * </ul>
     * A phrase whose rank in the first group document containing it exceeds the threshold
     * scores negative infinity. Frequency-based scores may be NaN when the summed length is 0.
     *
     * <p>Side effects: {@code docIds} is sorted in place, {@code keyphrases} is filled from
     * index 0 (entries are "surface form TAB score" for groups, the plain surface form for a
     * single document), and {@code docKeyPhrases} is set to {@code keyphrases}.
     *
     * @param groupId    id given to the returned prototype document
     * @param keyphrases output array; its length is the number of phrases requested
     * @param docIds     ids of the documents forming the group
     * @return a prototype document holding the selected phrases with aggregated statistics
     * @throws IllegalStateException if no keyphrase strategy has been set (this used to
     *         terminate the JVM with exit status 2)
     */
    public SparseDocument getTopKeyphrases(String groupId, String[] keyphrases, int ... docIds) {
        // Sorted in place because membership tests below use binary search.
        Arrays.sort(docIds);
        if (this.keyPhraseStrat == null) {
            throw new IllegalStateException("No keyphrase strategy has been set.");
        }
        SparseDocument prototypeDoc = new SparseDocument(groupId);
        if (docIds.length == 1) {
            SparseDocument sd = this.keyphrasesToDocs.get(docIds[0]);
            int[] kpIndexes = sd.getTopKeyphraseIds(keyphrases.length);
            for (int kpIdIndex = 0; kpIdIndex < kpIndexes.length; ++kpIdIndex) {
                keyphrases[kpIdIndex] = this.idsToOriginalForm.get(kpIndexes[kpIdIndex]);
            }
        } else if (this.keyPhraseStrat == KeyphraseStrategy.CHI_SQUARE || this.keyPhraseStrat == KeyphraseStrategy.WF_IWF || this.keyPhraseStrat == KeyphraseStrategy.INFO_GAIN || this.keyPhraseStrat == KeyphraseStrategy.FREQ_BASED) {
            boolean contingencyBased = this.keyPhraseStrat == KeyphraseStrategy.CHI_SQUARE || this.keyPhraseStrat == KeyphraseStrategy.INFO_GAIN;
            int totalLength = 0;
            int[] aggregatedPhraseFreqs = new int[this.docsToKeyphrases.size()];
            double[] scores = new double[this.docsToKeyphrases.size()];
            Set<Integer> alreadyChecked = new HashSet<Integer>();
            for (int docID : docIds) {
                SparseDocument sd = this.keyphrasesToDocs.get(docID);
                totalLength += sd.getDocumentLength();
                int distinctPhrases = sd.getNonzeroElements();
                int[] keyphraseIndices = sd.getIndices();
                int[] keyphraseFreqs = sd.getFrequencies();
                for (int kpNum = 0; kpNum < distinctPhrases; ++kpNum) {
                    int kpId = keyphraseIndices[kpNum];
                    aggregatedPhraseFreqs[kpId] += keyphraseFreqs[kpNum];
                    if (!alreadyChecked.add(kpId)) {
                        // Each phrase is scored only once, when it is first encountered.
                        continue;
                    }
                    if (sd.searchRankingFor(kpId) > this.threshold) {
                        scores[kpId] = Double.NEGATIVE_INFINITY;
                        continue;
                    }
                    SparseKeyphrase sk = this.docsToKeyphrases.get(kpId);
                    int nonZeroElements = sk.getNonzeroElements();
                    int[] containingIds = sk.getDocIndices();
                    if (contingencyBased) {
                        // n11: in group & contains phrase, n10: outside group & contains phrase,
                        // n01: in group & lacks phrase,    n00: outside group & lacks phrase.
                        double n11 = 0.0;
                        double n10 = 0.0;
                        for (int nonZeroIndex = 0; nonZeroIndex < nonZeroElements; ++nonZeroIndex) {
                            if (Arrays.binarySearch(docIds, containingIds[nonZeroIndex]) < 0) {
                                n10 += 1.0;
                            } else {
                                n11 += 1.0;
                            }
                        }
                        double n01 = (double)docIds.length - n11;
                        double n00 = (double)(this.totalDocumentsSize - docIds.length) - n10;
                        boolean overRepresented = n11 > n10 * (double)docIds.length / (n01 + n00);
                        if (this.keyPhraseStrat == KeyphraseStrategy.CHI_SQUARE) {
                            double product = Math.abs(n11 * n00 - n10 * n01);
                            if (product > 0.0 && overRepresented) {
                                // Chi-square statistic evaluated in log space.
                                double numerator = Math.log(this.totalDocumentsSize) + 2.0 * Math.log(product);
                                scores[kpId] = numerator - Math.log(docIds.length) - Math.log(n11 + n10) - Math.log(n10 + n00) - Math.log(n01 + n00);
                            } else {
                                scores[kpId] = Double.NEGATIVE_INFINITY;
                            }
                        } else {
                            double presence = n11 + n10;
                            double absence = (double)this.totalDocumentsSize - presence;
                            double presenceRatio = presence / (double)this.totalDocumentsSize;
                            double p1 = n11 / presence;
                            double p2 = n01 / absence;
                            if (overRepresented) {
                                // Terms with probability 0 or 1 contribute no entropy.
                                double presentTerm = p1 == 0.0 || p1 == 1.0 ? 0.0 : presenceRatio * (p1 * Math.log(p1) + (1.0 - p1) * Math.log(1.0 - p1));
                                double absentTerm = p2 == 1.0 || p2 == 0.0 ? 0.0 : (1.0 - presenceRatio) * (p2 * Math.log(p2) + (1.0 - p2) * Math.log(1.0 - p2));
                                scores[kpId] = presentTerm + absentTerm;
                            } else {
                                scores[kpId] = Double.NEGATIVE_INFINITY;
                            }
                        }
                    } else {
                        Set<Integer> presencesInCommunities = new HashSet<Integer>();
                        int totalPresence = 0;
                        int totalTokens = 0;
                        for (int nz = 0; nz < nonZeroElements; ++nz) {
                            int containingDoc = containingIds[nz];
                            if (containingDoc >= this.communities.length) {
                                // Documents without a community assignment end the scan.
                                break;
                            }
                            presencesInCommunities.add(this.communities[containingDoc]);
                            if (Arrays.binarySearch(docIds, containingDoc) >= 0) {
                                SparseDocument docWithTerm = this.keyphrasesToDocs.get(containingDoc);
                                totalPresence += docWithTerm.searchFrequencyFor(kpId);
                                totalTokens += docWithTerm.getDocumentLength();
                            }
                        }
                        double score = (double)totalPresence / (double)totalTokens;
                        if (this.keyPhraseStrat == KeyphraseStrategy.WF_IWF) {
                            // Floating-point ratio: integer division would truncate it
                            // and throw when the phrase occurs in no community.
                            score *= Math.log((double)this.communitySizes.length / (double)presencesInCommunities.size());
                        }
                        scores[kpId] = score;
                    }
                }
            }
            // ranking is ascending, so the best phrases are read from its end.
            int[] ranking = NLPUtils.stableSort(scores);
            int filled = 0;
            int leftOut = 0;
            while (filled < keyphrases.length && filled + leftOut < ranking.length) {
                int topKpId = ranking[ranking.length - filled - leftOut - 1];
                if (!alreadyChecked.contains(topKpId)) {
                    // Phrase does not occur in the group at all; skip it.
                    ++leftOut;
                    continue;
                }
                keyphrases[filled] = String.valueOf(this.idsToOriginalForm.get(topKpId)) + "\t" + scores[topKpId];
                SparseKeyphrase sk = this.docsToKeyphrases.get(topKpId);
                double idf = Math.log((double)this.totalDocumentsSize / (double)sk.getNonzeroElements());
                double relativeFreq = (double)aggregatedPhraseFreqs[topKpId] / (double)totalLength;
                prototypeDoc.setFeatureValAt(topKpId, filled + 1, relativeFreq * idf, 0.0, relativeFreq, aggregatedPhraseFreqs[topKpId]);
                prototypeDoc.setDocumentLength(totalLength);
                ++filled;
            }
        }
        this.docKeyPhrases = keyphrases;
        return prototypeDoc;
    }

    /**
     * Finds the documents that contain the given phrase.
     *
     * <p>The phrase is stemmed, split at spaces, its tokens are sorted, each token is
     * stemmed once more, and the tokens are joined with underscores; the result is looked up
     * in the vocabulary.
     *
     * @param keyphrase the phrase to search for
     * @return the document index array of the matching vocabulary entry (the entry's own
     *         array, not a copy), or an empty array if the phrase is not in the vocabulary
     */
    public int[] getDocIdsWithKeyphrase(String keyphrase) {
        String[] stems = ps.stemString(keyphrase).split(" ");
        Arrays.sort(stems);

        StringBuilder joined = new StringBuilder();
        for (String stem : stems) {
            joined.append(ps.stemString(stem)).append(' ');
        }
        String vocabularyKey = joined.toString().trim().replace(' ', '_');

        Integer featureId = vocabularyToIds.get(vocabularyKey);
        if (featureId == null) {
            return new int[0];
        }
        return docsToKeyphrases.get(featureId).getDocIndices();
    }

    /**
     * Inserts a candidate into the neighbor slots of node {@code m}, keeping the slots
     * ordered by decreasing {@code getDistance()} value.
     *
     * <p>The slots of node {@code m} are the range
     * {@code [m * maxNeighbors, (m + 1) * maxNeighbors)} of {@code closestNodes}. A candidate
     * that displaces an entry pushes that entry towards the end of the range; whatever falls
     * off the end is discarded.
     *
     * @param closestNodes flat array holding the neighbor slots of all nodes
     * @param m            the node whose slots are updated
     * @param maxNeighbors number of slots per node
     * @param toInsert     the candidate neighbor; {@code null} leaves the array untouched
     */
    private void updateClosestNodes(Index[] closestNodes, int m, int maxNeighbors, Index toInsert) {
        int end = (m + 1) * maxNeighbors;
        Index carried = toInsert;
        for (int slot = m * maxNeighbors; carried != null && slot < end; slot++) {
            Index occupant = closestNodes[slot];
            if (occupant == null || carried.getDistance() > occupant.getDistance()) {
                // Take the slot and carry the previous occupant further down.
                closestNodes[slot] = carried;
                carried = occupant;
            }
        }
    }

    /**
     * Returns the per-phrase values of a document that correspond to the configured metric.
     *
     * @param sd the document to read from
     * @return tf-idf values for {@code TFIDF} and {@code OCCURRENCES}, first positions for
     *         {@code FIRST_OCCURRENCE}, probabilities for {@code PROBABILITY}; {@code null}
     *         (after printing a message) for any other metric, including none
     */
    private double[] getDocValuesToCountWith(SparseDocument sd) {
        Metric chosen = metric;
        double[] values;
        if (chosen == Metric.TFIDF || chosen == Metric.OCCURRENCES) {
            values = sd.getTfIdfs();
        } else if (chosen == Metric.FIRST_OCCURRENCE) {
            values = sd.getFirstPositions();
        } else if (chosen == Metric.PROBABILITY) {
            values = sd.getProbabilities();
        } else {
            System.err.println("Improper metric set.");
            values = null;
        }
        return values;
    }

    /**
     * Computes pairwise overlaps for all documents with 3 neighbors per document and
     * without PageRank; the resulting neighbor array is discarded.
     */
    public void calculateOverlaps() {
        int[] noFilter = new int[0];
        calculateOverlaps(3, false, noFilter);
    }

    /**
     * Computes the similarity of every pair of documents and keeps, for each document, the
     * {@code maxNeighbors} most similar ones.
     *
     * <p>Neighbors are identified by their position in the processed sequence: the document
     * id when no filter is given, otherwise the index into {@code filteredDocIds}.
     *
     * @param maxNeighbors      number of neighbor slots per document
     * @param calculatePageRank whether to compute and store PageRank values (damping
     *                          argument 0.15) from the neighbor array
     * @param filteredDocIds    the documents to process; empty means all documents
     * @return the flat neighbor array, {@code maxNeighbors} consecutive slots per document
     */
    public Index[] calculateOverlaps(int maxNeighbors, boolean calculatePageRank, int ... filteredDocIds) {
        boolean useAllDocs = filteredDocIds.length == 0;
        int docCount = useAllDocs ? totalDocumentsSize : filteredDocIds.length;
        Index[] closestNodes = new Index[docCount * maxNeighbors];

        for (int first = 0; first < docCount - 1; first++) {
            SparseDocument firstDoc = keyphrasesToDocs.get(useAllDocs ? first : filteredDocIds[first]);
            for (int second = first + 1; second < docCount; second++) {
                SparseDocument secondDoc = keyphrasesToDocs.get(useAllDocs ? second : filteredDocIds[second]);
                double similarity = calculateSimilarity(firstDoc, secondDoc);
                // The relation is symmetric, so both documents get the other as a candidate.
                updateClosestNodes(closestNodes, first, maxNeighbors, new Index(second, similarity));
                updateClosestNodes(closestNodes, second, maxNeighbors, new Index(first, similarity));
            }
        }

        if (calculatePageRank) {
            pageRankVals = PageRank.calculatePageRank(closestNodes, maxNeighbors, 0.15);
        }
        return closestNodes;
    }

    /**
     * Computes the similarity of two documents from the phrases they share.
     *
     * <p>Both index arrays are walked in parallel as in a sorted merge. A phrase counts as
     * shared when it occurs in both documents and its rank does not exceed the threshold in
     * either. The walk ends when one document is exhausted or when the number of shared
     * phrases reaches the threshold.
     *
     * <ul>
     *   <li>{@code DICE} / {@code JACCARD}: computed from the number of shared phrases and
     *       the lengths of the two index arrays; phrase values are not read.</li>
     *   <li>{@code COSINE}: dot product of the shared phrases divided by the product of the
     *       vector lengths. Only phrases visited before the walk ends enter the lengths.
     *       Unshared phrases now contribute their squared <em>value</em>; the squared
     *       feature index was used before.</li>
     *   <li>Other calculations combine the two values of each shared phrase. A NaN result
     *       is skipped without counting the phrase. Otherwise the result is folded into the
     *       total: maximum for {@code TFIDF}/{@code OCCURRENCES}, sum for
     *       {@code PROBABILITY} with {@code PRODUCT}, and a log-scaled sum in all other
     *       cases.</li>
     * </ul>
     *
     * @param sd1 the first ("outer") document
     * @param sd2 the second ("inner") document
     * @return the similarity score; 0.0 if the documents share nothing
     * @throws IllegalStateException if a shared phrase is found while no supported
     *         calculation is configured (this used to surface as a
     *         {@code NullPointerException})
     */
    public double calculateSimilarity(SparseDocument sd1, SparseDocument sd2) {
        int[] outerIndices = sd1.getIndices();
        int[] outerRanks = sd1.getRanking();
        double[] outerValues = this.getDocValuesToCountWith(sd1);
        int[] innerIndices = sd2.getIndices();
        int[] innerRanks = sd2.getRanking();
        double[] innerValues = this.getDocValuesToCountWith(sd2);
        boolean cosine = this.calc == Calculation.COSINE;
        boolean countOnly = this.calc == Calculation.DICE || this.calc == Calculation.JACCARD;
        int intersectionSize = 0;
        double docSimilarity = 0.0;
        double iLength = 0.0;
        double oLength = 0.0;
        int ii = 0;
        int oi = 0;
        while (ii < innerIndices.length && oi < outerIndices.length && intersectionSize < this.threshold) {
            boolean shared = innerIndices[ii] == outerIndices[oi] && Math.max(innerRanks[ii], outerRanks[oi]) <= this.threshold;
            if (!shared) {
                // Advance the side with the smaller index. Equal indices rejected by the
                // rank test advance the inner side first.
                if (innerIndices[ii] > outerIndices[oi]) {
                    if (cosine) {
                        oLength += outerValues[oi] * outerValues[oi];
                    }
                    ++oi;
                } else {
                    if (cosine) {
                        iLength += innerValues[ii] * innerValues[ii];
                    }
                    ++ii;
                }
                continue;
            }
            if (cosine) {
                docSimilarity += outerValues[oi] * innerValues[ii];
                oLength += outerValues[oi] * outerValues[oi];
                iLength += innerValues[ii] * innerValues[ii];
            } else if (!countOnly) {
                double tempSimilarity;
                if (this.calc == Calculation.HARMONIC_MEAN) {
                    tempSimilarity = 2.0 * innerValues[ii] * outerValues[oi] / (innerValues[ii] + outerValues[oi]);
                } else if (this.calc == Calculation.MAX) {
                    tempSimilarity = Math.max(innerValues[ii], outerValues[oi]);
                } else if (this.calc == Calculation.MIN) {
                    tempSimilarity = Math.min(innerValues[ii], outerValues[oi]);
                } else if (this.calc == Calculation.MEAN) {
                    tempSimilarity = (innerValues[ii] + outerValues[oi]) / 2.0;
                } else if (this.calc == Calculation.PRODUCT) {
                    tempSimilarity = innerValues[ii] * outerValues[oi];
                } else {
                    throw new IllegalStateException("Improper calculation set: " + this.calc);
                }
                if (Double.isNaN(tempSimilarity)) {
                    // Undefined combination (e.g. harmonic mean of two zeros): skip the inner
                    // entry without counting it as shared.
                    ++ii;
                    continue;
                }
                if (this.metric == Metric.TFIDF || this.metric == Metric.OCCURRENCES) {
                    docSimilarity = Math.max(docSimilarity, tempSimilarity);
                } else if (this.metric == Metric.PROBABILITY && this.calc == Calculation.PRODUCT) {
                    docSimilarity += tempSimilarity;
                } else if (tempSimilarity != 0.0) {
                    docSimilarity += tempSimilarity > 1.0 ? Math.log(tempSimilarity) : 1.0 / -Math.log(tempSimilarity);
                }
            }
            ++intersectionSize;
            ++oi;
            ++ii;
        }
        if (this.calc == Calculation.DICE) {
            docSimilarity = 2.0 * (double)intersectionSize / (double)(innerIndices.length + outerIndices.length);
        } else if (this.calc == Calculation.JACCARD) {
            docSimilarity = (double)intersectionSize / (double)(innerIndices.length + outerIndices.length - intersectionSize);
        } else if (cosine && docSimilarity > 0.0) {
            docSimilarity /= Math.sqrt(iLength) * Math.sqrt(oLength);
        }
        return docSimilarity;
    }

    /**
     * Builds the reference ("etalon") community assignment from a tab-separated workshop file.
     *
     * <p>Each line of {@code wsNameFile} is split on tabs. Characters 1-5 of the second column are
     * used as the workshop id; reading stops at end of file or at the first empty line. For every
     * workshop id a two-element array {@code {label, topicColumn}} is recorded, where
     * {@code topicColumn} is the eighth column or {@code "?"} when the line has fewer columns:
     * <ul>
     *   <li>not merged: the label is the year bucket when year windows are active, otherwise the
     *       third column;</li>
     *   <li>merged: only lines with exactly eight columns are used, and the label is
     *       {@code yearBucket + "-" + WS_TOPICS_MAPPING[topic - 1]} (the year bucket is {@code -1}
     *       when year windows are inactive).</li>
     * </ul>
     *
     * <p>Documents at positions {@code originalSize..} of {@code keyphrasesToDocs} get their id
     * replaced by the label derived from their title. The first {@code originalSize} documents are
     * then grouped by label, which (re)initialises {@code communities}, {@code docsToCommunities}
     * and {@code communitySizes}.
     *
     * <p>An {@link IOException} while reading the file is printed and otherwise ignored.
     *
     * @param wsNameFile      path of the tab-separated workshop description file
     * @param mergedWorkshops whether workshops are merged by (year bucket, topic) instead of kept apart
     * @param yearWindow      width of a year bucket; {@code 0} and {@code Integer.MAX_VALUE} disable bucketing
     * @param originalSize    number of leading documents that receive a community
     * @return map from community index to {@code {label, topicColumn}}
     */
    public Map<Integer, String[]> determineEtalonCommunities(String wsNameFile, boolean mergedWorkshops, int yearWindow, int originalSize) {
        Map<String, String[]> wsIdToName = new HashMap<String, String[]>();
        boolean dealWithYears = yearWindow != Integer.MAX_VALUE && yearWindow != 0;
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(wsNameFile)))) {
            String line;
            while ((line = br.readLine()) != null && line.length() > 0) {
                String[] parts = line.split("\t");
                int yearBucket = -1;
                if (dealWithYears) {
                    // Two-digit year: values above 89 are shifted down by 90, all others up by 10,
                    // so the result counts upwards across the century boundary.
                    int deltaYear = Integer.parseInt(parts[1].substring(1, 3));
                    deltaYear += deltaYear > 89 ? -90 : 10;
                    yearBucket = deltaYear / yearWindow;
                }
                String wsID = parts[1].substring(1, 6);
                String topicColumn = parts.length > 7 ? parts[7] : "?";
                if (!mergedWorkshops) {
                    String label = dealWithYears ? Integer.toString(yearBucket) : parts[2];
                    wsIdToName.put(wsID, new String[]{label, topicColumn});
                    continue;
                }
                if (parts.length != 8) {
                    continue;
                }
                int topicNumber = WS_TOPICS_MAPPING.length;
                try {
                    topicNumber = Integer.parseInt(parts[7]);
                }
                catch (NumberFormatException ignored) {
                    // Non-numeric topic column: keep the default, i.e. the last WS_TOPICS_MAPPING entry.
                }
                wsIdToName.put(wsID, new String[]{Integer.toString(yearBucket) + "-" + WS_TOPICS_MAPPING[topicNumber - 1], topicColumn});
            }
        }
        catch (IOException io) {
            io.printStackTrace();
        }

        // Re-label the documents that follow the first originalSize entries.
        for (int i = originalSize; i < this.keyphrasesToDocs.size(); ++i) {
            SparseDocument sd = this.keyphrasesToDocs.get(i);
            String title = sd.getTitle();
            if (title.contains("wiki")) {
                // Parsing and re-formatting strips a leading zero from the two-digit topic number.
                String wikiTopicId = Integer.toString(Integer.parseInt(title.replaceAll(".*W\\d{2}-(\\d{2}).*", "$1")));
                sd.setId(wikiTopicId);
            } else {
                String cfpId = title.replaceAll(".*W(\\d{2}-\\d{2}).*", "$1");
                // NOTE(review): an id missing from the workshop file raises a NullPointerException here.
                sd.setId(wsIdToName.get(cfpId)[0]);
            }
        }

        Map<Integer, String[]> idsToWsNames = new HashMap<Integer, String[]>();
        // label -> {community index, number of documents}
        Map<String, int[]> workshop2IdAndSize = new HashMap<String, int[]>();
        this.communities = new int[originalSize];
        this.docsToCommunities = new HashMap<Integer, List<Integer>>();
        for (int d = 0; d < originalSize; ++d) {
            SparseDocument sd = this.keyphrasesToDocs.get(d);
            // NOTE(review): as above, an unknown workshop id raises a NullPointerException.
            String[] values = wsIdToName.get(sd.getId().substring(1, 6));
            String label = values[0];
            if (label == null) {
                continue;
            }
            int[] vals = workshop2IdAndSize.get(label);
            if (vals == null) {
                // First document of this label: allocate the next free community index.
                int newCommunityId = workshop2IdAndSize.size();
                idsToWsNames.put(newCommunityId, new String[]{label, values[1]});
                vals = new int[]{newCommunityId, 1};
                this.docsToCommunities.put(newCommunityId, new ArrayList<Integer>());
                workshop2IdAndSize.put(label, vals);
            } else {
                ++vals[1];
            }
            this.docsToCommunities.get(vals[0]).add(d);
            this.communities[d] = vals[0];
        }

        this.communitySizes = new int[workshop2IdAndSize.size()];
        for (int[] vals : workshop2IdAndSize.values()) {
            this.communitySizes[vals[0]] = vals[1];
        }
        return idsToWsNames;
    }

    /**
     * Detects communities on the document similarity graph and stores the resulting assignment.
     *
     * <p>A {@code Graph} is built from {@code similarities} and handed to a {@code Community}
     * instance whose {@code findBestApproximation()} result is returned. Afterwards
     * {@code communitySizes}, {@code communities} and {@code docsToCommunities} are rebuilt and
     * {@link #setKeyphraseMetrics(Map, int)} is invoked on the new assignment.
     *
     * <p>Documents whose id is contained in the not-connected set (presumably filled by the
     * {@code Graph} constructor; verify) get community {@code -1} and are left out of
     * {@code docsToCommunities}. The community array returned by {@code Community} is indexed
     * without those documents, hence the running offset when reading it.
     *
     * @param similarities   similarity data the graph is built from
     * @param maxNeighbors   passed through to the {@code Graph} constructor
     * @param selectedDocIds optional subset of documents; only its length is used here. When empty,
     *                       all {@code totalDocumentsSize} documents are processed
     * @return the value returned by {@code Community.findBestApproximation()}
     */
    public double determineCommunities(Index[] similarities, int maxNeighbors, int ... selectedDocIds) {
        HashSet<Integer> notConnectedIds = new HashSet<Integer>();
        Graph similarityGraph = new Graph(similarities, maxNeighbors, true, false, notConnectedIds);
        Community comm = new Community(similarityGraph, Integer.MAX_VALUE, new LinkedList<int[]>());
        double bestModularity = comm.findBestApproximation();
        int[] prunedCommunities = comm.getCommunity2NodeMapping();
        int docsToDealWith = selectedDocIds.length == 0 ? this.totalDocumentsSize : selectedDocIds.length;

        // Count members per community id, then trim the array to the number of distinct ids.
        // NOTE(review): the trim is only lossless if community ids are 0..k-1 without gaps.
        this.communitySizes = new int[docsToDealWith];
        Set<Integer> differentCommIds = new HashSet<Integer>();
        for (int communityId : prunedCommunities) {
            ++this.communitySizes[communityId];
            differentCommIds.add(communityId);
        }
        this.communitySizes = Arrays.copyOf(this.communitySizes, differentCommIds.size());

        this.communities = new int[docsToDealWith];
        this.docsToCommunities = new HashMap<Integer, List<Integer>>();
        int docsJumpedOver = 0;
        for (int docId = 0; docId < docsToDealWith; ++docId) {
            if (notConnectedIds.contains(docId)) {
                this.communities[docId] = -1;
                ++docsJumpedOver;
                continue;
            }
            int communityId = prunedCommunities[docId - docsJumpedOver];
            this.communities[docId] = communityId;
            List<Integer> docsOfCommunity = this.docsToCommunities.get(communityId);
            if (docsOfCommunity == null) {
                docsOfCommunity = new ArrayList<Integer>();
                this.docsToCommunities.put(communityId, docsOfCommunity);
            }
            docsOfCommunity.add(docId);
        }
        this.setKeyphraseMetrics(this.docsToCommunities, this.totalDocumentsSize);
        return bestModularity;
    }

    /**
     * Fills {@code chiSquare[community][keyphrase]} with a log-scale chi-square association score.
     *
     * <p>For each keyphrase the documents containing it are intersected with the document list of
     * every community in {@code docsToAnalyze}. From the resulting 2x2 contingency counts the
     * stored value is
     * {@code log(N) + 2*log|n11*n00 - n10*n01| - log(n11+n01) - log(n11+n10) - log(n10+n00) - log(n01+n00)},
     * i.e. the logarithm of the chi-square statistic. A keyphrase that occurs in none of the
     * analysed documents gets {@code Double.NEGATIVE_INFINITY} for every community; a community
     * without documents in {@code docsToAnalyze} keeps the array default {@code 0.0}.
     *
     * <p>The intersection is a two-pointer merge, so it assumes that both the keyphrase's document
     * indices and each community's document list are sorted ascending (TODO confirm at the callers).
     *
     * @param docsToAnalyze community id to the ids of the documents belonging to it; the ids must be
     *                      valid indices into {@code communitySizes}
     * @param docNum        total number of documents ({@code N} in the formula above)
     */
    public void setKeyphraseMetrics(Map<Integer, List<Integer>> docsToAnalyze, int docNum) {
        int communityCount = this.communitySizes.length;
        int keyphraseCount = this.docsToKeyphrases.size();
        this.chiSquare = new double[communityCount][keyphraseCount];
        for (int kpId = 0; kpId < keyphraseCount; ++kpId) {
            int occurrences = 0;
            int[] presenceInCommunity = new int[communityCount];
            int[] localCommunitySizes = new int[communityCount];
            SparseKeyphrase sk = this.docsToKeyphrases.get(kpId);
            int nonZeroElements = sk.getNonzeroElements();
            int[] docIds = sk.getDocIndices();

            for (Map.Entry<Integer, List<Integer>> docsToCommunity : docsToAnalyze.entrySet()) {
                int communityId = docsToCommunity.getKey();
                List<Integer> docIdsOfCommunity = docsToCommunity.getValue();
                int communitySize = docIdsOfCommunity.size();
                localCommunitySizes[communityId] = communitySize;

                // Two-pointer intersection of the keyphrase's documents with the community's documents.
                int doc = 0;
                int index = 0;
                while (doc < nonZeroElements && index < communitySize) {
                    int keyphraseDoc = docIds[doc];
                    int communityDoc = docIdsOfCommunity.get(index);
                    if (keyphraseDoc == communityDoc) {
                        ++occurrences;
                        ++presenceInCommunity[communityId];
                        ++doc;
                        ++index;
                    } else if (keyphraseDoc > communityDoc) {
                        ++index;
                    } else {
                        ++doc;
                    }
                }
            }

            for (int c = 0; c < communityCount; ++c) {
                if (occurrences == 0) {
                    this.chiSquare[c][kpId] = Double.NEGATIVE_INFINITY;
                    continue;
                }
                int communitySize = localCommunitySizes[c];
                if (communitySize <= 0) {
                    continue;
                }
                int presenceWithinCommunity = presenceInCommunity[c];
                int n01 = communitySize - presenceWithinCommunity;
                int n10 = occurrences - presenceWithinCommunity;
                int n00 = docNum - n01 - n10 - presenceWithinCommunity;
                // Widen to long before multiplying: the products can exceed the int range on large corpora.
                long crossDifference = (long)presenceWithinCommunity * n00 - (long)n10 * n01;
                double numerator = Math.log(docNum) + 2.0 * Math.log(Math.abs(crossDifference));
                this.chiSquare[c][kpId] = numerator - Math.log(communitySize) - Math.log(occurrences) - Math.log(n10 + n00) - Math.log(n01 + n00);
            }
        }
    }

    /**
     * Returns the number of columns used for the random projection.
     *
     * @return the fixed value {@code 150}
     */
    private int determineReliableReducedDim() {
        final int reducedDimension = 150;
        return reducedDimension;
    }

    /**
     * Draws a sparse random sign vector with one potential entry per keyphrase.
     *
     * <p>For every dimension a number is drawn via {@code rand.nextInt(6)}. Draws below 2 create an
     * entry: {@code true} for an even draw, {@code false} for an odd one. All other dimensions are
     * left out of the map.
     *
     * @return map from dimension index to the drawn sign flag; missing keys carry no entry
     */
    private Map<Integer, Boolean> generateRandomVector() {
        Map<Integer, Boolean> signByDimension = new HashMap<Integer, Boolean>();
        int dimensions = this.docsToKeyphrases.size();
        for (int dim = 0; dim < dimensions; ++dim) {
            int draw = this.rand.nextInt(6);
            if (draw >= 2) {
                continue;
            }
            boolean even = draw % 2 == 0;
            signByDimension.put(dim, even);
        }
        return signByDimension;
    }

    /**
     * Projects every document onto random sign vectors and then reduces each row with {@code PCA}.
     *
     * <p>One random vector is generated per output column. A document's entry in that column is
     * the signed sum of its frequencies over the dimensions present in the vector ({@code true}
     * adds, {@code false} subtracts), divided by {@code sqrt(3)}. A {@code PCA} is then built on
     * the projected matrix and every row is replaced by {@code pca.reduce(row)}.
     *
     * @return one reduced vector per document, indexed by document id
     */
    private double[][] performDimReduction() {
        final int targetDim = this.determineReliableReducedDim();
        final double scale = Math.sqrt(3.0);
        double[][] projected = new double[this.totalDocumentsSize][targetDim];
        for (int column = 0; column < targetDim; ++column) {
            // A fresh random vector per column; generated in column order.
            Map<Integer, Boolean> signs = this.generateRandomVector();
            for (int docId = 0; docId < this.totalDocumentsSize; ++docId) {
                SparseDocument row = this.keyphrasesToDocs.get(docId);
                int[] indices = row.getIndices();
                int[] frequencies = row.getFrequencies();
                double signedSum = 0.0;
                for (int i = 0; i < row.getNonzeroElements(); ++i) {
                    Boolean positive = signs.get(indices[i]);
                    if (positive == null) {
                        continue;
                    }
                    if (positive) {
                        signedSum += frequencies[i];
                    } else {
                        signedSum += -frequencies[i];
                    }
                }
                projected[docId][column] = signedSum / scale;
            }
        }
        PCA pca = new PCA(projected);
        for (int rowIdx = 0; rowIdx < projected.length; ++rowIdx) {
            projected[rowIdx] = pca.reduce(projected[rowIdx]);
        }
        return projected;
    }

    /**
     * Derives one RGB colour per document from the first three reduced coordinates.
     *
     * <p>The first three components of each reduced vector are divided by their Euclidean norm
     * and mapped from {@code [-1, 1]} to {@code [0, 255]} via {@code 255 * (1 + x) / 2}.
     *
     * @return colours indexed by document id
     */
    public Color[] determineColors() {
        double[][] reducedDocVecs = this.performDimReduction();
        Color[] documentColors = new Color[this.totalDocumentsSize];
        for (int doc = 0; doc < documentColors.length; ++doc) {
            double[] vec = reducedDocVecs[doc];
            double squaredNorm = 0.0;
            for (int c = 0; c < 3; ++c) {
                squaredNorm += vec[c] * vec[c];
            }
            double norm = Math.sqrt(squaredNorm);
            int[] rgb = new int[3];
            for (int c = 0; c < 3; ++c) {
                rgb[c] = (int)(255.0 * (1.0 + vec[c] / norm) / 2.0);
            }
            documentColors[doc] = new Color(rgb[0], rgb[1], rgb[2]);
        }
        return documentColors;
    }

    /**
     * Selects how the document similarity is calculated.
     *
     * <p>The constant order is part of the contract ({@code ordinal()} / {@code values()}).
     */
    public enum Calculation {
        COSINE,
        MAX,
        MIN,
        MEAN,
        PRODUCT,
        HARMONIC_MEAN,
        DICE,
        JACCARD
    }

    /**
     * Available colouring modes.
     *
     * <p>NOTE(review): the consumers of this enum are not part of this section; confirm the exact
     * meaning of each mode at the call sites.
     */
    public enum Coloring {
        COSINE,
        MINMAX
    }

    /**
     * Available keyphrase strategies.
     *
     * <p>NOTE(review): the code that interprets these constants is not part of this section;
     * confirm their semantics at the call sites.
     */
    public enum KeyphraseStrategy {
        INFO_GAIN,
        CHI_SQUARE,
        WF_IWF,
        FREQ_BASED,
        MODEL_BASED
    }

    /**
     * Selects the per-keyphrase metric that feeds the document similarity computation.
     *
     * <p>The constant order is part of the contract ({@code ordinal()} / {@code values()}).
     */
    public enum Metric {
        PROBABILITY,
        FIRST_OCCURRENCE,
        TFIDF,
        OCCURRENCES
    }

    /**
     * Available PCA-based colouring variants.
     *
     * <p>NOTE(review): the consumers of this enum are not part of this section; confirm the exact
     * meaning of each variant at the call sites.
     */
    public enum PCAColoring {
        CHI_SQUARE,
        INFO_GAIN,
        MUTUAL_INFO
    }
}

