/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.es.nuvo.crawler.web.archive;

import com.ibm.es.nuvo.crawler.util.archive.ArchiveEntry;
import com.ibm.es.nuvo.crawler.util.archive.ArchiveEntryFilter;
import com.ibm.es.nuvo.crawler.util.archive.ArchiveFile;
import com.ibm.es.nuvo.crawler.util.archive.ArchiveFileDescriptor;
import com.ibm.es.nuvo.crawler.util.archive.ArchiveFileRegistory;
import com.ibm.es.nuvo.crawler.util.archive.FilteredArchiveFile;
import com.ibm.es.nuvo.crawler.util.hash.Hash32;
import com.ibm.es.nuvo.crawler.util.hash.Hash64;
import com.ibm.es.nuvo.crawler.util.hash.HashableDataBufferOutputStream;
import com.ibm.es.nuvo.crawler.web.archive.ArchiveHandleException;
import com.ibm.es.nuvo.crawler.web.db.CrawlRec;
import com.ibm.es.nuvo.crawler.web.db.tables.ArchiveEntryTable;
import com.ibm.es.nuvo.crawler.web.error.WCException;
import com.ibm.es.nuvo.crawler.web.net.CURL;
import com.ibm.es.nuvo.crawler.web.rule.WebSpace;
import com.ibm.es.nuvo.crawler.web.util.StreamUtils;
import com.ibm.es.nuvo.logging.ExtendedLogger;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;

/**
 * Walks the entries of an archive document that the crawler has fetched and
 * hands them out one at a time as {@link CrawlRec} instances via {@link #next()}.
 *
 * <p>For every archive entry a synthetic URL is derived from the parent URL
 * (see {@link #getEntryURL(String)}), the entry content is read and
 * fingerprinted, and the resulting record is stored through the supplied
 * {@link ArchiveEntryTable}. Once the archive is exhausted, records that were
 * known from a previous crawl but were not seen again are returned one per
 * call with HTTP status 404.
 *
 * <p>The instance also registers itself as an {@link ArchiveEntryFilter} on the
 * archive file it wraps, so {@link #accept(ArchiveEntry)} decides which entries
 * the archive exposes.
 */
public class ArchiveEntryProcessor implements ArchiveEntryFilter {
    private static final String copyright = "IBM Confidential OCO Source Materials 5724-R21 \u00a9 Copyright IBM Corp.  2006, 2007.   All Rights Reserved. The source code for this program is not published or otherwise divested of its trade secrets, irrespective of what has been deposited with the U.S. Copyright Office.";
    private static final ExtendedLogger tracer = ExtendedLogger.getLogger("NuvoTracer." + ArchiveEntryProcessor.class.getName());

    /** Size in bytes of the copy buffer used while reading a single entry. */
    private static final int READ_BUFFER_SIZE = 32768;

    /** URL of the archive document itself. */
    private CURL parentUrl;
    /** Crawl record of the archive document itself. */
    private CrawlRec parent;
    /** Lower-cased content type of the archive document. */
    private String mimeType;
    /** Encoding passed to the archive when iterating and stamped on every entry record. */
    private String encoding;
    /** Open archive, or {@code null} when there is none or it has been exhausted/closed. */
    private ArchiveFile archiveFile;
    /** Crawl rules used to filter entries; may be {@code null} (no filtering). */
    private WebSpace space;
    /** Table used to load previously known entries and to store new ones. */
    private ArchiveEntryTable manager;
    /**
     * Entries known from a previous crawl that have not been seen in this pass.
     * Never {@code null}, so {@link #next()} stays usable even when the
     * constructor bailed out early or the document is not a supported archive.
     */
    private Map<Hash64, CrawlRec> oldEntry = new HashMap<Hash64, CrawlRec>();

    /**
     * Opens the archive held in {@code cr} and prepares entry iteration.
     *
     * <p>If {@code url} or {@code cr} is {@code null}, or no archive descriptor
     * matches the URL / content type, the processor is left empty and
     * {@link #next()} simply returns {@code null}.
     *
     * @param url      URL of the archive document
     * @param cr       crawl record of the archive document (content, fingerprints, bucket, ...)
     * @param space    crawl rules used by {@link #accept(ArchiveEntry)}; may be {@code null}
     * @param encoding encoding used for the archive entries; {@code "utf-8"} when {@code null}
     * @param mgr      table providing the previously stored entries and receiving new ones
     * @throws WCException (as {@link ArchiveHandleException}) when the archive cannot be
     *                     opened or the previously stored entries cannot be loaded
     */
    public ArchiveEntryProcessor(CURL url, CrawlRec cr, WebSpace space, String encoding, ArchiveEntryTable mgr) throws WCException {
        if (url == null || cr == null) {
            return;
        }
        this.parentUrl = url;
        this.space = space;
        this.mimeType = cr.getContentTypeLC();
        this.parent = cr;
        this.manager = mgr;
        this.encoding = encoding != null ? encoding : "utf-8";
        this.archiveFile = null;

        ArchiveFileDescriptor descriptor = ArchiveEntryProcessor.getDescriptor(url, this.mimeType);
        if (descriptor == null) {
            return;
        }

        InputStream zippedStream = null;
        try {
            if (cr.getContent() != null) {
                zippedStream = cr.getContent().getInputStream();
                this.archiveFile = descriptor.createArchiveFile(url.getPath(), this.mimeType, zippedStream);
                FilteredArchiveFile filtered = new FilteredArchiveFile(this.archiveFile);
                this.archiveFile = filtered;
                filtered.addFilter(this);
            }
            // Previously stored entries are only looked up when the parent has
            // an old fingerprint, i.e. it was crawled before.
            Map<Hash64, CrawlRec> known = cr.getOldFingerprint() != 0 ? this.manager.getEntries(this.parentUrl) : null;
            this.oldEntry = known != null ? known : new HashMap<Hash64, CrawlRec>();
        }
        catch (Exception e) {
            this.closeArchiveQuietly();
            StreamUtils.safeClose(zippedStream);
            throw new ArchiveHandleException("Falied to create the entry processor for archive support", (Throwable)e);
        }
    }

    /**
     * Closes the archive (if one is open) on a best-effort basis and forgets it.
     */
    private void closeArchiveQuietly() {
        if (this.archiveFile == null) {
            return;
        }
        try {
            this.archiveFile.close();
        }
        catch (IOException ignored) {
            // Best-effort cleanup: there is nothing useful to do if close fails.
        }
        this.archiveFile = null;
    }

    /**
     * Produces the next record: first new/changed archive entries, then, once
     * the archive is exhausted, previously known entries that have disappeared.
     *
     * @return the next record, or {@code null} when nothing is left
     * @throws IOException  if reading the archive fails
     * @throws SQLException if storing a record fails
     */
    private CrawlRec process() throws IOException, SQLException {
        CrawlRec rec = this.nextChangedEntry();
        return rec != null ? rec : this.nextRemovedEntry();
    }

    /**
     * Advances through the archive until an entry is found that has to be
     * reported, fills in its record, stores it and returns it.
     *
     * <p>Skipped entries: nested archives (see {@link #isExclusive(String)}),
     * entries without usable content (see {@link #readStream()}), entries whose
     * fingerprint cannot be computed, and entries whose fingerprint matches the
     * old one while the parent has a non-zero old fingerprint.
     *
     * @return the stored record, or {@code null} when the archive is exhausted
     *         (in which case it has been closed) or no archive is open
     */
    private CrawlRec nextChangedEntry() throws IOException, SQLException {
        while (this.archiveFile != null) {
            ArchiveEntry entry = this.archiveFile.getNextEntry(this.encoding);
            if (entry == null) {
                this.closeArchiveQuietly();
                break;
            }
            if (this.isExclusive(entry.getName())) {
                continue;
            }

            CURL url = this.getEntryURL(entry.getName());
            // An entry that is seen again is taken out of the "old" map so it is
            // not reported as removed later on.
            CrawlRec rec = this.oldEntry.remove(url.getURLHash());
            if (rec == null) {
                rec = new CrawlRec();
            }

            HashableDataBufferOutputStream content = this.readStream();
            if (content == null) {
                continue;
            }

            int fingerPrint;
            InputStream is = null;
            try {
                is = content.getInputStream();
                fingerPrint = new Hash32(is).intValue();
                rec.setNewFingerprint(fingerPrint);
            }
            catch (Exception e) {
                tracer.log(Level.FINER, "Calcurating hash", e);
                continue;
            }
            finally {
                // Only release the stream here; the finally block must complete
                // normally so the change check below is reached.
                StreamUtils.safeClose(is);
            }

            boolean unchanged = rec.getOldFingerprint() == fingerPrint && this.parent.getOldFingerprint() != 0;
            if (unchanged) {
                continue;
            }

            rec.setURL(url);
            rec.setContent(content);
            // Entry time is divided by 1000 and stored as an int
            // (presumably milliseconds to seconds - confirm against ArchiveEntry).
            int time = (int)(entry.getTime() / 1000L);
            rec.setCreationDate(time);
            rec.setLastModifiedDate(time);
            rec.setNewHTTPStatus(200);
            rec.setBucket(this.parent.getBucket());
            rec.setCompressed(false);
            rec.setCrawlDate(this.parent.getCrawlDate());
            rec.setEncoding(this.encoding);
            rec.setContentLanguage(this.parent.getContentLanguage());
            this.addEntry(url, rec);
            return rec;
        }
        return null;
    }

    /**
     * Takes one previously known entry that was not seen in this pass, marks it
     * as gone (new status 404, old status 200, no content), stores it and
     * removes it from the pending map.
     *
     * @return the stored record, or {@code null} when no such entry is left
     */
    private CrawlRec nextRemovedEntry() throws SQLException {
        Iterator<Map.Entry<Hash64, CrawlRec>> pending = this.oldEntry.entrySet().iterator();
        if (!pending.hasNext()) {
            return null;
        }
        CrawlRec rec = pending.next().getValue();
        rec.setBucket(this.parent.getBucket());
        // NOTE(review): this immediately overrides the bucket set on the previous
        // line; kept as found - confirm which of the two calls is intended.
        rec.setBucket(null);
        rec.setNewHTTPStatus(404);
        rec.setOldHTTPStatus(200);
        rec.setContent(null);
        this.addEntry(rec.getURL(), rec);
        // Removed only after a successful store, so a failed store leaves the
        // entry pending.
        pending.remove();
        return rec;
    }

    /**
     * Stores {@code rec} under {@code url}, associated with the parent archive URL.
     */
    private void addEntry(CURL url, CrawlRec rec) throws SQLException {
        this.manager.put(url, rec, this.parent.getURL());
    }

    /**
     * Returns the next record produced from the archive.
     *
     * <p>Any exception raised while processing is logged at {@code FINE}, the
     * archive is closed, and {@code null} is returned.
     *
     * @return the next record, or {@code null} when nothing is left or an error occurred
     */
    public CrawlRec next() {
        try {
            return this.process();
        }
        catch (Exception e) {
            this.closeArchiveQuietly();
            tracer.log(Level.FINE, "next entry", e);
            return null;
        }
    }

    /**
     * Reads the content of the current archive entry into a buffer.
     *
     * <p>An entry whose size exceeds the parent's configured maximum page
     * length is discarded entirely (the buffer is disposed), not cut short.
     *
     * @return the closed buffer holding the entry content, or {@code null} when
     *         the entry is empty or exceeds the maximum page length
     * @throws IOException if reading the entry fails
     */
    private HashableDataBufferOutputStream readStream() throws IOException {
        InputStream inputStream = this.archiveFile.getInputStream();
        byte[] buffer = new byte[READ_BUFFER_SIZE];
        HashableDataBufferOutputStream data = new HashableDataBufferOutputStream();
        int size = 0;
        try {
            int maxPageSize = this.parent.getConfig().getMaxPageLength();
            int read;
            while ((read = inputStream.read(buffer)) > 0) {
                data.write(buffer, 0, read);
                size += read;
                if (size <= maxPageSize) {
                    continue;
                }
                if (tracer.isLoggable(Level.FINE)) {
                    tracer.log(Level.FINE, "truncated entry " + this.parentUrl);
                }
                data.dispose();
                // Disposed buffers must not be closed again by the finally block.
                data = null;
                return null;
            }
        }
        finally {
            if (data != null) {
                data.close();
            }
        }
        return size == 0 ? null : data;
    }

    /**
     * Tells whether the registry knows an archive format for the given URL and
     * content type.
     *
     * @param url      URL of the document
     * @param mimeType content type of the document
     * @return {@code true} when an archive descriptor is found
     */
    public static boolean isArchive(CURL url, String mimeType) {
        return ArchiveEntryProcessor.getDescriptor(url, mimeType) != null;
    }

    /**
     * Looks up the archive descriptor for a URL / content type pair.
     *
     * @return the descriptor, or {@code null} when none matches
     */
    private static ArchiveFileDescriptor getDescriptor(CURL url, String mimeType) {
        ArchiveFileRegistory fileRegistory = new ArchiveFileRegistory();
        return fileRegistory.getArchiveFileDescriptor(url.toString(), mimeType);
    }

    /**
     * Looks up the archive descriptor by name only, with an empty content type.
     *
     * @return the descriptor, or {@code null} when none matches
     */
    private static ArchiveFileDescriptor getDescriptor(String name) {
        ArchiveFileRegistory fileRegistory = new ArchiveFileRegistory();
        return fileRegistory.getArchiveFileDescriptor(name, "");
    }

    /**
     * Filter callback deciding whether an archive entry is exposed.
     *
     * <p>Entries whose name is itself recognised as an archive are rejected.
     * Otherwise the entry is checked against the web space using the synthetic
     * URL {@code http://dummy/<name>}; without a web space every entry is accepted.
     *
     * @param entry the archive entry to test
     * @return {@code true} when the entry should be processed
     */
    public boolean accept(ArchiveEntry entry) {
        String name = entry.getName();
        if (ArchiveEntryProcessor.getDescriptor(name) != null) {
            return false;
        }
        CURL url = new CURL("http://dummy/" + name);
        return this.space == null || !this.space.isExcluded(url);
    }

    /**
     * Builds the URL that identifies an archive entry: the parent URL with an
     * extra {@code ArchiveEntry=<url-encoded name>} query parameter, appended
     * with {@code ?} or {@code &} depending on whether the parent URL already
     * contains a {@code ?}.
     *
     * @param name entry name inside the archive
     * @return the entry URL
     */
    private CURL getEntryURL(String name) {
        String parentString = this.parentUrl.toString();
        StringBuilder buffer = new StringBuilder(parentString);
        buffer.append(parentString.indexOf("?") < 0 ? "?" : "&");
        buffer.append("ArchiveEntry=");
        try {
            buffer.append(URLEncoder.encode(name, "utf-8"));
        }
        catch (UnsupportedEncodingException e) {
            // Fall back to the raw name if the encoding is unavailable.
            buffer.append(name);
        }
        CURL url = new CURL(buffer.toString());
        // NOTE(review): return value is ignored; presumably called for a side
        // effect inside CURL - confirm.
        url.downstreamString();
        return url;
    }

    /**
     * Tells whether an entry must be skipped because its name is missing or
     * ends in {@code .tar}, {@code .tar.gz}, {@code .tgz} or {@code .zip}.
     * The comparison ignores case and does not depend on the default locale.
     *
     * @param filename entry name; may be {@code null}
     * @return {@code true} when the entry must be skipped
     */
    private boolean isExclusive(String filename) {
        if (filename == null) {
            return true;
        }
        String filenameLC = filename.toLowerCase(Locale.ENGLISH);
        return filenameLC.endsWith(".tar")
            || filenameLC.endsWith(".tar.gz")
            || filenameLC.endsWith(".tgz")
            || filenameLC.endsWith(".zip");
    }
}

