/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.simdvec.internal.vectorization;

import java.io.IOException;
import java.lang.foreign.MemorySegment;
import java.nio.ByteOrder;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.VectorUtil;
import org.elasticsearch.simdvec.internal.vectorization.MemorySegmentESNextOSQVectorsScorer;
import org.elasticsearch.simdvec.internal.vectorization.PanamaESVectorUtilSupport;

/**
 * Panama Vector API scorer for stored bit vectors against a query made of four byte planes.
 *
 * <p>The query array {@code q} holds four consecutive planes of {@code length} bytes each
 * ({@code q.length == length * 4}). For every plane {@code k} the scorer counts the set bits of
 * {@code plane_k AND storedBytes}; the four counts are combined as
 * {@code c0 + (c1 << 1) + (c2 << 2) + (c3 << 3)}.
 * (Presumably the planes are the bit-planes of a 4-bit quantized query - confirm against the
 * code that builds {@code q}.)
 *
 * <p>Vectorised loads read the stored bytes directly from {@code memorySegment}, starting at the
 * current file pointer of {@code in}. The scalar remainder is read through {@code in}, which
 * leaves the file pointer {@code length} bytes past where it started.
 *
 * <p>NOTE(review): the 128-bit and 256-bit variants are kept as separate methods so that every
 * vector loop keeps using a fixed species constant.
 */
final class MSBitToInt4ESNextOSQVectorsScorer extends MemorySegmentESNextOSQVectorsScorer.MemorySegmentScorer {

    /** Number of stored vectors handled by one {@link #scoreBulk} call. */
    private static final int BULK_VECTOR_COUNT = 16;

    /** Factor applied to the query interval width; the float nearest to 1/15. */
    private static final float QUERY_INTERVAL_SCALE = 0.06666667f;

    // Layout of the correction block that follows BULK_VECTOR_COUNT stored vectors, as byte
    // offsets from its start: 16 floats, 16 floats, 16 shorts, 16 floats.
    /** Offset of the second float array (64). */
    private static final long SECOND_FLOATS_OFFSET = (long) BULK_VECTOR_COUNT * Float.BYTES;
    /** Offset of the 16-bit component sums (128). */
    private static final long COMPONENT_SUMS_OFFSET = 2L * BULK_VECTOR_COUNT * Float.BYTES;
    /** Offset of the per-vector additional corrections (160). */
    private static final long ADDITIONAL_CORRECTIONS_OFFSET = COMPONENT_SUMS_OFFSET + (long) BULK_VECTOR_COUNT * Short.BYTES;
    /** Total size of the correction block (224). */
    private static final long CORRECTION_BLOCK_BYTES = ADDITIONAL_CORRECTIONS_OFFSET + (long) BULK_VECTOR_COUNT * Float.BYTES;

    MSBitToInt4ESNextOSQVectorsScorer(IndexInput in, int dimensions, int dataLength, MemorySegment memorySegment) {
        super(in, dimensions, dataLength, memorySegment);
    }

    /**
     * Scores the stored vector at the current file pointer against {@code q}.
     *
     * @param q query planes, {@code length * 4} bytes
     * @return the weighted bit-count score, or {@link Long#MIN_VALUE} when no vectorised path
     *         applies (vector shorter than 16 bytes, no fast integer vectors, or a vector bit
     *         size below 256 other than 128); in that case nothing is read
     * @throws IOException if reading from the underlying input fails
     */
    @Override
    public long quantizeScore(byte[] q) throws IOException {
        assert q.length == this.length * 4;
        if (this.length < 16 || !PanamaESVectorUtilSupport.HAS_FAST_INTEGER_VECTORS) {
            return Long.MIN_VALUE;
        }
        if (PanamaESVectorUtilSupport.VECTOR_BITSIZE >= 256) {
            return quantizeScore256(q);
        }
        if (PanamaESVectorUtilSupport.VECTOR_BITSIZE == 128) {
            return quantizeScore128(q);
        }
        return Long.MIN_VALUE;
    }

    /**
     * Scores one stored vector with 256-bit long lanes, then 128-bit long lanes for what is left,
     * then the scalar tail.
     */
    private long quantizeScore256(byte[] q) throws IOException {
        long count0 = 0L;
        long count1 = 0L;
        long count2 = 0L;
        long count3 = 0L;
        int i = 0;
        long offset = this.in.getFilePointer();

        // The 256-bit stage is only entered when at least two full 256-bit vectors fit.
        if (this.length >= ByteVector.SPECIES_256.vectorByteSize() * 2) {
            final int limit = ByteVector.SPECIES_256.loopBound(this.length);
            final int step = ByteVector.SPECIES_256.length();
            LongVector acc0 = LongVector.zero(LONG_SPECIES_256);
            LongVector acc1 = LongVector.zero(LONG_SPECIES_256);
            LongVector acc2 = LongVector.zero(LONG_SPECIES_256);
            LongVector acc3 = LongVector.zero(LONG_SPECIES_256);
            for (; i < limit; i += step) {
                LongVector q0 = ByteVector.fromArray(BYTE_SPECIES_256, q, i).reinterpretAsLongs();
                LongVector q1 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + this.length).reinterpretAsLongs();
                LongVector q2 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + this.length * 2).reinterpretAsLongs();
                LongVector q3 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + this.length * 3).reinterpretAsLongs();
                LongVector stored = LongVector.fromMemorySegment(LONG_SPECIES_256, this.memorySegment, offset, ByteOrder.LITTLE_ENDIAN);
                acc0 = acc0.add(q0.and(stored).lanewise(VectorOperators.BIT_COUNT));
                acc1 = acc1.add(q1.and(stored).lanewise(VectorOperators.BIT_COUNT));
                acc2 = acc2.add(q2.and(stored).lanewise(VectorOperators.BIT_COUNT));
                acc3 = acc3.add(q3.and(stored).lanewise(VectorOperators.BIT_COUNT));
                offset += LONG_SPECIES_256.vectorByteSize();
            }
            count0 += acc0.reduceLanes(VectorOperators.ADD);
            count1 += acc1.reduceLanes(VectorOperators.ADD);
            count2 += acc2.reduceLanes(VectorOperators.ADD);
            count3 += acc3.reduceLanes(VectorOperators.ADD);
        }

        // Whatever the 256-bit stage left (or everything, for short vectors) in 128-bit steps.
        if (this.length - i >= ByteVector.SPECIES_128.vectorByteSize()) {
            final int limit = ByteVector.SPECIES_128.loopBound(this.length);
            final int step = ByteVector.SPECIES_128.length();
            LongVector acc0 = LongVector.zero(LONG_SPECIES_128);
            LongVector acc1 = LongVector.zero(LONG_SPECIES_128);
            LongVector acc2 = LongVector.zero(LONG_SPECIES_128);
            LongVector acc3 = LongVector.zero(LONG_SPECIES_128);
            for (; i < limit; i += step) {
                LongVector q0 = ByteVector.fromArray(BYTE_SPECIES_128, q, i).reinterpretAsLongs();
                LongVector q1 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + this.length).reinterpretAsLongs();
                LongVector q2 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + this.length * 2).reinterpretAsLongs();
                LongVector q3 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + this.length * 3).reinterpretAsLongs();
                LongVector stored = LongVector.fromMemorySegment(LONG_SPECIES_128, this.memorySegment, offset, ByteOrder.LITTLE_ENDIAN);
                acc0 = acc0.add(q0.and(stored).lanewise(VectorOperators.BIT_COUNT));
                acc1 = acc1.add(q1.and(stored).lanewise(VectorOperators.BIT_COUNT));
                acc2 = acc2.add(q2.and(stored).lanewise(VectorOperators.BIT_COUNT));
                acc3 = acc3.add(q3.and(stored).lanewise(VectorOperators.BIT_COUNT));
                offset += LONG_SPECIES_128.vectorByteSize();
            }
            count0 += acc0.reduceLanes(VectorOperators.ADD);
            count1 += acc1.reduceLanes(VectorOperators.ADD);
            count2 += acc2.reduceLanes(VectorOperators.ADD);
            count3 += acc3.reduceLanes(VectorOperators.ADD);
        }

        // The vector loads bypassed `in`; move it past the bytes already consumed.
        this.in.seek(offset);
        return count0 + (count1 << 1) + (count2 << 2) + (count3 << 3) + scalarTailScore(q, i);
    }

    /** Scores one stored vector with 128-bit int lanes, then the scalar tail. */
    private long quantizeScore128(byte[] q) throws IOException {
        int i = 0;
        long offset = this.in.getFilePointer();
        final int limit = ByteVector.SPECIES_128.loopBound(this.length);
        final int step = ByteVector.SPECIES_128.length();
        IntVector acc0 = IntVector.zero(INT_SPECIES_128);
        IntVector acc1 = IntVector.zero(INT_SPECIES_128);
        IntVector acc2 = IntVector.zero(INT_SPECIES_128);
        IntVector acc3 = IntVector.zero(INT_SPECIES_128);
        for (; i < limit; i += step) {
            IntVector stored = IntVector.fromMemorySegment(INT_SPECIES_128, this.memorySegment, offset, ByteOrder.LITTLE_ENDIAN);
            IntVector q0 = ByteVector.fromArray(BYTE_SPECIES_128, q, i).reinterpretAsInts();
            IntVector q1 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + this.length).reinterpretAsInts();
            IntVector q2 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + this.length * 2).reinterpretAsInts();
            IntVector q3 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + this.length * 3).reinterpretAsInts();
            acc0 = acc0.add(stored.and(q0).lanewise(VectorOperators.BIT_COUNT));
            acc1 = acc1.add(stored.and(q1).lanewise(VectorOperators.BIT_COUNT));
            acc2 = acc2.add(stored.and(q2).lanewise(VectorOperators.BIT_COUNT));
            acc3 = acc3.add(stored.and(q3).lanewise(VectorOperators.BIT_COUNT));
            offset += INT_SPECIES_128.vectorByteSize();
        }
        final long count0 = acc0.reduceLanes(VectorOperators.ADD);
        final long count1 = acc1.reduceLanes(VectorOperators.ADD);
        final long count2 = acc2.reduceLanes(VectorOperators.ADD);
        final long count3 = acc3.reduceLanes(VectorOperators.ADD);

        // The vector loads bypassed `in`; move it past the bytes already consumed.
        this.in.seek(offset);
        return count0 + (count1 << 1) + (count2 << 2) + (count3 << 3) + scalarTailScore(q, i);
    }

    /**
     * Scores the bytes {@code [start, length)} of the stored vector, reading them sequentially
     * from {@code in} (8 bytes at a time, then 4, then 1).
     *
     * @param q     query planes
     * @param start index of the first byte not yet scored; {@code in} must already be positioned
     *              on the matching stored byte
     * @return the weighted contribution {@code c0 + (c1 << 1) + (c2 << 2) + (c3 << 3)} of the tail
     */
    private long scalarTailScore(byte[] q, int start) throws IOException {
        long count0 = 0L;
        long count1 = 0L;
        long count2 = 0L;
        long count3 = 0L;
        int i = start;

        final int longBound = this.length & ~(Long.BYTES - 1);
        for (; i < longBound; i += Long.BYTES) {
            final long stored = this.in.readLong();
            // VarHandle.get is signature-polymorphic: the cast selects the primitive return type.
            count0 += Long.bitCount((long) BitUtil.VH_LE_LONG.get(q, i) & stored);
            count1 += Long.bitCount((long) BitUtil.VH_LE_LONG.get(q, i + this.length) & stored);
            count2 += Long.bitCount((long) BitUtil.VH_LE_LONG.get(q, i + 2 * this.length) & stored);
            count3 += Long.bitCount((long) BitUtil.VH_LE_LONG.get(q, i + 3 * this.length) & stored);
        }

        final int intBound = this.length & ~(Integer.BYTES - 1);
        for (; i < intBound; i += Integer.BYTES) {
            final int stored = this.in.readInt();
            count0 += Integer.bitCount((int) BitUtil.VH_LE_INT.get(q, i) & stored);
            count1 += Integer.bitCount((int) BitUtil.VH_LE_INT.get(q, i + this.length) & stored);
            count2 += Integer.bitCount((int) BitUtil.VH_LE_INT.get(q, i + 2 * this.length) & stored);
            count3 += Integer.bitCount((int) BitUtil.VH_LE_INT.get(q, i + 3 * this.length) & stored);
        }

        for (; i < this.length; ++i) {
            // `stored` is in 0..255, so it also masks away the sign extension of q[...].
            final int stored = this.in.readByte() & 0xFF;
            count0 += Integer.bitCount(q[i] & stored);
            count1 += Integer.bitCount(q[i + this.length] & stored);
            count2 += Integer.bitCount(q[i + 2 * this.length] & stored);
            count3 += Integer.bitCount(q[i + 3 * this.length] & stored);
        }
        return count0 + (count1 << 1) + (count2 << 2) + (count3 << 3);
    }

    /**
     * Scores {@code count} consecutive stored vectors, writing score {@code n} to {@code scores[n]}.
     *
     * @param q      query planes, {@code length * 4} bytes
     * @param count  number of stored vectors to score
     * @param scores destination, at least {@code count} long
     * @return {@code true} if the scores were computed, {@code false} when no vectorised path
     *         applies (nothing is read or written in that case)
     * @throws IOException if reading from the underlying input fails
     */
    @Override
    public boolean quantizeScoreBulk(byte[] q, int count, float[] scores) throws IOException {
        assert q.length == this.length * 4;
        if (this.length < 16 || !PanamaESVectorUtilSupport.HAS_FAST_INTEGER_VECTORS) {
            return false;
        }
        if (PanamaESVectorUtilSupport.VECTOR_BITSIZE >= 256) {
            quantizeScore256Bulk(q, count, scores);
            return true;
        }
        if (PanamaESVectorUtilSupport.VECTOR_BITSIZE == 128) {
            quantizeScore128Bulk(q, count, scores);
            return true;
        }
        return false;
    }

    /** Runs {@link #quantizeScore128} on {@code count} consecutive stored vectors. */
    private void quantizeScore128Bulk(byte[] q, int count, float[] scores) throws IOException {
        for (int iter = 0; iter < count; ++iter) {
            scores[iter] = quantizeScore128(q);
        }
    }

    /** Runs {@link #quantizeScore256} on {@code count} consecutive stored vectors. */
    private void quantizeScore256Bulk(byte[] q, int count, float[] scores) throws IOException {
        for (int iter = 0; iter < count; ++iter) {
            scores[iter] = quantizeScore256(q);
        }
    }

    /**
     * Scores a block of 16 stored vectors and applies the corrections stored in the 224 bytes
     * that follow them, leaving the file pointer after that correction block.
     *
     * @param q                         query planes, {@code length * 4} bytes
     * @param queryLowerInterval        lower bound of the query interval
     * @param queryUpperInterval        upper bound of the query interval
     * @param queryComponentSum         component sum of the query
     * @param queryAdditionalCorrection additional correction term of the query
     * @param similarityFunction        similarity that selects the final score transformation
     * @param centroidDp                value subtracted from non-Euclidean scores
     * @param scores                    receives the 16 final scores
     * @return the maximum of the written scores, or {@link Float#NEGATIVE_INFINITY} when no
     *         vectorised path applies (nothing is read or written in that case)
     * @throws IOException if reading from the underlying input fails
     */
    @Override
    public float scoreBulk(byte[] q, float queryLowerInterval, float queryUpperInterval, int queryComponentSum, float queryAdditionalCorrection, VectorSimilarityFunction similarityFunction, float centroidDp, float[] scores) throws IOException {
        assert q.length == this.length * 4;
        if (this.length < 16 || !PanamaESVectorUtilSupport.HAS_FAST_INTEGER_VECTORS) {
            return Float.NEGATIVE_INFINITY;
        }
        if (PanamaESVectorUtilSupport.VECTOR_BITSIZE >= 256) {
            return score256Bulk(q, queryLowerInterval, queryUpperInterval, queryComponentSum, queryAdditionalCorrection, similarityFunction, centroidDp, scores);
        }
        if (PanamaESVectorUtilSupport.VECTOR_BITSIZE == 128) {
            return score128Bulk(q, queryLowerInterval, queryUpperInterval, queryComponentSum, queryAdditionalCorrection, similarityFunction, centroidDp, scores);
        }
        return Float.NEGATIVE_INFINITY;
    }

    /** {@link #scoreBulk} implementation using the 128-bit species constants. */
    private float score128Bulk(byte[] q, float queryLowerInterval, float queryUpperInterval, int queryComponentSum, float queryAdditionalCorrection, VectorSimilarityFunction similarityFunction, float centroidDp, float[] scores) throws IOException {
        // First pass: raw bit-count scores; this leaves `in` at the start of the correction block.
        quantizeScore128Bulk(q, BULK_VECTOR_COUNT, scores);
        final int limit = FLOAT_SPECIES_128.loopBound(BULK_VECTOR_COUNT);
        final int step = FLOAT_SPECIES_128.length();
        final long offset = this.in.getFilePointer();
        final float ay = queryLowerInterval;
        final float ly = (queryUpperInterval - ay) * QUERY_INTERVAL_SCALE;
        final float y1 = queryComponentSum;
        float maxScore = Float.NEGATIVE_INFINITY;
        for (int i = 0; i < limit; i += step) {
            final long floatPos = offset + (long) i * Float.BYTES;
            // presumably ax is the per-vector lower interval and lx the interval width - TODO confirm
            FloatVector ax = FloatVector.fromMemorySegment(FLOAT_SPECIES_128, this.memorySegment, floatPos, ByteOrder.LITTLE_ENDIAN);
            FloatVector lx = FloatVector.fromMemorySegment(FLOAT_SPECIES_128, this.memorySegment, floatPos + SECOND_FLOATS_OFFSET, ByteOrder.LITTLE_ENDIAN).sub(ax);
            // The stored 16-bit sums are unsigned: widen to int, mask off the sign extension, then to float.
            Vector<Float> targetComponentSums = ShortVector.fromMemorySegment(SHORT_SPECIES_128, this.memorySegment, offset + COMPONENT_SUMS_OFFSET + (long) i * Short.BYTES, ByteOrder.LITTLE_ENDIAN)
                .convert(VectorOperators.S2I, 0)
                .reinterpretAsInts()
                .and(0xFFFF)
                .convert(VectorOperators.I2F, 0);
            FloatVector additionalCorrections = FloatVector.fromMemorySegment(FLOAT_SPECIES_128, this.memorySegment, floatPos + ADDITIONAL_CORRECTIONS_OFFSET, ByteOrder.LITTLE_ENDIAN);
            FloatVector qcDist = FloatVector.fromArray(FLOAT_SPECIES_128, scores, i);

            FloatVector res1 = ax.mul(ay).mul((float) this.dimensions);
            FloatVector res2 = lx.mul(ay).mul(targetComponentSums);
            FloatVector res3 = ax.mul(ly).mul(y1);
            FloatVector res4 = lx.mul(ly).mul(qcDist);
            FloatVector res = res1.add(res2).add(res3).add(res4);

            if (similarityFunction == VectorSimilarityFunction.EUCLIDEAN) {
                res = res.mul(-2.0f).add(additionalCorrections).add(queryAdditionalCorrection).add(1.0f);
                res = FloatVector.broadcast(FLOAT_SPECIES_128, 1.0f).div(res).max(0.0f);
                maxScore = Math.max(maxScore, res.reduceLanes(VectorOperators.MAX));
                res.intoArray(scores, i);
            } else {
                res = res.add(queryAdditionalCorrection).add(additionalCorrections).sub(centroidDp);
                if (similarityFunction == VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT) {
                    // The scaling is applied lane by lane through the scalar Lucene helper.
                    res.intoArray(scores, i);
                    for (int j = 0; j < step; ++j) {
                        scores[i + j] = VectorUtil.scaleMaxInnerProductScore(scores[i + j]);
                        maxScore = Math.max(maxScore, scores[i + j]);
                    }
                } else {
                    res = res.add(1.0f).mul(0.5f).max(0.0f);
                    res.intoArray(scores, i);
                    maxScore = Math.max(maxScore, res.reduceLanes(VectorOperators.MAX));
                }
            }
        }
        // The corrections were read through the memory segment; skip them on `in`.
        this.in.seek(offset + CORRECTION_BLOCK_BYTES);
        return maxScore;
    }

    /** {@link #scoreBulk} implementation using the 256-bit species constants. */
    private float score256Bulk(byte[] q, float queryLowerInterval, float queryUpperInterval, int queryComponentSum, float queryAdditionalCorrection, VectorSimilarityFunction similarityFunction, float centroidDp, float[] scores) throws IOException {
        // First pass: raw bit-count scores; this leaves `in` at the start of the correction block.
        quantizeScore256Bulk(q, BULK_VECTOR_COUNT, scores);
        final int limit = FLOAT_SPECIES_256.loopBound(BULK_VECTOR_COUNT);
        final int step = FLOAT_SPECIES_256.length();
        final long offset = this.in.getFilePointer();
        final float ay = queryLowerInterval;
        final float ly = (queryUpperInterval - ay) * QUERY_INTERVAL_SCALE;
        final float y1 = queryComponentSum;
        float maxScore = Float.NEGATIVE_INFINITY;
        for (int i = 0; i < limit; i += step) {
            final long floatPos = offset + (long) i * Float.BYTES;
            // presumably ax is the per-vector lower interval and lx the interval width - TODO confirm
            FloatVector ax = FloatVector.fromMemorySegment(FLOAT_SPECIES_256, this.memorySegment, floatPos, ByteOrder.LITTLE_ENDIAN);
            FloatVector lx = FloatVector.fromMemorySegment(FLOAT_SPECIES_256, this.memorySegment, floatPos + SECOND_FLOATS_OFFSET, ByteOrder.LITTLE_ENDIAN).sub(ax);
            // The stored 16-bit sums are unsigned: widen to int, mask off the sign extension, then to float.
            Vector<Float> targetComponentSums = ShortVector.fromMemorySegment(SHORT_SPECIES_256, this.memorySegment, offset + COMPONENT_SUMS_OFFSET + (long) i * Short.BYTES, ByteOrder.LITTLE_ENDIAN)
                .convert(VectorOperators.S2I, 0)
                .reinterpretAsInts()
                .and(0xFFFF)
                .convert(VectorOperators.I2F, 0);
            FloatVector additionalCorrections = FloatVector.fromMemorySegment(FLOAT_SPECIES_256, this.memorySegment, floatPos + ADDITIONAL_CORRECTIONS_OFFSET, ByteOrder.LITTLE_ENDIAN);
            FloatVector qcDist = FloatVector.fromArray(FLOAT_SPECIES_256, scores, i);

            FloatVector res1 = ax.mul(ay).mul((float) this.dimensions);
            FloatVector res2 = lx.mul(ay).mul(targetComponentSums);
            FloatVector res3 = ax.mul(ly).mul(y1);
            FloatVector res4 = lx.mul(ly).mul(qcDist);
            FloatVector res = res1.add(res2).add(res3).add(res4);

            if (similarityFunction == VectorSimilarityFunction.EUCLIDEAN) {
                res = res.mul(-2.0f).add(additionalCorrections).add(queryAdditionalCorrection).add(1.0f);
                res = FloatVector.broadcast(FLOAT_SPECIES_256, 1.0f).div(res).max(0.0f);
                maxScore = Math.max(maxScore, res.reduceLanes(VectorOperators.MAX));
                res.intoArray(scores, i);
            } else {
                res = res.add(queryAdditionalCorrection).add(additionalCorrections).sub(centroidDp);
                if (similarityFunction == VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT) {
                    // The scaling is applied lane by lane through the scalar Lucene helper.
                    res.intoArray(scores, i);
                    for (int j = 0; j < step; ++j) {
                        scores[i + j] = VectorUtil.scaleMaxInnerProductScore(scores[i + j]);
                        maxScore = Math.max(maxScore, scores[i + j]);
                    }
                } else {
                    res = res.add(1.0f).mul(0.5f).max(0.0f);
                    maxScore = Math.max(maxScore, res.reduceLanes(VectorOperators.MAX));
                    res.intoArray(scores, i);
                }
            }
        }
        // The corrections were read through the memory segment; skip them on `in`.
        this.in.seek(offset + CORRECTION_BLOCK_BYTES);
        return maxScore;
    }
}

