// Copyright 1996-2007 by Jon Dart.  All Rights Reserved.
#ifndef _BITMAP_H
#define _BITMAP_H

#include "types.h"
#include <iostream>
#ifdef USE_INTRINSICS
#include <intrin.h>
#endif

using namespace std;

// FAST_SHIFT_R(data, n): right shift of a Bitmap done with a single 32-bit
// shift instead of a full 64-bit one.
//   n >= 32 : yields Bitmap(high 32 bits of data >> (n - 32)).
//   n <  32 : yields Bitmap(low 32 bits of data >> n); bits of the high
//             half are NOT carried down into the result in this case.
// The arguments and the whole expansion are parenthesized so that compound
// arguments (e.g. `s & 63`) and surrounding operators bind as expected.
// Note that `n` is evaluated twice.
#define FAST_SHIFT_R(data,n) (((n)>=32) ? Bitmap((data).hivalue()>>((n)-32)) : Bitmap((uint32)(data).value() >> (n)))

#ifdef USE_ASM
#ifdef _MSC_VER

// This finds the first (lowest-numbered) set bit in a 64-bit int.
// Returns the bit index 0..63, or 127 when no bit is set at all.
// The value is scanned as two 32-bit halves: the low dword first and,
// only if that is zero, the high dword (its index is offset by 32).
// The result is left in EAX by the asm block; there is no C++ return
// statement, which is why warning C4035 is disabled below (it is not
// re-enabled anywhere in this header).
// This assembly seems to be faster only
// on PIII CPUs.
//
FORCEINLINE int FirstOneAsm(uint64 a) {
#pragma warning(disable : 4035)
      __asm {
            // scan low dword; BSF sets ZF when its source operand is zero
            bsf     edx, dword ptr a
            // base index for the low half (MOV leaves the flags untouched)
            mov     eax, 0
            jnz     l1
            // low half was empty: scan the high dword
            bsf     edx, dword ptr a+4
            // base index for the high half
            mov     eax, 32
            jnz     l1
            // both halves empty: arrange for the sum below to be 127
            xor     eax, eax
            mov     edx, 127
            // result = base index + bit position within the half
      l1:   add     eax, edx
        }
}
// Counts the set bits in a 64-bit int.
// Each 32-bit half is processed with the same loop: x &= (x - 1) clears
// the lowest set bit, and the counter in EAX is incremented once per
// cleared bit, so the loop runs once per set bit.  The count accumulates
// across both halves and is returned in EAX (no C++ return statement).
FORCEINLINE int BitCountAsm(uint64 a) {
  __asm {
        // ecx = low dword, eax = running count
        mov     ecx, dword ptr a
        xor     eax, eax
        // skip the loop entirely when the low half is empty
        test    ecx, ecx
        jz      l1
        // edx = ecx - 1; ecx &= edx drops the lowest set bit
    l0: lea     edx, [ecx-1]
        inc     eax
        and     ecx, edx
        jnz     l0
        // same procedure for the high dword, continuing the count in eax
    l1: mov     ecx, dword ptr a+4
        test    ecx, ecx
        jz      l3
    l2: lea     edx, [ecx-1]
        inc     eax
        and     ecx, edx
        jnz     l2
    l3: 
  }
}
#else
// Linux (more precisely: any non-MSVC build with USE_ASM defined).
// Only C-linkage declarations are given here; the definitions are not
// part of this header and must be supplied by a separate object file.
extern "C" {
// Counterpart of the MSVC inline-asm FirstOneAsm above.
// NOTE(review): presumably returns the same values (lowest set bit index,
// 127 when empty) as the MSVC version - confirm against the implementation.
int FirstOneAsm(uint64);
// Counterpart of the MSVC inline-asm BitCountAsm above (set-bit count).
int BitCountAsm(uint64);
}
#endif
#endif

    // Per-square bit masks used by Bitmap's set/clear members.
    // The plain fields are OR-ed into a bitmap to set a square
    // (Bitmap::set, setDa1, setDa8, setR90); the not_ fields are AND-ed
    // into a bitmap to clear it (Bitmap::clear, clearDa1, clearDa8,
    // clearR90).
    // NOTE(review): each not_ field is presumably the bitwise complement
    // of its counterpart, and Da1/Da8/R90 presumably refer to rotated
    // board layouts - the table is filled in elsewhere, confirm there.
    struct masks
    {
       uint64 mask;         // used by Bitmap::set
       uint64 maskDa1;      // used by Bitmap::setDa1
       uint64 maskDa8;      // used by Bitmap::setDa8
       uint64 maskR90;      // used by Bitmap::setR90
       uint64 not_mask;     // used by Bitmap::clear(int)
       uint64 not_maskDa1;  // used by Bitmap::clearDa1
       uint64 not_maskDa8;  // used by Bitmap::clearDa8
       uint64 not_maskR90;  // used by Bitmap::clearR90
    };
    
// One entry per bit index 0..63; defined outside this header.
extern masks all_masks[64];

// Bitmap wraps a 64-bit word used as a set of bits indexed 0..63 and
// provides bit set/clear/test, boolean combination, shifting, population
// count and lowest-set-bit scanning.  Several members choose between
// compiler intrinsics, inline assembly, plain 64-bit arithmetic and table
// lookups depending on _WIN64/_WIN32/_MSC_VER, USE_INTRINSICS, USE_ASM
// and _64BIT.
//
// The byte-pointer based members (byte_value, rank_bits, shl_byte) assume
// a little-endian layout of the 64-bit word.
class Bitmap
{
       // Stream output; defined outside this header.
       friend ostream & operator << (ostream &o, const Bitmap &b);

// Isolate the lowest set bit of x (x & -x), yielding 0 for x == 0.
// Done in unsigned arithmetic so that a value with only the top bit set
// does not trigger signed-overflow undefined behaviour on negation.
#define GETBIT64(x) ((uint64)((uint64)(x) & ((uint64)0 - (uint64)(x))))
#define GETBIT32(x) ((uint32)((uint32)(x) & ((uint32)0 - (uint32)(x))))

    public:

       // Alternative views of the 64-bit word as two 32-bit or four
       // 16-bit pieces.  Kept for code that refers to these types; the
       // members of this class no longer pun through them.
       struct ints
       {
          uint32 loval,hival;
       };
       struct shorts
       {
          uint16 lo1,lo2,hi1,hi2;
       };
       union conv
       {
          uint64 val1;
          ints val2;
          shorts val3;
       };

    // Construct an empty bitmap (no bits set).
    Bitmap()
    : data(0)
    {
    }

    // Construct from a raw 64-bit value (also serves as an implicit
    // conversion from uint64).
    Bitmap(uint64 n)
    : data(n)
    {
    }

    // Raw 64-bit value.
    uint64 value() const
    {
       return data;
    }

    // Address of the underlying 64-bit word (constness is cast away).
    void *addr() const
    {
       return (void*)&data;
    }

    // Lowest-addressed byte of the word; this is the least significant
    // byte because:
    byte byte_value() const
    {
       // PC is little-endian
       return *((byte*)&data);
    }

    // Most significant 32 bits.
    uint32 FORCEINLINE hivalue() const
    {
       return (uint32)(data >> 32);
    }

    // Least significant 32 bits.
    uint32 FORCEINLINE lovalue() const
    {
       return (uint32)data;
    }

    // Remove all bits.
    void clear()
    {
       data = 0;
    }
    // Reduce the bitmap to just its lowest set bit (stays 0 if empty).
    void set_lsb()
    {
       data &= ~(data-1);
    }
    // Set bit n (0..63).
    void set(int n)
    {
#if defined(_WIN64) && defined(_MSC_VER) && defined(USE_INTRINSICS)
       _bittestandset64((LONG64*)&data,(DWORD)n);
#elif defined(_64BIT)
       data |= ((uint64)1)<<n;
#elif defined (_MSC_VER) && defined(USE_INTRINSICS)
       // 32-bit intrinsic: address the low or high dword separately
       if (n<32)
         _bittestandset((LONG*)&data,n);
       else
         _bittestandset(((LONG*)&data)+1,n-32);
#else
       data |= all_masks[n].mask;
#endif
    }
    // Clear bit n (0..63).
    void clear(int n)
    {
#if defined(_WIN64) && defined(_MSC_VER) && defined(USE_INTRINSICS)
       _bittestandreset64((LONG64*)&data,(DWORD)n);
#elif defined(_64BIT)
       data &= ~(((uint64)1)<<n);
#elif defined(_WIN32) && defined(USE_INTRINSICS)
       // 32-bit intrinsic: address the low or high dword separately
       if (n<32) _bittestandreset((LONG*)&data,n);
       else _bittestandreset(((LONG*)&data)+1,n-32);
#else
       data &= all_masks[n].not_mask;
#endif
    }
    // The following six members OR in / AND out the per-square masks of
    // the corresponding all_masks[n] field.
    void setR90(int n)
    {
        data |= all_masks[n].maskR90;
    }
    void clearR90(int n)
    {
        data &= all_masks[n].not_maskR90;
    }
    void setDa1(int n)
    {
        data |= all_masks[n].maskDa1;
    }
    void clearDa1(int n)
    {
        data &= all_masks[n].not_maskDa1;
    }
    void setDa8(int n)
    {
        data |= all_masks[n].maskDa8;
    }
    void clearDa8(int n)
    {
        data &= all_masks[n].not_maskDa8;
    }
    // Toggle (XOR) every bit that is set in b.
    void setClear(const Bitmap &b)
    {
        data ^= b.data;
    }
    // Nonzero if bit n (0..63) is set, 0 otherwise.
    int is_set(int n) const
    {
#if defined(_WIN64) && defined(USE_INTRINSICS)
        return _bittest64((__int64*)&data,n);
#elif defined(_WIN32) && defined(USE_INTRINSICS)
        if (n<32)
          return _bittest((const LONG*)&data,n);
        else
          return _bittest(((const LONG*)&data)+1,n-32);
#elif defined(_64BIT)
        return (data & (((uint64)1)<<n)) != (uint64)0;
#else
        return ((data & mask[n]) != (uint64)0);
#endif
    }
    // Nonzero if no bit is set.
    int is_clear()const
    {
        return (data == (uint64)0);
    }
    // Bitwise AND of two bitmaps.
    static Bitmap And( const Bitmap &src1, const Bitmap &src2)
    {
        return Bitmap(src1.data & src2.data);
    }
    // In-place bitwise AND with b.
    void And( const Bitmap &b)
    {
        data &= b.data;
    }
    // In-place removal of every bit that is set in b.
    void andNot(const Bitmap &b)
    {
       data &= ~b.data;
    }
    // src1 with every bit of src2 removed.
    static Bitmap andNot( const Bitmap &src1, const Bitmap &src2)
    {
       return Bitmap(src1.data & ~src2.data);
    }
    // Bitwise OR of two bitmaps.
    static Bitmap Or( const Bitmap &src1, const Bitmap &src2)
    {
       return Bitmap(src1.data | src2.data);
    }
    // Bitwise complement.
    static Bitmap Not( const Bitmap &src) {
      return Bitmap(~src.data);
    }
    // In-place bitwise OR with b.
    void Or( const Bitmap &b)
    {
        data |= b.data;
    }
    // In-place logical right shift by n bits.
    void shr(int n) {
        data = data >> n;
    }
    // src shifted right by n bits.
    static inline Bitmap shr(const Bitmap &src, int n) 
    {
        return Bitmap(src.data >> n);    
    }
    // In-place right shift by 8 bits (one byte).
    void shr8()
    {
        data >>= 8;
    }
    // src shifted left by n bits.
    static Bitmap shl(const Bitmap &src, int n)
    {
        return Bitmap(src.data << n);
    }
    // In-place left shift by n bits.
    void shl(int n)
    {
        data = data << n;    
    }
    // Copy the lowest byte into byte position n and, for n > 0, zero the
    // lowest byte.  Other bytes are left as they were.
    void shl_byte(int n)
    {
        // shift lowest 8 bits left by n bytes
        char *p = (char*)&data;
        p[n] = *p;
        if (n>0) *p = 0;
    }
    // Bitmap whose byte n (little-endian numbering) is b, all else zero.
    static inline Bitmap shl_byte(byte b, int n)
    {
        // shift lowest 8 bits left by n bytes
        uint64 new_data = 0;
        char *p = (char*)&new_data;
        p[n] = b;
        return Bitmap(new_data);
    }
    // Shift each 32-bit half independently by n bits and recombine.
    // NOTE(review): despite the "shl" name both halves are shifted RIGHT;
    // left unchanged because callers are not visible here - confirm intent.
    FORCEINLINE Bitmap shl_all(int n) const
    {
        // shift n bits, assuming that you will not
        // cross the 32-bit boundary
        return Bitmap(MAKELONGLONG(hivalue() >> n,
          lovalue()  >> n));
    }
    // In-place left shift by 8 bits (one byte).
    void shl8()
    {
        data <<= 8;
    }

    // Equality on the raw value.  These two operators are deliberately
    // left non-const: comparisons on const Bitmaps resolve through
    // operator uint64() instead.
    int operator == (const Bitmap &b)
    {
       return b.data == data;
    }

    int operator != (const Bitmap &b)
    {
       return b.data != data;
    }

    // cast operator
    operator uint64() const {
      return data;
    }

    // extract the bits for the nth rank (from the Black side)
    // (reads byte n of the word, little-endian numbering)
    unsigned rank_bits(int n) const
    {
       byte *p = (byte*)&data;
       return (unsigned)p[n];
    }

    // return the number of bits set
    FORCEINLINE unsigned int bit_count() const
    {
#ifdef USE_ASM
	   return BitCountAsm(data);
#else
       // clear the lowest set bit once per iteration; the number of
       // iterations equals the number of set bits
       unsigned int n = 0;
       uint64 tmp = data;
       while (tmp != 0) {
          tmp &= (tmp - 1);
          ++n;
       }
       return n;
#endif
    }

    // Index of the lowest set bit, or InvalidSquare when the bitmap is
    // empty (the USE_ASM path returns whatever FirstOneAsm yields for 0).
    FORCEINLINE Square first_one() const {
#if defined(_WIN64) && defined(_MSC_VER) && defined(USE_INTRINSICS)
      DWORD index;
      if (_BitScanForward64(&index,data))
        return (Square)index;
      else 
        return InvalidSquare;
#else
#ifdef USE_ASM
      return FirstOneAsm(data);
#elif defined(_64BIT)
      if (data == 0) return InvalidSquare;
      else return MagicTable64[(GETBIT64(data)*MAGIC64)>>58];
#else
      if (data == 0) return InvalidSquare;
      // use De Bruijn multiplication code from Lasse Hansen
      if (lovalue())
        return MagicTable32[(GETBIT32(lovalue())*MAGIC32)>>27];
      else
        return MagicTable32[(GETBIT32(hivalue())*MAGIC32)>>27]+32;
#endif
#endif
    }

    // Pop the lowest set bit: store its index in sq, clear that bit and
    // return 1.  Returns 0 and leaves the bitmap unchanged when it is
    // empty; sq should not be relied upon in that case.
    FORCEINLINE int iterate(Square &sq) {
#if defined(_WIN64) && defined(_MSC_VER) && defined(USE_INTRINSICS)
      if (!_BitScanForward64((DWORD*)&sq,data)) {
         return 0;
      }
      else {
         _bittestandreset64((LONG64*)&data,(DWORD)sq);
         return 1;
      }
#else
#ifdef USE_ASM
      sq = FirstOneAsm(data);
      if (sq == (InvalidSquare)) return 0;
      // clear bit we will return (the lowest set bit), as the other
      // implementations do; without this the caller's loop never ends
      data &= (data - 1);
#elif defined(_64BIT)
      if (data == 0) return 0;
      uint64 tmp = (uint64)GETBIT64(data);
      sq = MagicTable64[(tmp*MAGIC64)>>58];
      // clear bit we will return
      data &= ~tmp;
#else
      if (data == 0) return 0;
      // use De Bruijn multiplication code from Lasse Hansen
      uint32 tmp;
      if (lovalue()) {
        tmp = GETBIT32(lovalue());
        sq = MagicTable32[(tmp*MAGIC32)>>27];
        // clear bit we will return (low half)
        data &= ~((uint64)tmp);
      }
      else {
        tmp = GETBIT32(hivalue());
        sq = MagicTable32[(tmp*MAGIC32)>>27]+32;
        // clear bit we will return (high half)
        data &= ~(((uint64)tmp) << 32);
      }
#endif
      return 1;
#endif
    }

    // Static set-up / tear-down hooks; defined outside this header.
    static void init();

    static void cleanup();

    // Lookup tables, defined outside this header.  MagicTable32/64 map
    // the De Bruijn hash computed in first_one()/iterate() to a bit
    // index; mask[n] is tested against bit n in is_set().
    // NOTE(review): the remaining tables are not used in this header;
    // presumably they describe rotated board layouts - confirm at the
    // definition site.
    static CACHE_ALIGN int MagicTable32[32];
#if defined(_64BIT)
    static CACHE_ALIGN int MagicTable64[64];
#endif
    static CACHE_ALIGN const uint64 mask[64];
    static CACHE_ALIGN int unrot90[64];
    static CACHE_ALIGN int R90[64];
    static CACHE_ALIGN int da1[64];
    static CACHE_ALIGN int da1mod8[64];
    static CACHE_ALIGN int da1div8[64];
    static CACHE_ALIGN int unrot_da1[64];
    static CACHE_ALIGN int da1_mask[64];
    static CACHE_ALIGN int da8[64];
    static CACHE_ALIGN int da8mod8[64];
    static CACHE_ALIGN int da8div8[64];
    static CACHE_ALIGN int unrot_da8[64];
    static CACHE_ALIGN int da8_mask[64];

    // The bits themselves; public because free functions access it directly.
    uint64 data;

    private:
    // De Bruijn multipliers used with MagicTable64 / MagicTable32.
#ifdef _64BIT
    static const uint64 MAGIC64 = 0x07EDD5E59A4E28C2;
#endif
    static const unsigned MAGIC32 = 0xe89b2be;
};

// Codes 0..3 naming four diagonal directions.
// NOTE(review): not referenced anywhere in this header; presumably used
// as indices/selectors by the attack-generation code - confirm at the
// use sites.
#define DIAG_UPPER_LEFT 0
#define DIAG_UPPER_RIGHT 1
#define DIAG_LOWER_LEFT 2
#define DIAG_LOWER_RIGHT 3

// Per-square record of diagonal lookup data.
// NOTE(review): nothing in this header reads or writes these fields, so
// their exact meaning cannot be stated from here; presumably they hold
// the shift/mask parameters and attack table for the diagonal through a
// square - confirm against the code that fills DiagInfoA1/DiagInfoA8.
struct DiagInfo2
{
   int shift;
   int byte_shift;
   int bit_shift;
   int bit;
   int mask;
   Bitmap *attacks;    // non-owning from this header's point of view - TODO confirm
   Bitmap upperMask;
   Bitmap lowerMask;
};

// One record per bit index 0..63; both tables are defined outside this
// header.  NOTE(review): the A1/A8 suffixes presumably match the Da1/Da8
// naming used by Bitmap - confirm.
extern CACHE_ALIGN DiagInfo2 DiagInfoA1[64];
extern CACHE_ALIGN DiagInfo2 DiagInfoA8[64];

inline int TEST_MASK(const Bitmap &b1,const Bitmap &b2) {
  return !Bitmap::And(b1,b2).is_clear();
}

// Free-function bitwise AND of two bitmaps, returned as a raw 64-bit
// value rather than a Bitmap.
inline uint64 And( const Bitmap &src1, const Bitmap &src2)
{
    const uint64 lhs = src1.value();
    const uint64 rhs = src2.value();
    return lhs & rhs;
}

// Free-function bitwise OR of two bitmaps, returned as a raw 64-bit
// value rather than a Bitmap.
inline uint64 Or( const Bitmap &src1, const Bitmap &src2)
{
    const uint64 lhs = src1.value();
    const uint64 rhs = src2.value();
    return lhs | rhs;
}
#endif
