Cleanup in bitvec.h and bugfix in principal ideal generation

Florian Stecker 2016-11-18 20:39:19 +01:00
parent ed63dc2b82
commit 882695c15e
3 changed files with 106 additions and 145 deletions


@@ -1,6 +1,6 @@
 HEADERS=coxeter.h thickenings.h queue.h bitvec.h
-#OPTIONS=-O3 -m64 -march=native -flto -funroll-loops -std=gnu99
-OPTIONS=-m64 -march=native -O0 -g -std=gnu99
+OPTIONS=-O3 -m64 -march=native -flto -funroll-loops -std=gnu99 -Winline
+#OPTIONS=-m64 -march=native -O0 -g -std=gnu99
 #OPTIONS=-O3 -m64 -march=native -funroll-loops -fno-inline -std=gnu99 -pg
 all: generate process

bitvec.h

@@ -16,99 +16,39 @@
 #ifndef __BITVEC_H__
 #define __BITVEC_H__ 1
 
-// #define _GNU_SOURCE /* enable ffsll */
-// #include <string.h>
+#define _GNU_SOURCE
+#include <string.h>
 #include <inttypes.h>
 #include <stdio.h>
 #include <stdlib.h>
-
-/* For _mm_popcnt_u64 */
-#include <x86intrin.h>
 
 // FIRSTBITS(n) only yields useful result when 0 <= n < 64
-#define FIRSTBITS(n) ((1l << (n)) - 1l)
+#define BLOCKSIZE 64
+#define FIRSTBITS(n) (((uint64_t)1 << (n)) - 1l)
+#define BIT(n) (((uint64_t)1 << (n)))
 #define ALLBITS ((uint64_t)-1)
+#define BLOCK(n) ((n)/64)
+#define INDEX(n) ((n)%64)
 
 typedef struct {
   uint64_t v[BV_QWORD_RANK];
 } bitvec_t;
 
-/*
-static inline unsigned long bv_hash(bitvec_t S)
-{
-  unsigned long h = 5381;
-  int i,j;
-  uint8_t x;
-
-  for (i = 0; i < BV_QWORD_RANK*4; i++) {
-    x = (S.v[i/8] >> (8*(i % 8))) & 0xff;
-    h = ((h << 5) + h) ^ x;
-  }
-
-  return h;
-}
-
-static inline int bv_equal(bitvec_t x,bitvec_t y)
-{
-  int i;
-
-  for (i=0; i < BV_QWORD_RANK; i++) {
-    if (x.v[i] != y.v[i]) {
-      return 0;
-    }
-  }
-
-  return 1;
-  // return ((x.v[0] == y.v[0]) && (x.v[1] == y.v[1]));
-}
-
-static inline void bv_difference(bitvec_t *pile, bitvec_t toremove)
-{
-  int i;
-
-  for (i=0; i < BV_QWORD_RANK; i++) {
-    pile->v[i] &= ~toremove.v[i];
-  }
-}
-*/
-
-/* /\* static inline *\/ int old_popcount64(uint64_t x) */
-/* { */
-/*   int i; */
-/*   for (i = 0; x; x >>= 1) { */
-/*     i += x & 1; */
-/*   } */
-/*   return i; */
-/* } */
-
-/*
-static inline int popcount64(uint64_t x)
-{
-  return _mm_popcnt_u64(x);
-}
-
-static inline int bv_popcount(bitvec_t x)
-{
-  int i;
-  int accum=0;
-
-  for (i=0; i<BV_QWORD_RANK; i++) {
-    accum += popcount64(x.v[i]);
-  }
-
-  return accum;
-}
-*/
-
 static inline void bv_clear_bit(bitvec_t *x, int k)
 {
-  x->v[k/64] &= ~((uint64_t)1 << (k % 64));
+  x->v[BLOCK(k)] &= ~BIT(INDEX(k));
 }
 
 static inline void bv_set_bit(bitvec_t *x, int k)
 {
-  x->v[k/64] |= ((uint64_t)1 << (k % 64));
+  x->v[BLOCK(k)] |= BIT(INDEX(k));
 }
 
 static inline int bv_get_bit(const bitvec_t *x, int k)
 {
-  return (x->v[k/64] >> (k % 64)) & 0x1;
+  return (x->v[BLOCK(k)] >> INDEX(k)) & 0x1;
 }
 
 static inline void bv_clear(bitvec_t *x)
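The new BLOCK/INDEX/BIT/FIRSTBITS macros split a bit index k into a 64-bit block number and an offset inside that block. A minimal usage sketch, assuming bitvec.h is on the include path and BV_QWORD_RANK is defined before inclusion (the value 2 and the test bits are ours, not part of the commit):

#define BV_QWORD_RANK 2   /* assumption: two 64-bit blocks, i.e. up to 128 bits */
#include "bitvec.h"
#include <stdio.h>

int main(void)
{
  bitvec_t x;
  bv_clear(&x);                                /* all bits to 0 */
  bv_set_bit(&x, 70);                          /* BLOCK(70) = 1, INDEX(70) = 6 */
  printf("%d %d\n", bv_get_bit(&x, 70), bv_get_bit(&x, 6));  /* prints "1 0" */
  bv_clear_bit(&x, 70);
  printf("%d\n", bv_get_bit(&x, 70));          /* prints "0" */
  return 0;
}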
@@ -132,8 +72,24 @@ static inline void bv_print(FILE *f, const bitvec_t *x, int len)
 {
   for(int i = 0; i < len; i++) {
     fputc(bv_get_bit(x, i) ? '1' : '0', f);
-    if(i % 64 == 63)
-      fputc('-',f);
+    // if(i % BLOCKSIZE == BLOCKSIZE - 1)
+    //   fputc('-',f);
+  }
+}
+
+static inline void bv_print_nice(FILE *f, const bitvec_t *pos, const bitvec_t *neg, int special, int len)
+{
+  for(int i = 0; i < len; i++) {
+    if(i == special)
+      fputc('X', f);
+    else if(bv_get_bit(pos, i) && !bv_get_bit(neg, i))
+      fputc('1', f);
+    else if(!bv_get_bit(pos, i) && bv_get_bit(neg, i))
+      fputc('0', f);
+    else if(!bv_get_bit(pos, i) && !bv_get_bit(neg, i))
+      fputc(' ', f);
+    else
+      fputc('-', f);
   }
 }
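bv_print_nice renders a partial state of an ideal: 'X' for the distinguished position, '1' where only pos is set, '0' where only neg is set, ' ' where neither is set yet, and '-' where both are set (a conflict). A small self-contained sketch; the concrete bits and the BV_QWORD_RANK value are ours:

#define BV_QWORD_RANK 1   /* assumption: one 64-bit block is enough here */
#include "bitvec.h"
#include <stdio.h>

int main(void)
{
  bitvec_t pos, neg;
  bv_clear(&pos);
  bv_clear(&neg);
  bv_set_bit(&pos, 0);                       /* position 0 forced in  -> '1' */
  bv_set_bit(&neg, 2);                       /* position 2 forced out -> '0' */
  bv_print_nice(stdout, &pos, &neg, 3, 4);   /* position 1 undecided -> ' ', position 3 -> 'X' */
  fputc('\n', stdout);                       /* prints "1 0X" */
  return 0;
}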
@@ -165,23 +121,36 @@ static inline int bv_disjoint(const bitvec_t *x, const bitvec_t *y)
 static inline int bv_full(const bitvec_t *x, int len)
 {
   int i;
-  for(i = 0; i < len/64; i++)
+  for(i = 0; i < BLOCK(len); i++)
     if(x->v[i] != ALLBITS)
       return 0;
-  return (x->v[i] & FIRSTBITS(len%64)) == FIRSTBITS(len%64);
+  return (x->v[i] & FIRSTBITS(INDEX(len))) == FIRSTBITS(INDEX(len));
 }
 
 // set bits in range start...end (including start and excluding end)
 static inline void bv_set_range(bitvec_t *x, int start, int end)
 {
-  if(start/64 == end/64)
-    x->v[start/64] |= ~FIRSTBITS(start % 64) & FIRSTBITS(end % 64);
+  if(BLOCK(start) == BLOCK(end))
+    x->v[BLOCK(start)] |= ~FIRSTBITS(INDEX(start)) & FIRSTBITS(INDEX(end));
   else {
-    x->v[start/64] |= ~FIRSTBITS(start % 64);
-    for(int i = start/64 + 1; i < end/64; i++)
+    x->v[BLOCK(start)] |= ~FIRSTBITS(INDEX(start));
+    for(int i = BLOCK(start) + 1; i < BLOCK(end); i++)
       x->v[i] = ALLBITS;
-    x->v[end/64] |= FIRSTBITS(end % 64);
+    x->v[BLOCK(end)] |= FIRSTBITS(INDEX(end));
+  }
+}
+
+// set bits in range start...end (including start and excluding end), except if they are set in mask
+static inline void bv_set_range_except(bitvec_t *x, const bitvec_t *mask, int start, int end)
+{
+  if(BLOCK(start) == BLOCK(end))
+    x->v[BLOCK(start)] |= ~FIRSTBITS(INDEX(start)) & FIRSTBITS(INDEX(end)) & ~mask->v[BLOCK(start)];
+  else {
+    x->v[BLOCK(start)] |= ~FIRSTBITS(INDEX(start)) & ~mask->v[BLOCK(start)];
+    for(int i = BLOCK(start) + 1; i < BLOCK(end); i++)
+      x->v[i] |= ~mask->v[i];
+    x->v[BLOCK(end)] |= FIRSTBITS(INDEX(end)) & ~mask->v[BLOCK(end)];
   }
 }
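The new bv_set_range_except sets every bit in [start, end) that is not already set in mask, one 64-bit block at a time. A bit-by-bit reference version with the same semantics; the helper name is ours and it exists only for illustration:

// Reference semantics of bv_set_range_except (sketch, not the optimized block-wise
// version above): set every bit in [start, end) that is clear in *mask.
static inline void bv_set_range_except_ref(bitvec_t *x, const bitvec_t *mask, int start, int end)
{
  for(int k = start; k < end; k++)
    if(!bv_get_bit(mask, k))
      bv_set_bit(x, k);
}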
@@ -190,15 +159,15 @@ static inline int bv_next_zero(const bitvec_t *x, int start)
 {
   int position;
 
-  position = ffsll(~(x->v[start/64] | FIRSTBITS(start % 64)));
+  position = ffsll(~(x->v[BLOCK(start)] | FIRSTBITS(INDEX(start))));
   if(position)
-    return (start/64)*64 + position - 1; // found zero in same chunk
+    return BLOCK(start)*BLOCKSIZE + position - 1; // found zero in same chunk
 
-  for(int i = start/64 + 1; i < BV_QWORD_RANK; i++) {
+  for(int i = BLOCK(start) + 1; i < BV_QWORD_RANK; i++) {
     position = ffsll(~x->v[i]);
     if(position) // found a 0
-      return i*64 + position - 1;
+      return i*BLOCKSIZE + position - 1;
   }
 
   return BV_QWORD_RANK; // found nothing
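bv_next_zero returns the index of the first clear bit at position start or later, scanning block-wise with ffsll. A bit-by-bit reference loop for comparison; the helper name is ours, and the "not found" sentinel here is only illustrative (the block-wise version above returns BV_QWORD_RANK in that case):

// Reference version of bv_next_zero (sketch): index of the first clear bit >= start.
static inline int bv_next_zero_ref(const bitvec_t *x, int start)
{
  for(int k = start; k < 64*BV_QWORD_RANK; k++)
    if(!bv_get_bit(x, k))
      return k;
  return 64*BV_QWORD_RANK; // no clear bit found
}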


@@ -567,21 +567,15 @@ void graph_free(semisimple_type_t type, node_t *graph)
 /*********************************** THE ACTUAL ENUMERATION ****************************************/
 
 typedef struct {
-  int rank;
-  int order;
-  int size; // the size of the graph; this can vary from the order if we take quotients beforehand
-  const node_t *graph;
-  int printstep;
-  const char *alphabet;
+  int size; // the size of the weyl group. We store however only the first size/2 elements
   FILE *outfile;
   bitvec_t *principal_pos;
   bitvec_t *principal_neg;
+  int *principal_is_slim;
 } enumeration_info_t;
 
 /*
-  ok this is screwed up, let's start over:
-
   pos and neg are bitvectors of size info.size/2
   they stand for the first (shortest) info.size/2 elements of the weyl group
   the least siginficant bit is the identity
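On our reading of this encoding (an interpretation, not stated verbatim in the source): bit i of pos means element i is known to lie in the ideal, and bit i of neg means element i is known to be excluded because its opposite, element size - 1 - i, lies in the ideal. A hypothetical membership helper under that assumption, for a fully determined balanced ideal:

// Sketch under the encoding assumption above (helper name and signature are ours):
// is Weyl group element j (0 <= j < size) contained in the balanced ideal (pos, neg)?
static inline int ideal_contains(const bitvec_t *pos, const bitvec_t *neg, int size, int j)
{
  if(j < size/2)
    return bv_get_bit(pos, j);            // the first half is stored directly in pos
  else
    return bv_get_bit(neg, size - 1 - j); // element j is in iff its opposite is excluded
}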
@@ -595,8 +589,11 @@ typedef struct {
 // returns number of found balanced ideals
 // next_neg can be info.size/2; in that case, everything between known_until and info.size/2 is required to be in the ideal, but it does not mean that next_neg is really not contained in the ideal
 // next_neg must be strictly greater than known_until, and less or equal to info.size/2
+// we use bv_union, bv_copy, bv_set_range_except, bv_disjoint, bv_next_zero
 static long enumerate_tree(const enumeration_info_t *info, const bitvec_t *pos, const bitvec_t *neg, int first_unknown, int next_neg)
 {
+  static long totcount = 0;
+
   bitvec_t newpos, newneg, known;
   int next_next_neg;
   long count = 0;
@@ -604,6 +601,8 @@ static long enumerate_tree(const enumeration_info_t *info, const bitvec_t *pos,
   // the omission of next_neg means inclusion of info->size - 1 - next_neg
   // add its principal ideal to pos and the opposite to neg
   if(next_neg != info->size/2) {
+    // if(!info->principal_is_slim[info->size - 1 - next_neg]) // if the principal ideal we want to add is not slim by itself, we don't even have to try; but there is not really a performance benefit
+    //   return 0;
     bv_union(&info->principal_pos[info->size - 1 - next_neg], pos, &newpos);
     bv_union(&info->principal_neg[info->size - 1 - next_neg], neg, &newneg);
   } else { // or, if there is no next_neg, just copy
@@ -611,8 +610,8 @@ static long enumerate_tree(const enumeration_info_t *info, const bitvec_t *pos,
     bv_copy(neg, &newneg);
   }
 
-  // add the range from first_unknown to next_neg to newpos
-  bv_set_range(&newpos, first_unknown, next_neg); // including the start, excluding end
+  // everything before next_neg which was unknown should be set to positive; to speed this up, we can start with first_unknown
+  bv_set_range_except(&newpos, neg, first_unknown, next_neg);
 
   // check if this leads to any conflicts (equivalently, violates slimness)
   if(!bv_disjoint(&newpos, &newneg))
@@ -621,30 +620,23 @@ static long enumerate_tree(const enumeration_info_t *info, const bitvec_t *pos,
   // what do we know so far?
   bv_union(&newpos, &newneg, &known);
 
-  // do we know everything already? we have a balanced ideal then
-  if(bv_full(&known, info->size/2)) {
-    fprintf(stderr, "Found balanced ideal: ");
-    bv_print(stderr, &newpos, info->size/2);
-    fprintf(stderr, " ");
-    bv_print(stderr, &newneg, info->size/2);
-    fprintf(stderr, "\n");
+  next_next_neg = bv_next_zero(&known, next_neg + 1);
+
+  if(next_next_neg >= info->size/2) {
+    if((++totcount) % 100000000 == 0) {
+      fprintf(stderr, "Found balanced ideal: ");
+      bv_print(stderr, &newpos, info->size/2);
+      fprintf(stderr, "\n");
+    }
     return 1;
   }
 
-  next_next_neg = next_neg;
-  while(next_next_neg < info->size/2) {
-    int tmp = bv_next_zero(&known, next_next_neg + 1); // this could return info->size/2, but that's fine for enumerate_tree
-    if(tmp <= next_next_neg) {
-      fprintf(stderr, "%d <= %d\n", tmp, next_next_neg);
-      bv_print(stderr, &known, info->size/2);
-      fprintf(stderr, "\n");
-      exit(-1);
-    }
-    next_next_neg = tmp;
+  do {
     count += enumerate_tree(info, &newpos, &newneg, next_neg + 1, next_next_neg);
-  }
+    next_next_neg = bv_next_zero(&known, next_next_neg + 1);
+  } while(next_next_neg <= info->size/2);
 
   return count;
 }
@@ -658,16 +650,15 @@ long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int s
   int current;
   edgelist_t *edge;
 
-  info.rank = coxeter_rank(type);
-  info.order = coxeter_order(type);
   info.size = size;
-  info.graph = graph;
-  info.alphabet = (char*)alphabet;
   info.outfile = outfile;
+  info.principal_pos = (bitvec_t*)malloc(info.size*sizeof(bitvec_t));
+  info.principal_neg = (bitvec_t*)malloc(info.size*sizeof(bitvec_t));
+  info.principal_is_slim = (int*)malloc(info.size*sizeof(int));
 
-  info.printstep = 0;
-  if(getenv("PRINTSTEP"))
-    info.printstep = atoi(getenv("PRINTSTEP"));
+  // info.printstep = 0;
+  // if(getenv("PRINTSTEP"))
+  //   info.printstep = atoi(getenv("PRINTSTEP"));
 
   // the algorithm only works if the opposition pairing does not stabilize any element
   // if this happens, there can be no balanced thickenings
@@ -675,39 +666,39 @@ long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int s
     if(graph[i].opposite == i)
       return 0;
 
-  // we can only handle bitvectors up to 64*BV_QWORD_RANK bits
+  // we can only handle bitvectors up to 64*BV_QWORD_RANK bits, but we only store half of the weyl group
   if(info.size > 128*BV_QWORD_RANK)
     return -1;
 
-  // generate principal ideals, needed bitvec operations: bv_clear, bv_set_bit, bv_get_bit
-  bitvec_t *principal_pos = (bitvec_t*)malloc(info.size*sizeof(bitvec_t));
-  bitvec_t *principal_neg = (bitvec_t*)malloc(info.size*sizeof(bitvec_t));
+  // generate principal ideals
+  int *principal = (int*)malloc(info.size*sizeof(int));
 
   for(int i = 0; i < info.size; i++) {
-    bv_clear(&principal_pos[i]);
-    bv_clear(&principal_neg[i]);
-    bv_set_bit(&principal_pos[i], i);
-    bv_set_bit(&principal_neg[i], info.size - 1 - i);
+    memset(principal, 0, info.size*sizeof(int));
+    principal[i] = 1;
 
     queue_init(&queue);
     queue_put(&queue, i);
-    while((current = queue_get(&queue)) != -1) {
+    while((current = queue_get(&queue)) != -1)
       for(edge = graph[current].bruhat_lower; edge; edge = edge->next)
-        if(!bv_get_bit(&principal_pos[i], edge->to)) {
-          bv_set_bit(&principal_pos[i], edge->to);
-          bv_set_bit(&principal_neg[i], info.size - 1 - edge->to);
+        if(!principal[edge->to]) {
+          principal[edge->to] = 1;
           queue_put(&queue, edge->to);
         }
-    }
+
+    // copy the first half into bitvectors
+    bv_clear(&info.principal_pos[i]);
+    bv_clear(&info.principal_neg[i]);
+    info.principal_is_slim[i] = 1;
+    for(int j = 0; j < info.size/2; j++)
+      if(principal[j])
+        bv_set_bit(&info.principal_pos[i], j);
+    for(int j = 0; j < info.size/2; j++)
+      if(principal[info.size - 1 - j]) {
+        bv_set_bit(&info.principal_neg[i], j);
+        if(bv_get_bit(&info.principal_pos[i], j))
+          info.principal_is_slim[i] = 0;
+      }
   }
+  free(principal);
 
-  // truncate them, as we only need the first info.size/2 elements
-  for(int i = 0; i < info.size; i++)
-    for(int j = info.size/2; j < info.size; j++) {
-      bv_clear_bit(&principal_pos[i], j);
-      bv_clear_bit(&principal_neg[i], j);
-    }
-
-  info.principal_pos = principal_pos;
-  info.principal_neg = principal_neg;
 
   // enumerate balanced ideals
   bitvec_t pos, neg;
@@ -716,8 +707,9 @@ long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int s
   for(int i = 0; i <= info.size/2; i++)
     count += enumerate_tree(&info, &pos, &neg, 0, i);
 
-  free(principal_pos);
-  free(principal_neg);
+  free(info.principal_is_slim);
+  free(info.principal_pos);
+  free(info.principal_neg);
 
   return count;
 }