From 882695c15e8732afba8561337204543ceedc5ae2 Mon Sep 17 00:00:00 2001 From: Florian Stecker Date: Fri, 18 Nov 2016 20:39:19 +0100 Subject: [PATCH] Cleanup in bitvec.h and bugfix in principal ideal generation --- Makefile | 4 +- bitvec.h | 135 +++++++++++++++++++------------------------------- thickenings.c | 112 +++++++++++++++++++---------------------- 3 files changed, 106 insertions(+), 145 deletions(-) diff --git a/Makefile b/Makefile index 31883f5..47be7af 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ HEADERS=coxeter.h thickenings.h queue.h bitvec.h -#OPTIONS=-O3 -m64 -march=native -flto -funroll-loops -std=gnu99 -OPTIONS=-m64 -march=native -O0 -g -std=gnu99 +OPTIONS=-O3 -m64 -march=native -flto -funroll-loops -std=gnu99 -Winline +#OPTIONS=-m64 -march=native -O0 -g -std=gnu99 #OPTIONS=-O3 -m64 -march=native -funroll-loops -fno-inline -std=gnu99 -pg all: generate process diff --git a/bitvec.h b/bitvec.h index d4e91c6..39331f8 100644 --- a/bitvec.h +++ b/bitvec.h @@ -16,99 +16,39 @@ #ifndef __BITVEC_H__ #define __BITVEC_H__ 1 -// #define _GNU_SOURCE /* enable ffsll */ -// #include +#define _GNU_SOURCE +#include #include #include #include -/* For _mm_popcnt_u64 */ -#include - // FIRSTBITS(n) only yields useful result when 0 <= n < 64 -#define FIRSTBITS(n) ((1l << (n)) - 1l) +#define BLOCKSIZE 64 +#define FIRSTBITS(n) (((uint64_t)1 << (n)) - 1l) +#define BIT(n) (((uint64_t)1 << (n))) #define ALLBITS ((uint64_t)-1) +#define BLOCK(n) ((n)/64) +#define INDEX(n) ((n)%64) typedef struct { uint64_t v[BV_QWORD_RANK]; } bitvec_t; -/* -static inline unsigned long bv_hash(bitvec_t S) -{ - unsigned long h = 5381; - int i,j; - uint8_t x; - - for (i = 0; i < BV_QWORD_RANK*4; i++) { - x = (S.v[i/8] >> (8*(i % 8))) & 0xff; - h = ((h << 5) + h) ^ x; - } - return h; -} - -static inline int bv_equal(bitvec_t x,bitvec_t y) -{ - int i; - for (i=0; i < BV_QWORD_RANK; i++) { - if (x.v[i] != y.v[i]) { - return 0; - } - } - return 1; - // return ((x.v[0] == y.v[0]) && (x.v[1] == y.v[1])); -} - -static inline void bv_difference(bitvec_t *pile, bitvec_t toremove) -{ - int i; - for (i=0; i < BV_QWORD_RANK; i++) { - pile->v[i] &= ~toremove.v[i]; - } -} -*/ - -/* /\* static inline *\/ int old_popcount64(uint64_t x) */ -/* { */ -/* int i; */ -/* for (i = 0; x; x >>= 1) { */ -/* i += x & 1; */ -/* } */ -/* return i; */ -/* } */ - -/* -static inline int popcount64(uint64_t x) -{ - return _mm_popcnt_u64(x); -} - -static inline int bv_popcount(bitvec_t x) -{ - int i; - int accum=0; - for (i=0; iv[k/64] &= ~((uint64_t)1 << (k % 64)); + x->v[BLOCK(k)] &= ~BIT(INDEX(k)); } static inline void bv_set_bit(bitvec_t *x, int k) { - x->v[k/64] |= ((uint64_t)1 << (k % 64)); + x->v[BLOCK(k)] |= BIT(INDEX(k)); } static inline int bv_get_bit(const bitvec_t *x, int k) { - return (x->v[k/64] >> (k % 64)) & 0x1; + return (x->v[BLOCK(k)] >> INDEX(k)) & 0x1; } static inline void bv_clear(bitvec_t *x) @@ -132,8 +72,24 @@ static inline void bv_print(FILE *f, const bitvec_t *x, int len) { for(int i = 0; i < len; i++) { fputc(bv_get_bit(x, i) ? '1' : '0', f); - if(i % 64 == 63) - fputc('-',f); + // if(i % BLOCKSIZE == BLOCKSIZE - 1) + // fputc('-',f); + } +} + +static inline void bv_print_nice(FILE *f, const bitvec_t *pos, const bitvec_t *neg, int special, int len) +{ + for(int i = 0; i < len; i++) { + if(i == special) + fputc('X', f); + else if(bv_get_bit(pos, i) && !bv_get_bit(neg, i)) + fputc('1', f); + else if(!bv_get_bit(pos, i) && bv_get_bit(neg, i)) + fputc('0', f); + else if(!bv_get_bit(pos, i) && !bv_get_bit(neg, i)) + fputc(' ', f); + else + fputc('-', f); } } @@ -165,23 +121,36 @@ static inline int bv_disjoint(const bitvec_t *x, const bitvec_t *y) static inline int bv_full(const bitvec_t *x, int len) { int i; - for(i = 0; i < len/64; i++) + for(i = 0; i < BLOCK(len); i++) if(x->v[i] != ALLBITS) return 0; - return (x->v[i] & FIRSTBITS(len%64)) == FIRSTBITS(len%64); + return (x->v[i] & FIRSTBITS(INDEX(len))) == FIRSTBITS(INDEX(len)); } // set bits in range start...end (including start and excluding end) static inline void bv_set_range(bitvec_t *x, int start, int end) { - if(start/64 == end/64) - x->v[start/64] |= ~FIRSTBITS(start % 64) & FIRSTBITS(end % 64); + if(BLOCK(start) == BLOCK(end)) + x->v[BLOCK(start)] |= ~FIRSTBITS(INDEX(start)) & FIRSTBITS(INDEX(end)); else { - x->v[start/64] |= ~FIRSTBITS(start % 64); - for(int i = start/64 + 1; i < end/64; i++) + x->v[BLOCK(start)] |= ~FIRSTBITS(INDEX(start)); + for(int i = BLOCK(start) + 1; i < BLOCK(end); i++) x->v[i] = ALLBITS; - x->v[end/64] |= FIRSTBITS(end % 64); + x->v[BLOCK(end)] |= FIRSTBITS(INDEX(end)); + } +} + +// set bits in range start...end (including start and excluding end), except if they are set in mask +static inline void bv_set_range_except(bitvec_t *x, const bitvec_t *mask, int start, int end) +{ + if(BLOCK(start) == BLOCK(end)) + x->v[BLOCK(start)] |= ~FIRSTBITS(INDEX(start)) & FIRSTBITS(INDEX(end)) & ~mask->v[BLOCK(start)]; + else { + x->v[BLOCK(start)] |= ~FIRSTBITS(INDEX(start)) & ~mask->v[BLOCK(start)]; + for(int i = BLOCK(start) + 1; i < BLOCK(end); i++) + x->v[i] |= ~mask->v[i]; + x->v[BLOCK(end)] |= FIRSTBITS(INDEX(end)) & ~mask->v[BLOCK(end)]; } } @@ -190,15 +159,15 @@ static inline int bv_next_zero(const bitvec_t *x, int start) { int position; - position = ffsll(~(x->v[start/64] | FIRSTBITS(start % 64))); + position = ffsll(~(x->v[BLOCK(start)] | FIRSTBITS(INDEX(start)))); if(position) - return (start/64)*64 + position - 1; // found zero in same chunk + return BLOCK(start)*BLOCKSIZE + position - 1; // found zero in same chunk - for(int i = start/64 + 1; i < BV_QWORD_RANK; i++) { + for(int i = BLOCK(start) + 1; i < BV_QWORD_RANK; i++) { position = ffsll(~x->v[i]); if(position) // found a 0 - return i*64 + position - 1; + return i*BLOCKSIZE + position - 1; } return BV_QWORD_RANK; // found nothing diff --git a/thickenings.c b/thickenings.c index 3f043df..5d90176 100644 --- a/thickenings.c +++ b/thickenings.c @@ -567,21 +567,15 @@ void graph_free(semisimple_type_t type, node_t *graph) /*********************************** THE ACTUAL ENUMERATION ****************************************/ typedef struct { - int rank; - int order; - int size; // the size of the graph; this can vary from the order if we take quotients beforehand - const node_t *graph; - int printstep; - const char *alphabet; + int size; // the size of the weyl group. We store however only the first size/2 elements FILE *outfile; bitvec_t *principal_pos; bitvec_t *principal_neg; + int *principal_is_slim; } enumeration_info_t; /* - ok this is screwed up, let's start over: - pos and neg are bitvectors of size info.size/2 they stand for the first (shortest) info.size/2 elements of the weyl group the least siginficant bit is the identity @@ -595,8 +589,11 @@ typedef struct { // returns number of found balanced ideals // next_neg can be info.size/2; in that case, everything between known_until and info.size/2 is required to be in the ideal, but it does not mean that next_neg is really not contained in the ideal // next_neg must be strictly greater than known_until, and less or equal to info.size/2 +// we use bv_union, bv_copy, bv_set_range_except, bv_disjoint, bv_next_zero + static long enumerate_tree(const enumeration_info_t *info, const bitvec_t *pos, const bitvec_t *neg, int first_unknown, int next_neg) { + static long totcount = 0; bitvec_t newpos, newneg, known; int next_next_neg; long count = 0; @@ -604,6 +601,8 @@ static long enumerate_tree(const enumeration_info_t *info, const bitvec_t *pos, // the omission of next_neg means inclusion of info->size - 1 - next_neg // add its principal ideal to pos and the opposite to neg if(next_neg != info->size/2) { + // if(!info->principal_is_slim[info->size - 1 - next_neg]) // if the principal ideal we want to add is not slim by itself, we don't even have to try; but there is not really a performance benefit + // return 0; bv_union(&info->principal_pos[info->size - 1 - next_neg], pos, &newpos); bv_union(&info->principal_neg[info->size - 1 - next_neg], neg, &newneg); } else { // or, if there is no next_neg, just copy @@ -611,8 +610,8 @@ static long enumerate_tree(const enumeration_info_t *info, const bitvec_t *pos, bv_copy(neg, &newneg); } - // add the range from first_unknown to next_neg to newpos - bv_set_range(&newpos, first_unknown, next_neg); // including the start, excluding end + // everything before next_neg which was unknown should be set to positive; to speed this up, we can start with first_unknown + bv_set_range_except(&newpos, neg, first_unknown, next_neg); // check if this leads to any conflicts (equivalently, violates slimness) if(!bv_disjoint(&newpos, &newneg)) @@ -621,30 +620,23 @@ static long enumerate_tree(const enumeration_info_t *info, const bitvec_t *pos, // what do we know so far? bv_union(&newpos, &newneg, &known); - // do we know everything already? we have a balanced ideal then - if(bv_full(&known, info->size/2)) { + next_next_neg = bv_next_zero(&known, next_neg + 1); - fprintf(stderr, "Found balanced ideal: "); - bv_print(stderr, &newpos, info->size/2); - fprintf(stderr, " "); - bv_print(stderr, &newneg, info->size/2); - fprintf(stderr, "\n"); + if(next_next_neg >= info->size/2) { + + if((++totcount) % 100000000 == 0) { + fprintf(stderr, "Found balanced ideal: "); + bv_print(stderr, &newpos, info->size/2); + fprintf(stderr, "\n"); + } return 1; } - next_next_neg = next_neg; - while(next_next_neg < info->size/2) { - int tmp = bv_next_zero(&known, next_next_neg + 1); // this could return info->size/2, but that's fine for enumerate_tree - if(tmp <= next_next_neg) { - fprintf(stderr, "%d <= %d\n", tmp, next_next_neg); - bv_print(stderr, &known, info->size/2); - fprintf(stderr, "\n"); - exit(-1); - } - next_next_neg = tmp; + do { count += enumerate_tree(info, &newpos, &newneg, next_neg + 1, next_next_neg); - } + next_next_neg = bv_next_zero(&known, next_next_neg + 1); + } while(next_next_neg <= info->size/2); return count; } @@ -658,16 +650,15 @@ long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int s int current; edgelist_t *edge; - info.rank = coxeter_rank(type); - info.order = coxeter_order(type); info.size = size; - info.graph = graph; - info.alphabet = (char*)alphabet; info.outfile = outfile; + info.principal_pos = (bitvec_t*)malloc(info.size*sizeof(bitvec_t)); + info.principal_neg = (bitvec_t*)malloc(info.size*sizeof(bitvec_t)); + info.principal_is_slim = (int*)malloc(info.size*sizeof(int)); - info.printstep = 0; - if(getenv("PRINTSTEP")) - info.printstep = atoi(getenv("PRINTSTEP")); + // info.printstep = 0; + // if(getenv("PRINTSTEP")) + // info.printstep = atoi(getenv("PRINTSTEP")); // the algorithm only works if the opposition pairing does not stabilize any element // if this happens, there can be no balanced thickenings @@ -675,39 +666,39 @@ long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int s if(graph[i].opposite == i) return 0; - // we can only handle bitvectors up to 64*BV_QWORD_RANK bits + // we can only handle bitvectors up to 64*BV_QWORD_RANK bits, but we only store half of the weyl group if(info.size > 128*BV_QWORD_RANK) return -1; - // generate principal ideals, needed bitvec operations: bv_clear, bv_set_bit, bv_get_bit - bitvec_t *principal_pos = (bitvec_t*)malloc(info.size*sizeof(bitvec_t)); - bitvec_t *principal_neg = (bitvec_t*)malloc(info.size*sizeof(bitvec_t)); + // generate principal ideals + int *principal = (int*)malloc(info.size*sizeof(int)); for(int i = 0; i < info.size; i++) { - bv_clear(&principal_pos[i]); - bv_clear(&principal_neg[i]); - bv_set_bit(&principal_pos[i], i); - bv_set_bit(&principal_neg[i], info.size - 1 - i); + memset(principal, 0, info.size*sizeof(int)); + principal[i] = 1; queue_init(&queue); queue_put(&queue, i); - while((current = queue_get(&queue)) != -1) { + while((current = queue_get(&queue)) != -1) for(edge = graph[current].bruhat_lower; edge; edge = edge->next) - if(!bv_get_bit(&principal_pos[i], edge->to)) { - bv_set_bit(&principal_pos[i], edge->to); - bv_set_bit(&principal_neg[i], info.size - 1 - edge->to); + if(!principal[edge->to]) { + principal[edge->to] = 1; queue_put(&queue, edge->to); } - } + + // copy the first half into bitvectors + bv_clear(&info.principal_pos[i]); + bv_clear(&info.principal_neg[i]); + info.principal_is_slim[i] = 1; + for(int j = 0; j < info.size/2; j++) + if(principal[j]) + bv_set_bit(&info.principal_pos[i], j); + for(int j = 0; j < info.size/2; j++) + if(principal[info.size - 1 - j]) { + bv_set_bit(&info.principal_neg[i], j); + if(bv_get_bit(&info.principal_pos[i], j)) + info.principal_is_slim[i] = 0; + } } - - // truncate them, as we only need the first info.size/2 elements - for(int i = 0; i < info.size; i++) - for(int j = info.size/2; j < info.size; j++) { - bv_clear_bit(&principal_pos[i], j); - bv_clear_bit(&principal_neg[i], j); - } - - info.principal_pos = principal_pos; - info.principal_neg = principal_neg; + free(principal); // enumerate balanced ideals bitvec_t pos, neg; @@ -716,8 +707,9 @@ long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int s for(int i = 0; i <= info.size/2; i++) count += enumerate_tree(&info, &pos, &neg, 0, i); - free(principal_pos); - free(principal_neg); + free(info.principal_is_slim); + free(info.principal_pos); + free(info.principal_neg); return count; }