From 562e9bb50acb3189e44afe6f213463d100a64bbd Mon Sep 17 00:00:00 2001 From: Florian Stecker Date: Tue, 15 Nov 2016 18:55:30 +0100 Subject: [PATCH] Including bitvectors, but slow and wrong --- Makefile | 7 +- bitvec.h | 204 ++++++++++++++++++++++++++++++++++++++++++++++++++ thickenings.c | 185 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 379 insertions(+), 17 deletions(-) create mode 100644 bitvec.h diff --git a/Makefile b/Makefile index b3eafce..d308bf4 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ -HEADERS=coxeter.h thickenings.h queue.h -OPTIONS=-O3 -m64 -march=native -flto -funroll-loops -std=gnu99 -#OPTIONS=-O0 -g -std=gnu99 +HEADERS=coxeter.h thickenings.h queue.h bitvec.h +#OPTIONS=-O3 -m64 -march=native -flto -funroll-loops -std=gnu99 +OPTIONS=-O0 -g -std=gnu99 +#OPTIONS=-O3 -m64 -march=native -funroll-loops -fno-inline -std=gnu99 -pg all: generate process diff --git a/bitvec.h b/bitvec.h new file mode 100644 index 0000000..3d4ec6e --- /dev/null +++ b/bitvec.h @@ -0,0 +1,204 @@ +/********************************************************************* + * Filename: bitvec.h + * + * Description: Bit vectors implemented as uint64_t arrays, size + * fixed at compile time (in weylconfig.h). Supports + * efficient set operations: Union, difference, count. + * Uses SSE4 64-bit popcount instruction. + * + * Author: David Dumas + * + * This program is free software distributed under the MIT license. + * See the file LICENSE for details. + ********************************************************************/ + + +#ifndef __BITVEC_H__ +#define __BITVEC_H__ 1 + +// #define _GNU_SOURCE /* enable ffsll */ +// #include + +#include + +#include +#include + +/* For _mm_popcnt_u64 */ +#include + +typedef struct { + uint64_t v[BV_QWORD_RANK]; +} bitvec_t; + +/* +static inline unsigned long bv_hash(bitvec_t S) +{ + unsigned long h = 5381; + int i,j; + uint8_t x; + + for (i = 0; i < BV_QWORD_RANK*4; i++) { + x = (S.v[i/8] >> (8*(i % 8))) & 0xff; + h = ((h << 5) + h) ^ x; + } + return h; +} + +static inline int bv_equal(bitvec_t x,bitvec_t y) +{ + int i; + for (i=0; i < BV_QWORD_RANK; i++) { + if (x.v[i] != y.v[i]) { + return 0; + } + } + return 1; + // return ((x.v[0] == y.v[0]) && (x.v[1] == y.v[1])); +} + +static inline void bv_difference(bitvec_t *pile, bitvec_t toremove) +{ + int i; + for (i=0; i < BV_QWORD_RANK; i++) { + pile->v[i] &= ~toremove.v[i]; + } +} +*/ + +/* /\* static inline *\/ int old_popcount64(uint64_t x) */ +/* { */ +/* int i; */ +/* for (i = 0; x; x >>= 1) { */ +/* i += x & 1; */ +/* } */ +/* return i; */ +/* } */ + +/* +static inline int popcount64(uint64_t x) +{ + return _mm_popcnt_u64(x); +} + +static inline int bv_popcount(bitvec_t x) +{ + int i; + int accum=0; + for (i=0; iv[k/64] &= ~((uint64_t)1 << (k % 64)); +} + +static inline void bv_set_bit(bitvec_t *x, int k) +{ + x->v[k/64] |= ((uint64_t)1 << (k % 64)); +} + +static inline int bv_get_bit(const bitvec_t *x, int k) +{ + return (x->v[k/64] >> (k % 64)) & 0x1; +} + +static inline void bv_clear(bitvec_t *x) +{ + int i; + for (i=0;iv[i] = 0; + } +} + +static inline int bv_is_zero(const bitvec_t *x) +{ + int i; + for (i=0;iv[i]) + return 0; + } + return 1; +} + +static inline void bv_print(FILE *f, const bitvec_t *x, int len) +{ + for(int i = 0; i < len; i++) { + fputc(bv_get_bit(x, i) ? '1' : '0', f); + } +} + +static inline void bv_union(const bitvec_t *x, const bitvec_t *y, bitvec_t *result) +{ + int i; + for (i=0; i < BV_QWORD_RANK; i++) { + result->v[i] = x->v[i] | y->v[i]; + } +} + +static inline void bv_intersection(const bitvec_t *x, const bitvec_t *y, bitvec_t *result) +{ + int i; + for (i=0; i < BV_QWORD_RANK; i++) { + result->v[i] = x->v[i] & y->v[i]; + } +} + +static inline int bv_disjoint(const bitvec_t *x, const bitvec_t *y) +{ + for(int i = 0; i < BV_QWORD_RANK; i++) + if(x->v[i] & y->v[i]) + return 0; + + return 1; +} + +static inline int bv_full(const bitvec_t *x, int len) +{ + int i; + for(i = 0; i < len/64; i++) + if(x->v[i] != (uint64_t)-1) + return 0; + + return (x->v[i] & ((1<<(len%64))-1)) == ((1<<(len%64))-1); +} + +// most sigificant set bit in bits 0...start-1 +static inline int bv_last_zero_before(const bitvec_t *x, int start) +{ + // naive implementation + int i; + for(i = start - 1; i >= 0; i--) + if(!bv_get_bit(x, i)) + break; + + return i; +} + +/* static inline int bv_lowest_bit_set(bitvec_t x) */ +/* { */ +/* int i=0, k; */ +/* for (i=0;i old index + ordering[i] = i; + qsort_r(ordering, order, sizeof(int), compare_wordlength, graph_unsorted); // so ordering is a map new index -> old index for(int i = 0; i < order; i++) - reverse_wordlength_order[wordlength_order[i]] = i; // reverse_wordlength_order is a map old index -> new index + reverse_ordering[ordering[i]] = i; // reverse_ordering is a map old index -> new index for(int i = 0; i < order; i++) { // we have only set left and wordlength so far, so just copy these - graph[i].wordlength = graph_unsorted[wordlength_order[i]].wordlength; + graph[i].wordlength = graph_unsorted[ordering[i]].wordlength; for(int j = 0; j < rank; j++) - graph[i].left[j] = reverse_wordlength_order[graph_unsorted[wordlength_order[i]].left[j]]; // rewrite references + graph[i].left[j] = reverse_ordering[graph_unsorted[ordering[i]].left[j]]; // rewrite references } // find words @@ -269,10 +272,37 @@ void prepare_graph(semisimple_type_t type, node_t *graph) } } + // additional sorting step to force opposite property (opposite of j is at n - j - 1) + + for(int i = 0; i < order; i++) + reverse_ordering[i] = -1; + for(int i = 0, j = 0; i < order; i++) { // i = old index, j = new index + if(reverse_ordering[i] == -1) { + reverse_ordering[i] = j; + ordering[j] = i; + reverse_ordering[graph[i].opposite] = order - j - 1; + ordering[order - j - 1] = graph[i].opposite; + j++; + } + } + memcpy(graph_unsorted, graph, order*sizeof(node_t)); + for(int i = 0; i < order; i++) { + graph[i] = graph_unsorted[ordering[i]]; + graph[i].opposite = reverse_ordering[graph[i].opposite]; + for(int j = 0; j < rank; j++) { + graph[i].left[j] = reverse_ordering[graph[i].left[j]]; + graph[i].right[j] = reverse_ordering[graph[i].right[j]]; + } + for(edge = graph[i].bruhat_lower; edge; edge = edge->next) + edge->to = reverse_ordering[edge->to]; + for(edge = graph[i].bruhat_higher; edge; edge = edge->next) + edge->to = reverse_ordering[edge->to]; + } + free(graph_data); free(graph_unsorted); - free(wordlength_order); - free(reverse_wordlength_order); + free(ordering); + free(reverse_ordering); free(seen); } @@ -544,6 +574,8 @@ typedef struct { int printstep; const char *alphabet; FILE *outfile; + bitvec_t *principal_pos; + bitvec_t *principal_neg; } enumeration_info_t; // calculate transitive closure; that is, fill current_level in every spot which must be marked with the current head (but was not already marked before), and -current_level in every opposite spot (including opposite to the head) @@ -608,6 +640,73 @@ static inline void output_thickening(const enumeration_info_t *info, signed char } } +/* new algorithm description: + + arguments: pos, neg, head (all readonly) + + - newpos = union (principal_pos[head], pos) + - newneg = union (principal_neg[head], neg) + - intersection(newpos, newneg) == 0 ? + - no: + - not slim, return + - yes: + - unknown = neg(union(newpos, newneg)) + - unknown == 0 ? + - yes: + - balanced, count++, return + - no: + - for all newhead in (1s of unknown): + - enumerate(newpos, newneg, newhead) + + needed bitwise ops: union, intersection, negation, empty?, iteration of 1s + + is there an sse op to find position of most/least significant 1? + + how can we handle negatives right to head?! +*/ + +static long enumerate_tree(const enumeration_info_t *info, const bitvec_t *pos, const bitvec_t *neg, int head, int oldhead) +{ + bitvec_t newpos, newneg, known; + int newhead; + long count = 0; + + bv_union(&info->principal_pos[head], pos, &newpos); + bv_union(&info->principal_neg[head], neg, &newneg); + + for(int i = oldhead - 1; i > head; i--) { + bv_union(&info->principal_pos[info->size-1-i], &newpos, &newpos); + bv_union(&info->principal_neg[info->size-1-i], &newneg, &newneg); + } + + if(!bv_disjoint(&newpos, &newneg)) + return 0; + + bv_union(&newpos, &newneg, &known); + + if(bv_full(&known, info->size/2)) { + // found a balanced ideal + + fprintf(stderr, "Found balanced ideal: "); + bv_print(stderr, &newpos, info->size/2); + fprintf(stderr, " "); + bv_print(stderr, &newneg, info->size/2); + fprintf(stderr, "\n"); + + return 1; + } + + newhead = bv_last_zero_before(&known, head); + while(newhead >= 0) { + count += enumerate_tree(info, &newpos, &newneg, newhead, head); + newhead = bv_last_zero_before(&known, newhead); + } + + return count; +} + +/* + static long enumerate_tree(const enumeration_info_t *info, signed char *level, int current_level, int head) { ERROR(current_level >= HEAD_MARKER, "HEAD_MARKER too small!\n"); @@ -649,6 +748,8 @@ static long enumerate_tree(const enumeration_info_t *info, signed char *level, i return count; } +*/ + long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int size, const char *alphabet, FILE *outfile) { signed char *level; @@ -656,6 +757,7 @@ long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int s enumeration_info_t info; queue_t queue; int current; + edgelist_t *edge; info.rank = coxeter_rank(type); info.order = coxeter_order(type); @@ -674,13 +776,68 @@ long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int s if(graph[i].opposite == i) return 0; - level = (signed char*)malloc(info.size*sizeof(int)); - memset(level, 0, info.size*sizeof(int)); + if(info.size > 64*BV_QWORD_RANK) + return -1; + // generate principal ideals, needed bitvec operations: bv_clear, bv_set_bit, bv_get_bit + bitvec_t *principal_pos = (bitvec_t*)malloc(info.size*sizeof(bitvec_t)); + bitvec_t *principal_neg = (bitvec_t*)malloc(info.size*sizeof(bitvec_t)); + for(int i = 0; i < info.size; i++) { + bv_clear(&principal_pos[i]); + bv_clear(&principal_neg[i]); + bv_set_bit(&principal_pos[i], i); + bv_set_bit(&principal_neg[i], info.size - 1 - i); + queue_init(&queue); + queue_put(&queue, i); + while((current = queue_get(&queue)) != -1) { + for(edge = graph[current].bruhat_lower; edge; edge = edge->next) + if(!bv_get_bit(&principal_pos[i], edge->to)) { + bv_set_bit(&principal_pos[i], edge->to); + bv_set_bit(&principal_neg[i], info.size - 1 - edge->to); + queue_put(&queue, edge->to); + } + } + } + + // truncate them, as we only need the first info.size/2 elements + for(int i = 0; i < info.size; i++) + for(int j = info.size/2; j < info.size; j++) { + bv_clear_bit(&principal_pos[i], j); + bv_clear_bit(&principal_neg[i], j); + } + + info.principal_pos = principal_pos; + info.principal_neg = principal_neg; + + bitvec_t tmp; + + for(int i = 0; i < info.size; i++) { + fprintf(stderr, "Principal ideal %2d: ", i); + bv_print(stderr, &principal_pos[i], info.size/2); + fprintf(stderr, " "); + bv_print(stderr, &principal_neg[i], info.size/2); + fprintf(stderr, " "); + bv_intersection(&principal_pos[i], &principal_neg[i], &tmp); + bv_print(stderr, &tmp, info.size/2); + fprintf(stderr, "\n"); + } + + // enumerate balanced ideals + bitvec_t pos, neg; + bv_clear(&pos); + bv_clear(&neg); for(int i = info.size - 1; i >= 0; i--) - count += enumerate_tree(&info, level, 1, i); + count += enumerate_tree(&info, &pos, &neg, i, info.size); - free(level); + /* old algorithm: + level = (signed char*)malloc(info.size*sizeof(int)); + memset(level, 0, info.size*sizeof(int)); + + for(int i = info.size - 1; i >= 0; i--) + count += enumerate_tree(&info, level, 1, i); */ + + free(principal_pos); + free(principal_neg); return count; }