Including bitvectors, but slow and wrong

This commit is contained in:
Florian Stecker 2016-11-15 18:55:30 +01:00
parent 2a297a74ca
commit 562e9bb50a
3 changed files with 379 additions and 17 deletions

View File

@ -1,6 +1,7 @@
HEADERS=coxeter.h thickenings.h queue.h
OPTIONS=-O3 -m64 -march=native -flto -funroll-loops -std=gnu99
#OPTIONS=-O0 -g -std=gnu99
HEADERS=coxeter.h thickenings.h queue.h bitvec.h
#OPTIONS=-O3 -m64 -march=native -flto -funroll-loops -std=gnu99
OPTIONS=-O0 -g -std=gnu99
#OPTIONS=-O3 -m64 -march=native -funroll-loops -fno-inline -std=gnu99 -pg
all: generate process

204
bitvec.h Normal file
View File

@ -0,0 +1,204 @@
/*********************************************************************
* Filename: bitvec.h
*
* Description: Bit vectors implemented as uint64_t arrays, size
* fixed at compile time (in weylconfig.h). Supports
* efficient set operations: Union, difference, count.
* Uses SSE4 64-bit popcount instruction.
*
* Author: David Dumas <david@dumas.io>
*
* This program is free software distributed under the MIT license.
* See the file LICENSE for details.
********************************************************************/
#ifndef __BITVEC_H__
#define __BITVEC_H__ 1
// #define _GNU_SOURCE /* enable ffsll */
// #include <string.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
/* For _mm_popcnt_u64 */
#include <x86intrin.h>
typedef struct {
uint64_t v[BV_QWORD_RANK];
} bitvec_t;
/*
static inline unsigned long bv_hash(bitvec_t S)
{
unsigned long h = 5381;
int i,j;
uint8_t x;
for (i = 0; i < BV_QWORD_RANK*4; i++) {
x = (S.v[i/8] >> (8*(i % 8))) & 0xff;
h = ((h << 5) + h) ^ x;
}
return h;
}
static inline int bv_equal(bitvec_t x,bitvec_t y)
{
int i;
for (i=0; i < BV_QWORD_RANK; i++) {
if (x.v[i] != y.v[i]) {
return 0;
}
}
return 1;
// return ((x.v[0] == y.v[0]) && (x.v[1] == y.v[1]));
}
static inline void bv_difference(bitvec_t *pile, bitvec_t toremove)
{
int i;
for (i=0; i < BV_QWORD_RANK; i++) {
pile->v[i] &= ~toremove.v[i];
}
}
*/
/* /\* static inline *\/ int old_popcount64(uint64_t x) */
/* { */
/* int i; */
/* for (i = 0; x; x >>= 1) { */
/* i += x & 1; */
/* } */
/* return i; */
/* } */
/*
static inline int popcount64(uint64_t x)
{
return _mm_popcnt_u64(x);
}
static inline int bv_popcount(bitvec_t x)
{
int i;
int accum=0;
for (i=0; i<BV_QWORD_RANK; i++) {
accum += popcount64(x.v[i]);
}
return accum;
}
*/
static inline void bv_clear_bit(bitvec_t *x, int k)
{
x->v[k/64] &= ~((uint64_t)1 << (k % 64));
}
static inline void bv_set_bit(bitvec_t *x, int k)
{
x->v[k/64] |= ((uint64_t)1 << (k % 64));
}
static inline int bv_get_bit(const bitvec_t *x, int k)
{
return (x->v[k/64] >> (k % 64)) & 0x1;
}
static inline void bv_clear(bitvec_t *x)
{
int i;
for (i=0;i<BV_QWORD_RANK;i++) {
x->v[i] = 0;
}
}
static inline int bv_is_zero(const bitvec_t *x)
{
int i;
for (i=0;i<BV_QWORD_RANK;i++) {
if (x->v[i])
return 0;
}
return 1;
}
static inline void bv_print(FILE *f, const bitvec_t *x, int len)
{
for(int i = 0; i < len; i++) {
fputc(bv_get_bit(x, i) ? '1' : '0', f);
}
}
static inline void bv_union(const bitvec_t *x, const bitvec_t *y, bitvec_t *result)
{
int i;
for (i=0; i < BV_QWORD_RANK; i++) {
result->v[i] = x->v[i] | y->v[i];
}
}
static inline void bv_intersection(const bitvec_t *x, const bitvec_t *y, bitvec_t *result)
{
int i;
for (i=0; i < BV_QWORD_RANK; i++) {
result->v[i] = x->v[i] & y->v[i];
}
}
static inline int bv_disjoint(const bitvec_t *x, const bitvec_t *y)
{
for(int i = 0; i < BV_QWORD_RANK; i++)
if(x->v[i] & y->v[i])
return 0;
return 1;
}
static inline int bv_full(const bitvec_t *x, int len)
{
int i;
for(i = 0; i < len/64; i++)
if(x->v[i] != (uint64_t)-1)
return 0;
return (x->v[i] & ((1<<(len%64))-1)) == ((1<<(len%64))-1);
}
// most sigificant set bit in bits 0...start-1
static inline int bv_last_zero_before(const bitvec_t *x, int start)
{
// naive implementation
int i;
for(i = start - 1; i >= 0; i--)
if(!bv_get_bit(x, i))
break;
return i;
}
/* static inline int bv_lowest_bit_set(bitvec_t x) */
/* { */
/* int i=0, k; */
/* for (i=0;i<BV_QWORD_RANK;i++) { */
/* if (k=ffsll((long long)x.v[i])) */
/* return 64*i + k - 1; */
/* } */
/* return -1; */
/* } */
/* static inline int bv_lowest_bit_set(bitvec_t x) */
/* { */
/* int k; */
/* for (k=0;k<BV_BIT_RANK;k++) { */
/* if (bv_get_bit(x,k)) */
/* return k; */
/* } */
/* return -1; */
/* } */
#endif /* __BITVEC_H__ */

View File

@ -10,6 +10,9 @@
#include "coxeter.h"
#include "queue.h"
#define BV_QWORD_RANK 10
#include "bitvec.h"
char *alphabetize(int *word, int len, const char *alphabet, char *buffer)
{
if(len == 0) {
@ -69,7 +72,7 @@ void prepare_graph(semisimple_type_t type, node_t *graph)
int *graph_data;
node_t *graph_unsorted;
int *wordlength_order, *reverse_wordlength_order, *seen;
int *ordering, *reverse_ordering, *seen;
// initialize
@ -82,8 +85,8 @@ void prepare_graph(semisimple_type_t type, node_t *graph)
graph_data = (int*)malloc(order*rank*sizeof(int));
graph_unsorted = (node_t*)malloc(order*sizeof(node_t));
wordlength_order = (int*)malloc(order*sizeof(int));
reverse_wordlength_order = (int*)malloc(order*sizeof(int));
ordering = (int*)malloc(order*sizeof(int));
reverse_ordering = (int*)malloc(order*sizeof(int));
seen = (int*)malloc(order*sizeof(int));
for(int i = 0; i < order; i++) {
@ -119,15 +122,15 @@ void prepare_graph(semisimple_type_t type, node_t *graph)
// sort by wordlength
for(int i = 0; i < order; i++)
wordlength_order[i] = i;
qsort_r(wordlength_order, order, sizeof(int), compare_wordlength, graph_unsorted); // so wordlength_order is a map new index -> old index
ordering[i] = i;
qsort_r(ordering, order, sizeof(int), compare_wordlength, graph_unsorted); // so ordering is a map new index -> old index
for(int i = 0; i < order; i++)
reverse_wordlength_order[wordlength_order[i]] = i; // reverse_wordlength_order is a map old index -> new index
reverse_ordering[ordering[i]] = i; // reverse_ordering is a map old index -> new index
for(int i = 0; i < order; i++) {
// we have only set left and wordlength so far, so just copy these
graph[i].wordlength = graph_unsorted[wordlength_order[i]].wordlength;
graph[i].wordlength = graph_unsorted[ordering[i]].wordlength;
for(int j = 0; j < rank; j++)
graph[i].left[j] = reverse_wordlength_order[graph_unsorted[wordlength_order[i]].left[j]]; // rewrite references
graph[i].left[j] = reverse_ordering[graph_unsorted[ordering[i]].left[j]]; // rewrite references
}
// find words
@ -269,10 +272,37 @@ void prepare_graph(semisimple_type_t type, node_t *graph)
}
}
// additional sorting step to force opposite property (opposite of j is at n - j - 1)
for(int i = 0; i < order; i++)
reverse_ordering[i] = -1;
for(int i = 0, j = 0; i < order; i++) { // i = old index, j = new index
if(reverse_ordering[i] == -1) {
reverse_ordering[i] = j;
ordering[j] = i;
reverse_ordering[graph[i].opposite] = order - j - 1;
ordering[order - j - 1] = graph[i].opposite;
j++;
}
}
memcpy(graph_unsorted, graph, order*sizeof(node_t));
for(int i = 0; i < order; i++) {
graph[i] = graph_unsorted[ordering[i]];
graph[i].opposite = reverse_ordering[graph[i].opposite];
for(int j = 0; j < rank; j++) {
graph[i].left[j] = reverse_ordering[graph[i].left[j]];
graph[i].right[j] = reverse_ordering[graph[i].right[j]];
}
for(edge = graph[i].bruhat_lower; edge; edge = edge->next)
edge->to = reverse_ordering[edge->to];
for(edge = graph[i].bruhat_higher; edge; edge = edge->next)
edge->to = reverse_ordering[edge->to];
}
free(graph_data);
free(graph_unsorted);
free(wordlength_order);
free(reverse_wordlength_order);
free(ordering);
free(reverse_ordering);
free(seen);
}
@ -544,6 +574,8 @@ typedef struct {
int printstep;
const char *alphabet;
FILE *outfile;
bitvec_t *principal_pos;
bitvec_t *principal_neg;
} enumeration_info_t;
// calculate transitive closure; that is, fill current_level in every spot which must be marked with the current head (but was not already marked before), and -current_level in every opposite spot (including opposite to the head)
@ -608,6 +640,73 @@ static inline void output_thickening(const enumeration_info_t *info, signed char
}
}
/* new algorithm description:
arguments: pos, neg, head (all readonly)
- newpos = union (principal_pos[head], pos)
- newneg = union (principal_neg[head], neg)
- intersection(newpos, newneg) == 0 ?
- no:
- not slim, return
- yes:
- unknown = neg(union(newpos, newneg))
- unknown == 0 ?
- yes:
- balanced, count++, return
- no:
- for all newhead in (1s of unknown):
- enumerate(newpos, newneg, newhead)
needed bitwise ops: union, intersection, negation, empty?, iteration of 1s
is there an sse op to find position of most/least significant 1?
how can we handle negatives right to head?!
*/
static long enumerate_tree(const enumeration_info_t *info, const bitvec_t *pos, const bitvec_t *neg, int head, int oldhead)
{
bitvec_t newpos, newneg, known;
int newhead;
long count = 0;
bv_union(&info->principal_pos[head], pos, &newpos);
bv_union(&info->principal_neg[head], neg, &newneg);
for(int i = oldhead - 1; i > head; i--) {
bv_union(&info->principal_pos[info->size-1-i], &newpos, &newpos);
bv_union(&info->principal_neg[info->size-1-i], &newneg, &newneg);
}
if(!bv_disjoint(&newpos, &newneg))
return 0;
bv_union(&newpos, &newneg, &known);
if(bv_full(&known, info->size/2)) {
// found a balanced ideal
fprintf(stderr, "Found balanced ideal: ");
bv_print(stderr, &newpos, info->size/2);
fprintf(stderr, " ");
bv_print(stderr, &newneg, info->size/2);
fprintf(stderr, "\n");
return 1;
}
newhead = bv_last_zero_before(&known, head);
while(newhead >= 0) {
count += enumerate_tree(info, &newpos, &newneg, newhead, head);
newhead = bv_last_zero_before(&known, newhead);
}
return count;
}
/*
static long enumerate_tree(const enumeration_info_t *info, signed char *level, int current_level, int head)
{
ERROR(current_level >= HEAD_MARKER, "HEAD_MARKER too small!\n");
@ -649,6 +748,8 @@ static long enumerate_tree(const enumeration_info_t *info, signed char *level, i
return count;
}
*/
long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int size, const char *alphabet, FILE *outfile)
{
signed char *level;
@ -656,6 +757,7 @@ long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int s
enumeration_info_t info;
queue_t queue;
int current;
edgelist_t *edge;
info.rank = coxeter_rank(type);
info.order = coxeter_order(type);
@ -674,13 +776,68 @@ long enumerate_balanced_thickenings(semisimple_type_t type, node_t *graph, int s
if(graph[i].opposite == i)
return 0;
level = (signed char*)malloc(info.size*sizeof(int));
memset(level, 0, info.size*sizeof(int));
if(info.size > 64*BV_QWORD_RANK)
return -1;
// generate principal ideals, needed bitvec operations: bv_clear, bv_set_bit, bv_get_bit
bitvec_t *principal_pos = (bitvec_t*)malloc(info.size*sizeof(bitvec_t));
bitvec_t *principal_neg = (bitvec_t*)malloc(info.size*sizeof(bitvec_t));
for(int i = 0; i < info.size; i++) {
bv_clear(&principal_pos[i]);
bv_clear(&principal_neg[i]);
bv_set_bit(&principal_pos[i], i);
bv_set_bit(&principal_neg[i], info.size - 1 - i);
queue_init(&queue);
queue_put(&queue, i);
while((current = queue_get(&queue)) != -1) {
for(edge = graph[current].bruhat_lower; edge; edge = edge->next)
if(!bv_get_bit(&principal_pos[i], edge->to)) {
bv_set_bit(&principal_pos[i], edge->to);
bv_set_bit(&principal_neg[i], info.size - 1 - edge->to);
queue_put(&queue, edge->to);
}
}
}
// truncate them, as we only need the first info.size/2 elements
for(int i = 0; i < info.size; i++)
for(int j = info.size/2; j < info.size; j++) {
bv_clear_bit(&principal_pos[i], j);
bv_clear_bit(&principal_neg[i], j);
}
info.principal_pos = principal_pos;
info.principal_neg = principal_neg;
bitvec_t tmp;
for(int i = 0; i < info.size; i++) {
fprintf(stderr, "Principal ideal %2d: ", i);
bv_print(stderr, &principal_pos[i], info.size/2);
fprintf(stderr, " ");
bv_print(stderr, &principal_neg[i], info.size/2);
fprintf(stderr, " ");
bv_intersection(&principal_pos[i], &principal_neg[i], &tmp);
bv_print(stderr, &tmp, info.size/2);
fprintf(stderr, "\n");
}
// enumerate balanced ideals
bitvec_t pos, neg;
bv_clear(&pos);
bv_clear(&neg);
for(int i = info.size - 1; i >= 0; i--)
count += enumerate_tree(&info, level, 1, i);
count += enumerate_tree(&info, &pos, &neg, i, info.size);
free(level);
/* old algorithm:
level = (signed char*)malloc(info.size*sizeof(int));
memset(level, 0, info.size*sizeof(int));
for(int i = info.size - 1; i >= 0; i--)
count += enumerate_tree(&info, level, 1, i); */
free(principal_pos);
free(principal_neg);
return count;
}