BIdxInsert.cc

00001 /* 
00002    EyeDB Object Database Management System
00003    Copyright (C) 1994-2008 SYSRA
00004    
00005    EyeDB is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Lesser General Public
00007    License as published by the Free Software Foundation; either
00008    version 2.1 of the License, or (at your option) any later version.
00009    
00010    EyeDB is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Lesser General Public License for more details.
00014    
00015    You should have received a copy of the GNU Lesser General Public
00016    License along with this library; if not, write to the Free Software
00017    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA 
00018 */
00019 
00020 /*
00021    Authors: Stuart Pook
00022             Eric Viara <viara@sysra.com>
00023 */
00024 
00025 #include <eyedbconfig.h>
00026 
00027 #include <eyedblib/thread.h>
00028 #include        <stdlib.h>
00029 #include        <string.h>
00030 #include        <assert.h>
00031 #include <iostream>
00032 
00033 #include <eyedbsm/eyedbsm.h>
00034 #include        "BIdxBTree.h"
00035 #include        "IdxP.h"
00036 
00037 //#define ESM_HIDX_REGISTER
00038 
00039 namespace eyedbsm {
00040 
00041   static Status
00042   bTreeSplitChild(BIdx::InCore * x, int i, BIdx::InCore * y, BIdx::InCore * z)
00043   {
00044     Status s;
00045     int j;
00046 
00047     //cout << "\n **************** bTreeSplitChild\n";
00048 
00049     unsigned int degree = y->idx->getDegree();
00050     BIdx * idx = y->idx;
00051     z->node->leaf = y->node->leaf;
00052     z->node->n = degree - 1;
00053     for (j = 0; j < degree - 1; j++)
00054       z->kdCopy(j, y, j + degree);
00055     if (!y->node->leaf)
00056       for (j = 0; j < degree; j++)
00057         z->node->c[j] = y->node->c[j + degree];
00058     for (j = x->node->n; j >= i + 1; j--)
00059       x->node->c[j + 1] = x->node->c[j];
00060     x->node->c[i + 1] = z->oid;
00061 
00062     for (j = x->node->n++ - 1; j >= i; j--)
00063       x->kdCopy(j + 1, x, j);
00064     x->kdCopy(i, y, degree - 1);
00065     y->node->n = degree - 1;
00066     if (s = y->write())
00067       return s;
00068     if (s = z->write())
00069       return s;
00070     return x->write();
00071   }
00072 
00073   //#define CMP_TRACE
00074   //#define DICHO_VERIF
00075 #define DICHO
00076   //#define DICHO_PARTIAL
00077 
00078 #ifdef DICHO
00079   int
00080   find(int i, BIdx::InCore *x, void const * key, void const * data)
00081   {
00082     if (i < 0)
00083       return i;
00084     int start, end, k;
00085 
00086     start = 0;
00087     end = i+1;
00088     k = (i+1) >> 1;
00089 
00090     int nn;
00091 #ifdef CMP_TRACE
00092     cout << "start = " << start << ", end = " << end << ", k = " << k
00093          << ", i = " << i << "\n";
00094 #endif
00095     for (nn = 1; ; nn++) {
00096       int old_k = k;
00097       int r = x->cmp(k, key, data, OP1_SWAP);
00098 #ifdef CMP_TRACE
00099       cout << "cmp k = " << k << ", r = " << r << "\n";
00100 #endif
00101       if (!r) {
00102         for (int j = k-1; j >= 0; j--) {
00103           if (x->cmp(j, key, data, OP1_SWAP)) {
00104 #ifdef CMP_TRACE
00105             if (j+1 != k) 
00106               cout << "... minimum is #" << (j+1) << endl;
00107 #endif
00108             k = j+1;
00109             break;
00110           }
00111         }
00112         break;
00113       }
00114 
00115       if (r > 0) {
00116         end = k;
00117         k = start + ((k - start) >> 1);
00118       }
00119       else {
00120         start = k;
00121         k = start + ((end - k) >> 1);
00122       }
00123 
00124       if (end - start <= 1 && (k == old_k || x->cmp(k, key, data, OP1_SWAP))) {
00125 #ifdef CMP_TRACE
00126         cout << "break start = " << start << ", end = " << end << "\n";
00127 #endif
00128         k = start;
00129         break;
00130       }
00131     }
00132     if (k == 0 && x->cmp(k, key, data, OP1_SWAP) > 0)
00133       k = -1;
00134 #ifdef CMP_TRACE
00135     cout << "dichotomic found #" << k << " has made " << nn <<
00136       " comparisons\n";
00137 #endif
00138 
00139 #ifdef DICHO_VERIF
00140     int j;
00141     for (j = i; j >= 0 && x->cmp(j, key, data, OP1_SWAP) >= 0; j--)
00142       ;
00143 
00144     if (k != j)
00145       cout << "k " << k << " vs. j " << j;
00146 
00147     assert(k == j);
00148 #endif
00149 
00150 #ifndef DICHO_PARTIAL
00151     if (k != i)
00152       x->kdCopy(k + 2, x, k + 1, i - k);
00153 #endif
00154     return k;
00155   }
00156 #endif
00157 
00158   static Status
00159   bTreeInsertNonfull(BIdx * b, BIdx::InCore * x, void const * key, void const * data)
00160   {
00161     BIdx::InCore z(b);
00162     BIdx::InCore a(b);
00163     int i;
00164     Status s;
00165     BIdx * idx = x->idx;
00166     unsigned int degree = idx->getDegree();
00167 
00168     while (!x->node->leaf) {
00169       for (i = x->node->n - 1; i >= 0 && x->cmp(i, key, data, OP1_SWAP) >= 0; i--)
00170         ;
00171       i++;
00172       if (s = z.read(&x->node->c[i]))
00173         return s;
00174       if (z.node->n == 2 * degree - 1) {
00175         if (s = a.create())
00176           return s;;
00177         if (s = bTreeSplitChild(x, i, &z, &a))
00178           return s;
00179         *x = (x->cmp(i, key, data, OP1_SWAP) <= 0) ? a : z;
00180       }
00181       else
00182         *x = z;
00183     }
00184 
00185     i = x->node->n - 1;
00186     x->node->n++;
00187 
00188     // the following loop can be improved:
00189     // 1. instead of making N kdCopy of 1 item (keySize and datSize), one
00190     //    could be perform the loop and then make only one kdCopy of N items
00191     // 2. a dichotomic comparison instead of performing the loop could reduce
00192     //    the number of comparison from N to log2(N)
00193         
00194 #ifdef CMP_TRACE
00195     cout << "[btree degree = " << degree <<
00196       "] starting comparing for inserting at #" << i << '\n';
00197 #endif
00198     int start_i = i;
00199 #ifdef DICHO
00200     i = find(i, x, key, data);
00201 #endif
00202 #if defined(DICHO_PARTIAL) || !defined(DICHO)
00203     i = start_i;
00204     for (; i >= 0 && x->cmp(i, key, data, OP1_SWAP) >= 0; i--) {
00205 #ifdef CMP_TRACE
00206       cout << "comparing for inserting #" << i << '\n';
00207 #endif
00208       x->kdCopy(i + 1, x, i);
00209     }
00210 #endif
00211 
00212 #ifdef CMP_TRACE
00213     cout << "direct found #" << i <<
00214       " has made " << (start_i - i + 1) << " comparisons\n";
00215 #endif
00216 
00217     unsigned int nkeys;
00218     const Idx::KeyType *keyType = idx->getKeyTypes(nkeys);
00219     if (keyType->type == Idx::tUnsignedChar ||
00220         keyType->type == Idx::tChar ||
00221         keyType->type == Idx::tSignedChar ||
00222         keyType->type == Idx::tString)
00223       x->kdCopy(i + 1, key, data);
00224     else {
00225       char xkey[Idx_max_type_size];
00226       Idx::h2x(xkey, key, *keyType);
00227       x->kdCopy(i + 1, xkey, data);
00228     }
00229     return x->write();
00230   }
00231 
00232   Status BIdx::insert(const void *key, const void *data, unsigned int datasz)
00233   {
00234     return statusMake(NOT_YET_IMPLEMENTED, "BIdx::insert(const void *key, const void *data, unsigned int datasz)");
00235   }
00236 
00237   //#define USE_CACHE
00238   Status BIdx::insert(void const * key, void const * data)
00239   {
00240     if (stat)
00241       return stat;
00242 
00243 #ifdef ESM_HIDX_REGISTER
00244     registerStart(dbh, AllOP);
00245 #endif
00246     IdxLock lockx(dbh, treeOid);
00247     Status s = lockx.lock();
00248     if (s)
00249       return s;
00250 
00251     BTree tree;
00252     BTree *xtree;
00253 #ifdef USE_CACHE
00254     if (stat = objectReadCache(dbh, 0, (void **)&xtree, DefaultLock,
00255                                &treeOid))
00256       return fatal();
00257     if (!xtree) {
00258       if (stat = readBTree(tree))
00259         return fatal();
00260       xtree = &tree;
00261     }
00262 #else
00263     if (stat = readBTree(tree))
00264       return fatal();
00265     xtree = &tree;
00266 #endif
00267     BIdx::InCore x(this);
00268     // this can be improved:
00269     // one can read only one part of the node: the fields keys, data and n
00270     // and if n is == 2 * degree - 1, then we read the c[] array:
00271     // we should swap the attributes leaf, n, c, keys, data to
00272     // leaf, n, keys, data, c to improved partial readings
00273     if (stat = x.read(&xtree->root))
00274       return stat;
00275     if (x.node->n == 2 * degree - 1) {
00276       BIdx::InCore r = x;
00277       if (stat = x.create())
00278         return stat;
00279       xtree->root = x.oid;
00280 #ifdef USE_CACHE
00281       if (xtree != &tree) {
00282         if (stat = objectWriteCache(dbh, 0, xtree, &treeOid))
00283           return fatal();
00284       }
00285       else if (stat = writeBTree(tree))
00286         return fatal();
00287 #else
00288       if (stat = writeBTree(tree))
00289         return fatal();
00290 #endif
00291       x.node->leaf = 0;
00292       x.node->n = 0;
00293       x.node->c[0] = r.oid;
00294       BIdx::InCore z(this);
00295       if (stat = z.create())
00296         return stat;
00297       if (stat = bTreeSplitChild(&x, 0, &r, &z))
00298         return stat;
00299     }
00300     stat = bTreeInsertNonfull(this, &x, key, data);
00301 #ifdef ESM_HIDX_REGISTER
00302     Register *reg;
00303     registerGet(dbh, &reg);
00304     fprintf(stdout, "BIdx::insert: ");
00305     registerTrace(stdout, reg);
00306     registerEnd(dbh);
00307 #endif
00308     if (!stat)
00309       return count_manage(1);
00310     return stat;
00311   }
00312 }

Generated on Mon Dec 22 18:15:50 2008 for eyedb by  doxygen 1.5.3