/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - This software is distributed in the hope that it will be - useful, but with NO WARRANTY OF ANY KIND. - No author or distributor accepts responsibility to anyone for the - consequences of using this software, or for whether it serves any - particular purpose or works at all, unless he or she says so in - writing. Everyone is granted permission to copy, modify and - redistribute this source code, for commercial or non-commercial - purposes, with the following restrictions: (1) the origin of this - source code must not be misrepresented; (2) modified versions must - be plainly marked as such; and (3) this notice may not be removed - or altered from any source or modified source distribution. *====================================================================*/ /* * boxfunc2.c * * Boxa/Box transform (shift, scale) and orthogonal rotation * BOXA *boxaTransform() * BOX *boxTransform() * BOXA *boxaTransformOrdered() * BOX *boxTransformOrdered() * BOXA *boxaRotateOrth() * BOX *boxRotateOrth() * * Boxa sort * BOXA *boxaSort() * BOXA *boxaBinSort() * BOXA *boxaSortByIndex() * BOXAA *boxaSort2d() * BOXAA *boxaSort2dByIndex() * * Other boxaa functions * l_int32 boxaaGetExtent() * BOXA *boxaaFlattenToBoxa() * l_int32 boxaaAlignBox() */ #include #include #include #include "allheaders.h" /* For more than this number of c.c. in a binarized image of * semi-perimeter (w + h) about 5000 or less, the O(n) binsort * is faster than the O(nlogn) shellsort. */ static const l_int32 MIN_COMPS_FOR_BIN_SORT = 500; /*---------------------------------------------------------------------* * Boxa/Box transform (shift, scale) and orthogonal rotation * *---------------------------------------------------------------------*/ /*! * boxaTransform() * * Input: boxa * shiftx, shifty * scalex, scaley * Return: boxad, or null on error * * Notes: * (1) This is a very simple function that first shifts, then scales. */ BOXA * boxaTransform(BOXA *boxas, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley) { l_int32 i, n; BOX *boxs, *boxd; BOXA *boxad; PROCNAME("boxaTransform"); if (!boxas) return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); n = boxaGetCount(boxas); if ((boxad = boxaCreate(n)) == NULL) return (BOXA *)ERROR_PTR("boxad not made", procName, NULL); for (i = 0; i < n; i++) { if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) return (BOXA *)ERROR_PTR("boxs not found", procName, NULL); boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley); boxDestroy(&boxs); boxaAddBox(boxad, boxd, L_INSERT); } return boxad; } /*! * boxTransform() * * Input: box * shiftx, shifty * scalex, scaley * Return: boxd, or null on error * * Notes: * (1) This is a very simple function that first shifts, then scales. */ BOX * boxTransform(BOX *box, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley) { PROCNAME("boxTransform"); if (!box) return (BOX *)ERROR_PTR("box not defined", procName, NULL); return boxCreate((l_int32)(scalex * (box->x + shiftx) + 0.5), (l_int32)(scaley * (box->y + shifty) + 0.5), (l_int32)(L_MAX(1.0, scalex * box->w + 0.5)), (l_int32)(L_MAX(1.0, scaley * box->h + 0.5))); } /*! * boxaTransformOrdered() * * Input: boxa * shiftx, shifty * scalex, scaley * xcen, ycen (center of rotation) * angle (in radians; clockwise is positive) * order (one of 6 combinations: L_TR_SC_RO, ...) * Return: boxd, or null on error * * Notes: * (1) This allows a sequence of linear transforms on each box. * the transforms are from the affine set, composed of * shift, scaling and rotation, and the order of the * transforms is specified. * (2) Although these operations appear to be on an infinite * 2D plane, in practice the region of interest is clipped * to a finite image. The center of rotation is usually taken * with respect to the image (either the UL corner or the * center). A translation can have two very different effects: * (a) Moves the boxes across the fixed image region. * (b) Moves the image origin, causing a change in the image * region and an opposite effective translation of the boxes. * This function should only be used for (a), where the image * region is fixed on translation. If the image region is * changed by the translation, use instead the functions * in affinecompose.c, where the image region and rotation * center can be computed from the actual clipping due to * translation of the image origin. * (3) See boxTransformOrdered() for usage and implementation details. */ BOXA * boxaTransformOrdered(BOXA *boxas, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley, l_int32 xcen, l_int32 ycen, l_float32 angle, l_int32 order) { l_int32 i, n; BOX *boxs, *boxd; BOXA *boxad; PROCNAME("boxaTransformOrdered"); if (!boxas) return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); n = boxaGetCount(boxas); if ((boxad = boxaCreate(n)) == NULL) return (BOXA *)ERROR_PTR("boxad not made", procName, NULL); for (i = 0; i < n; i++) { if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) return (BOXA *)ERROR_PTR("boxs not found", procName, NULL); boxd = boxTransformOrdered(boxs, shiftx, shifty, scalex, scaley, xcen, ycen, angle, order); boxDestroy(&boxs); boxaAddBox(boxad, boxd, L_INSERT); } return boxad; } /*! * boxTransformOrdered() * * Input: boxs * shiftx, shifty * scalex, scaley * xcen, ycen (center of rotation) * angle (in radians; clockwise is positive) * order (one of 6 combinations: L_TR_SC_RO, ...) * Return: boxd, or null on error * * Notes: * (1) This allows a sequence of linear transforms, composed of * shift, scaling and rotation, where the order of the * transforms is specified. * (2) The rotation is taken about a point specified by (xcen, ycen). * Let the components of the vector from the center of rotation * to the box center be (xdif, ydif): * xdif = (bx + 0.5 * bw) - xcen * ydif = (by + 0.5 * bh) - ycen * Then the box center after rotation has new components: * bxcen = xcen + xdif * cosa + ydif * sina * bycen = ycen + ydif * cosa - xdif * sina * where cosa and sina are the cos and sin of the angle, * and the enclosing box for the rotated box has size: * rw = |bw * cosa| + |bh * sina| * rh = |bh * cosa| + |bw * sina| * where bw and bh are the unrotated width and height. * Then the box UL corner (rx, ry) is * rx = bxcen - 0.5 * rw * ry = bycen - 0.5 * rh * (3) The center of rotation specified by args @xcen and @ycen * is the point BEFORE any translation or scaling. If the * rotation is not the first operation, this function finds * the actual center at the time of rotation. It does this * by making the following assumptions: * (1) Any scaling is with respect to the UL corner, so * that the center location scales accordingly. * (2) A translation does not affect the center of * the image; it just moves the boxes. * We always use assumption (1). However, assumption (2) * will be incorrect if the apparent translation is due * to a clipping operation that, in effect, moves the * origin of the image. In that case, you should NOT use * these simple functions. Instead, use the functions * in affinecompose.c, where the rotation center can be * computed from the actual clipping due to translation * of the image origin. */ BOX * boxTransformOrdered(BOX *boxs, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley, l_int32 xcen, l_int32 ycen, l_float32 angle, l_int32 order) { l_int32 bx, by, bw, bh, tx, ty, tw, th; l_int32 xcent, ycent; /* transformed center of rotation due to scaling */ l_float32 sina, cosa, xdif, ydif, rx, ry, rw, rh; BOX *boxd; PROCNAME("boxTransformOrdered"); if (!boxs) return (BOX *)ERROR_PTR("boxs not defined", procName, NULL); if (order != L_TR_SC_RO && order != L_SC_RO_TR && order != L_RO_TR_SC && order != L_TR_RO_SC && order != L_RO_SC_TR && order != L_SC_TR_RO) return (BOX *)ERROR_PTR("order invalid", procName, NULL); boxGetGeometry(boxs, &bx, &by, &bw, &bh); if (angle != 0.0) { sina = sin(angle); cosa = cos(angle); } if (order == L_TR_SC_RO) { tx = (l_int32)(scalex * (bx + shiftx) + 0.5); ty = (l_int32)(scaley * (by + shifty) + 0.5); tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); xcent = (l_int32)(scalex * xcen + 0.5); ycent = (l_int32)(scaley * ycen + 0.5); if (angle == 0.0) boxd = boxCreate(tx, ty, tw, th); else { xdif = tx + 0.5 * tw - xcent; ydif = ty + 0.5 * th - ycent; rw = L_ABS(tw * cosa) + L_ABS(th * sina); rh = L_ABS(th * cosa) + L_ABS(tw * sina); rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw, (l_int32)rh); } } else if (order == L_SC_TR_RO) { tx = (l_int32)(scalex * bx + shiftx + 0.5); ty = (l_int32)(scaley * by + shifty + 0.5); tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); xcent = (l_int32)(scalex * xcen + 0.5); ycent = (l_int32)(scaley * ycen + 0.5); if (angle == 0.0) boxd = boxCreate(tx, ty, tw, th); else { xdif = tx + 0.5 * tw - xcent; ydif = ty + 0.5 * th - ycent; rw = L_ABS(tw * cosa) + L_ABS(th * sina); rh = L_ABS(th * cosa) + L_ABS(tw * sina); rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw, (l_int32)rh); } } else if (order == L_RO_TR_SC) { if (angle == 0.0) { rx = bx; ry = by; rw = bw; rh = bh; } else { xdif = bx + 0.5 * bw - xcen; ydif = by + 0.5 * bh - ycen; rw = L_ABS(bw * cosa) + L_ABS(bh * sina); rh = L_ABS(bh * cosa) + L_ABS(bw * sina); rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; } tx = (l_int32)(scalex * (rx + shiftx) + 0.5); ty = (l_int32)(scaley * (ry + shifty) + 0.5); tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5)); th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); boxd = boxCreate(tx, ty, tw, th); } else if (order == L_RO_SC_TR) { if (angle == 0.0) { rx = bx; ry = by; rw = bw; rh = bh; } else { xdif = bx + 0.5 * bw - xcen; ydif = by + 0.5 * bh - ycen; rw = L_ABS(bw * cosa) + L_ABS(bh * sina); rh = L_ABS(bh * cosa) + L_ABS(bw * sina); rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; } tx = (l_int32)(scalex * rx + shiftx + 0.5); ty = (l_int32)(scaley * ry + shifty + 0.5); tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5)); th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); boxd = boxCreate(tx, ty, tw, th); } else if (order == L_TR_RO_SC) { tx = bx + shiftx; ty = by + shifty; if (angle == 0.0) { rx = tx; ry = ty; rw = bw; rh = bh; } else { xdif = tx + 0.5 * bw - xcen; ydif = ty + 0.5 * bh - ycen; rw = L_ABS(bw * cosa) + L_ABS(bh * sina); rh = L_ABS(bh * cosa) + L_ABS(bw * sina); rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; } tx = (l_int32)(scalex * rx + 0.5); ty = (l_int32)(scaley * ry + 0.5); tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5)); th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); boxd = boxCreate(tx, ty, tw, th); } else { /* order == L_SC_RO_TR) */ tx = (l_int32)(scalex * bx + 0.5); ty = (l_int32)(scaley * by + 0.5); tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); xcent = (l_int32)(scalex * xcen + 0.5); ycent = (l_int32)(scaley * ycen + 0.5); if (angle == 0.0) { rx = tx; ry = ty; rw = tw; rh = th; } else { xdif = tx + 0.5 * tw - xcent; ydif = ty + 0.5 * th - ycent; rw = L_ABS(tw * cosa) + L_ABS(th * sina); rh = L_ABS(th * cosa) + L_ABS(tw * sina); rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; } tx = (l_int32)(rx + shiftx + 0.5); ty = (l_int32)(ry + shifty + 0.5); tw = (l_int32)(rw + 0.5); th = (l_int32)(rh + 0.5); boxd = boxCreate(tx, ty, tw, th); } return boxd; } /*! * boxaRotateOrth() * * Input: boxa * w, h (of image in which the boxa is embedded) * rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg; * all rotations are clockwise) * Return: boxad, or null on error * * Notes: * (1) See boxRotateOrth() for details. */ BOXA * boxaRotateOrth(BOXA *boxas, l_int32 w, l_int32 h, l_int32 rotation) { l_int32 i, n; BOX *boxs, *boxd; BOXA *boxad; PROCNAME("boxaRotateOrth"); if (!boxas) return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); if (rotation == 0) return boxaCopy(boxas, L_COPY); if (rotation < 1 || rotation > 3) return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL); n = boxaGetCount(boxas); if ((boxad = boxaCreate(n)) == NULL) return (BOXA *)ERROR_PTR("boxad not made", procName, NULL); for (i = 0; i < n; i++) { if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) return (BOXA *)ERROR_PTR("boxs not found", procName, NULL); boxd = boxRotateOrth(boxs, w, h, rotation); boxDestroy(&boxs); boxaAddBox(boxad, boxd, L_INSERT); } return boxad; } /*! * boxRotateOrth() * * Input: box * w, h (of image in which the box is embedded) * rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg; * all rotations are clockwise) * Return: boxd, or null on error * * Notes: * (1) Rotate the image with the embedded box by the specified amount. * (2) After rotation, the rotated box is always measured with * respect to the UL corner of the image. */ BOX * boxRotateOrth(BOX *box, l_int32 w, l_int32 h, l_int32 rotation) { l_int32 bx, by, bw, bh, xdist, ydist; PROCNAME("boxRotateOrth"); if (!box) return (BOX *)ERROR_PTR("box not defined", procName, NULL); if (rotation == 0) return boxCopy(box); if (rotation < 1 || rotation > 3) return (BOX *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL); boxGetGeometry(box, &bx, &by, &bw, &bh); ydist = h - by - bh; /* below box */ xdist = w - bx - bw; /* to right of box */ if (rotation == 1) /* 90 deg cw */ return boxCreate(ydist, bx, bh, bw); else if (rotation == 2) /* 180 deg cw */ return boxCreate(xdist, ydist, bw, bh); else /* rotation == 3, 270 deg cw */ return boxCreate(by, xdist, bh, bw); } /*---------------------------------------------------------------------* * Boxa sort * *---------------------------------------------------------------------*/ /*! * boxaSort() * * Input: boxa * sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH, * L_SORT_BY_HEIGHT, L_SORT_BY_MIN_DIMENSION, * L_SORT_BY_MAX_DIMENSION, L_SORT_BY_PERIMETER, * L_SORT_BY_AREA, L_SORT_BY_ASPECT_RATIO) * sortorder (L_SORT_INCREASING, L_SORT_DECREASING) * &naindex ( index of sorted order into * original array) * Return: boxad (sorted version of boxas), or null on error */ BOXA * boxaSort(BOXA *boxas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex) { l_int32 i, n, x, y, w, h, size; BOXA *boxad; NUMA *na, *naindex; PROCNAME("boxaSort"); if (pnaindex) *pnaindex = NULL; if (!boxas) return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && sorttype != L_SORT_BY_MIN_DIMENSION && sorttype != L_SORT_BY_MAX_DIMENSION && sorttype != L_SORT_BY_PERIMETER && sorttype != L_SORT_BY_AREA && sorttype != L_SORT_BY_ASPECT_RATIO) return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL); if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL); /* Use O(n) binsort if possible */ n = boxaGetCount(boxas); if (n > MIN_COMPS_FOR_BIN_SORT && ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) || (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) || (sorttype == L_SORT_BY_PERIMETER))) return boxaBinSort(boxas, sorttype, sortorder, pnaindex); /* Build up numa of specific data */ if ((na = numaCreate(n)) == NULL) return (BOXA *)ERROR_PTR("na not made", procName, NULL); for (i = 0; i < n; i++) { boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h); switch (sorttype) { case L_SORT_BY_X: numaAddNumber(na, x); break; case L_SORT_BY_Y: numaAddNumber(na, y); break; case L_SORT_BY_WIDTH: numaAddNumber(na, w); break; case L_SORT_BY_HEIGHT: numaAddNumber(na, h); break; case L_SORT_BY_MIN_DIMENSION: size = L_MIN(w, h); numaAddNumber(na, size); break; case L_SORT_BY_MAX_DIMENSION: size = L_MAX(w, h); numaAddNumber(na, size); break; case L_SORT_BY_PERIMETER: size = w + h; numaAddNumber(na, size); break; case L_SORT_BY_AREA: size = w * h; numaAddNumber(na, size); break; case L_SORT_BY_ASPECT_RATIO: numaAddNumber(na, (l_float32)w / (l_float32)h); break; default: L_WARNING("invalid sort type", procName); } } /* Get the sort index for data array */ if ((naindex = numaGetSortIndex(na, sortorder)) == NULL) return (BOXA *)ERROR_PTR("naindex not made", procName, NULL); /* Build up sorted boxa using sort index */ boxad = boxaSortByIndex(boxas, naindex); if (pnaindex) *pnaindex = naindex; else numaDestroy(&naindex); numaDestroy(&na); return boxad; } /*! * boxaBinSort() * * Input: boxa * sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH, * L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER) * sortorder (L_SORT_INCREASING, L_SORT_DECREASING) * &naindex ( index of sorted order into * original array) * Return: boxad (sorted version of boxas), or null on error * * Notes: * (1) For a large number of boxes (say, greater than 1000), this * O(n) binsort is much faster than the O(nlogn) shellsort. * For 5000 components, this is over 20x faster than boxaSort(). * (2) Consequently, boxaSort() calls this function if it will * likely go much faster. */ BOXA * boxaBinSort(BOXA *boxas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex) { l_int32 i, n, x, y, w, h; BOXA *boxad; NUMA *na, *naindex; PROCNAME("boxaBinSort"); if (pnaindex) *pnaindex = NULL; if (!boxas) return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && sorttype != L_SORT_BY_PERIMETER) return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL); if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL); /* Generate Numa of appropriate box dimensions */ n = boxaGetCount(boxas); if ((na = numaCreate(n)) == NULL) return (BOXA *)ERROR_PTR("na not made", procName, NULL); for (i = 0; i < n; i++) { boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h); switch (sorttype) { case L_SORT_BY_X: numaAddNumber(na, x); break; case L_SORT_BY_Y: numaAddNumber(na, y); break; case L_SORT_BY_WIDTH: numaAddNumber(na, w); break; case L_SORT_BY_HEIGHT: numaAddNumber(na, h); break; case L_SORT_BY_PERIMETER: numaAddNumber(na, w + h); break; default: L_WARNING("invalid sort type", procName); } } /* Get the sort index for data array */ if ((naindex = numaGetBinSortIndex(na, sortorder)) == NULL) return (BOXA *)ERROR_PTR("naindex not made", procName, NULL); /* Build up sorted boxa using the sort index */ boxad = boxaSortByIndex(boxas, naindex); if (pnaindex) *pnaindex = naindex; else numaDestroy(&naindex); numaDestroy(&na); return boxad; } /*! * boxaSortByIndex() * * Input: boxas * naindex (na that maps from the new boxa to the input boxa) * Return: boxad (sorted), or null on error */ BOXA * boxaSortByIndex(BOXA *boxas, NUMA *naindex) { l_int32 i, n, index; BOX *box; BOXA *boxad; PROCNAME("boxaSortByIndex"); if (!boxas) return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); if (!naindex) return (BOXA *)ERROR_PTR("naindex not defined", procName, NULL); n = boxaGetCount(boxas); boxad = boxaCreate(n); for (i = 0; i < n; i++) { numaGetIValue(naindex, i, &index); box = boxaGetBox(boxas, index, L_COPY); boxaAddBox(boxad, box, L_INSERT); } return boxad; } /*! * boxaSort2d() * * Input: boxas * &naa ( numaa with sorted indices * whose values are the indices of the input array) * delta1 (min overlap that permits aggregation of a box * onto a boxa of horizontally-aligned boxes; pass 1) * delta2 (min overlap that permits aggregation of a box * onto a boxa of horizontally-aligned boxes; pass 2) * minh1 (components less than this height either join an * existing boxa or are set aside for pass 2) * Return: boxaa (2d sorted version of boxa), or null on error * * Notes: * (1) The final result is a sort where the 'fast scan' direction is * left to right, and the 'slow scan' direction is from top * to bottom. Each boxa in the boxaa represents a sorted set * of boxes from left to right. * (2) Two passes are used to aggregate the boxas, which can corresond * to characters or words in a line of text. In pass 1, only * taller components, which correspond to xheight or larger, * are permitted to start a new boxa, whereas in pass 2, * the remaining vertically-challenged components are allowed * to join an existing boxa or start a new one. * (3) If delta1 < 0, the first pass allows aggregation when * boxes in the same boxa do not overlap vertically. * The distance by which they can miss and still be aggregated * is the absolute value |delta1|. Similar for delta2 on * the second pass. * (4) On the first pass, any component of height less than minh1 * cannot start a new boxa; it's put aside for later insertion. * (5) On the second pass, any small component that doesn't align * with an existing boxa can start a new one. * (6) This can be used to identify lines of text from * character or word bounding boxes. */ BOXAA * boxaSort2d(BOXA *boxas, NUMAA **pnaad, l_int32 delta1, l_int32 delta2, l_int32 minh1) { l_int32 i, index, h, nt, ne, n, m, ival; BOX *box; BOXA *boxa, *boxae, *boxan, *boxat1, *boxat2, *boxav, *boxavs; BOXAA *baa, *baad; NUMA *naindex, *nae, *nan, *nah, *nav, *nat1, *nat2, *nad; NUMAA *naa, *naad; PROCNAME("boxaSort2d"); if (pnaad) *pnaad = NULL; if (!boxas) return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL); /* Sort from left to right */ if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex)) == NULL) return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL); /* First pass: assign taller boxes to boxa by row */ nt = boxaGetCount(boxa); baa = boxaaCreate(0); naa = numaaCreate(0); boxae = boxaCreate(0); /* save small height boxes here */ nae = numaCreate(0); /* keep track of small height boxes */ for (i = 0; i < nt; i++) { box = boxaGetBox(boxa, i, L_CLONE); boxGetGeometry(box, NULL, NULL, NULL, &h); if (h < minh1) { /* save for 2nd pass */ boxaAddBox(boxae, box, L_INSERT); numaAddNumber(nae, i); } else { n = boxaaGetCount(baa); boxaaAlignBox(baa, box, delta1, &index); if (index < n) { /* append to an existing boxa */ boxaaAddBox(baa, index, box, L_INSERT); } else { /* doesn't align, need new boxa */ boxan = boxaCreate(0); boxaAddBox(boxan, box, L_INSERT); boxaaAddBoxa(baa, boxan, L_INSERT); nan = numaCreate(0); numaaAddNuma(naa, nan, L_INSERT); } numaGetIValue(naindex, i, &ival); numaaAddNumber(naa, index, ival); } } boxaDestroy(&boxa); numaDestroy(&naindex); /* Second pass: feed in small height boxes; * TODO: this correctly, using local y position! */ ne = boxaGetCount(boxae); for (i = 0; i < ne; i++) { box = boxaGetBox(boxae, i, L_CLONE); n = boxaaGetCount(baa); boxaaAlignBox(baa, box, delta2, &index); if (index < n) { /* append to an existing boxa */ boxaaAddBox(baa, index, box, L_INSERT); } else { /* doesn't align, need new boxa */ boxan = boxaCreate(0); boxaAddBox(boxan, box, L_INSERT); boxaaAddBoxa(baa, boxan, L_INSERT); nan = numaCreate(0); numaaAddNuma(naa, nan, L_INSERT); } numaGetIValue(nae, i, &ival); /* location in original boxas */ numaaAddNumber(naa, index, ival); } /* Sort each boxa in the boxaa */ m = boxaaGetCount(baa); for (i = 0; i < m; i++) { boxat1 = boxaaGetBoxa(baa, i, L_CLONE); boxat2 = boxaSort(boxat1, L_SORT_BY_X, L_SORT_INCREASING, &nah); boxaaReplaceBoxa(baa, i, boxat2); nat1 = numaaGetNuma(naa, i, L_CLONE); nat2 = numaSortByIndex(nat1, nah); numaaReplaceNuma(naa, i, nat2); boxaDestroy(&boxat1); numaDestroy(&nat1); numaDestroy(&nah); } /* Sort boxa vertically within boxaa, using the first box * in each boxa. */ m = boxaaGetCount(baa); boxav = boxaCreate(m); /* holds first box in each boxa in baa */ naad = numaaCreate(m); if (pnaad) *pnaad = naad; baad = boxaaCreate(m); for (i = 0; i < m; i++) { boxat1 = boxaaGetBoxa(baa, i, L_CLONE); box = boxaGetBox(boxat1, 0, L_CLONE); boxaAddBox(boxav, box, L_INSERT); boxaDestroy(&boxat1); } boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav); for (i = 0; i < m; i++) { numaGetIValue(nav, i, &index); boxa = boxaaGetBoxa(baa, index, L_CLONE); boxaaAddBoxa(baad, boxa, L_INSERT); nad = numaaGetNuma(naa, index, L_CLONE); numaaAddNuma(naad, nad, L_INSERT); } /* fprintf(stderr, "box count = %d, numaa count = %d\n", nt, numaaGetNumberCount(naad)); */ boxaaDestroy(&baa); boxaDestroy(&boxav); boxaDestroy(&boxavs); boxaDestroy(&boxae); numaDestroy(&nav); numaDestroy(&nae); numaaDestroy(&naa); if (!pnaad) numaaDestroy(&naad); return baad; } /*! * boxaSort2dByIndex() * * Input: boxas * naa (numaa that maps from the new baa to the input boxa) * Return: baa (sorted boxaa), or null on error */ BOXAA * boxaSort2dByIndex(BOXA *boxas, NUMAA *naa) { l_int32 ntot, boxtot, i, j, n, nn, index; BOX *box; BOXA *boxa; BOXAA *baa; NUMA *na; PROCNAME("boxaSort2dByIndex"); if (!boxas) return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL); if (!naa) return (BOXAA *)ERROR_PTR("naindex not defined", procName, NULL); /* Check counts */ ntot = numaaGetNumberCount(naa); boxtot = boxaGetCount(boxas); if (ntot != boxtot) return (BOXAA *)ERROR_PTR("element count mismatch", procName, NULL); n = numaaGetCount(naa); baa = boxaaCreate(n); for (i = 0; i < n; i++) { na = numaaGetNuma(naa, i, L_CLONE); nn = numaGetCount(na); boxa = boxaCreate(nn); for (j = 0; j < nn; j++) { numaGetIValue(na, i, &index); box = boxaGetBox(boxas, index, L_COPY); boxaAddBox(boxa, box, L_INSERT); } boxaaAddBoxa(baa, boxa, L_INSERT); numaDestroy(&na); } return baa; } /*---------------------------------------------------------------------* * Other Boxaa functions * *---------------------------------------------------------------------*/ /*! * boxaaGetExtent() * * Input: boxaa * &w ( width) * &h ( height) * &box (, minimum box containing all boxa * in boxaa) * Return: 0 if OK, 1 on error * * Notes: * (1) The returned w and h are the minimum size image * that would contain all boxes untranslated. */ l_int32 boxaaGetExtent(BOXAA *boxaa, l_int32 *pw, l_int32 *ph, BOX **pbox) { l_int32 i, j, n, m, x, y, w, h, xmax, ymax, xmin, ymin; BOXA *boxa; PROCNAME("boxaaGetExtent"); if (!pw && !ph && !pbox) return ERROR_INT("no ptrs defined", procName, 1); if (pbox) *pbox = NULL; if (pw) *pw = 0; if (ph) *ph = 0; if (!boxaa) return ERROR_INT("boxaa not defined", procName, 1); n = boxaaGetCount(boxaa); if (n == 0) return ERROR_INT("no boxa in boxaa", procName, 1); xmax = ymax = 0; xmin = ymin = 100000000; for (i = 0; i < n; i++) { boxa = boxaaGetBoxa(boxaa, i, L_CLONE); m = boxaGetCount(boxa); for (j = 0; j < m; j++) { boxaGetBoxGeometry(boxa, j, &x, &y, &w, &h); xmin = L_MIN(xmin, x); ymin = L_MIN(ymin, y); xmax = L_MAX(xmax, x + w); ymax = L_MAX(ymax, y + h); } } if (pw) *pw = xmax; if (ph) *ph = ymax; if (pbox) *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin); return 0; } /*! * boxaaFlattenToBoxa() * * Input: boxaa * &naindex ( the boxa index in the boxaa) * copyflag (L_COPY or L_CLONE) * Return: boxa, or null on error * * Notes: * (1) This 'flattens' the boxaa to a boxa, taking the boxes in * order in the first boxa, then the second, etc. * (2) If &naindex is defined, we generate a Numa that gives, for * each box in the boxaa, the index of the boxa to which it belongs. */ BOXA * boxaaFlattenToBoxa(BOXAA *baa, NUMA **pnaindex, l_int32 copyflag) { l_int32 i, j, m, n; BOXA *boxa, *boxat; BOX *box; NUMA *naindex; PROCNAME("boxaaFlattenToBoxa"); if (pnaindex) *pnaindex = NULL; if (!baa) return (BOXA *)ERROR_PTR("baa not defined", procName, NULL); if (copyflag != L_COPY && copyflag != L_CLONE) return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL); if (pnaindex) { naindex = numaCreate(0); *pnaindex = naindex; } n = boxaaGetCount(baa); boxa = boxaCreate(n); for (i = 0; i < n; i++) { boxat = boxaaGetBoxa(baa, i, L_CLONE); m = boxaGetCount(boxat); for (j = 0; j < m; j++) { box = boxaGetBox(boxat, j, copyflag); boxaAddBox(boxa, box, L_INSERT); if (pnaindex) numaAddNumber(naindex, i); /* save 'row' number */ } boxaDestroy(&boxat); } return boxa; } /*! * boxaaAlignBox() * * Input: boxaa * box (to be aligned with the last of one of the boxa * in boxaa, if possible) * delta (amount by which consecutive components can miss * in overlap and still be included in the array) * &index (of boxa with best overlap, or if none match, * this is the index of the next boxa to be generated) * Return: 0 if OK, 1 on error * * Notes: * (1) This is not greedy; it finds the boxa whose last box has * the biggest overlap with the input box. */ l_int32 boxaaAlignBox(BOXAA *baa, BOX *box, l_int32 delta, l_int32 *pindex) { l_int32 i, n, m, y, yt, h, ht, ovlp, maxovlp, maxindex; BOXA *boxa; PROCNAME("boxaaAlignBox"); if (!baa) return ERROR_INT("baa not defined", procName, 1); if (!box) return ERROR_INT("box not defined", procName, 1); if (!pindex) return ERROR_INT("&index not defined", procName, 1); n = boxaaGetCount(baa); boxGetGeometry(box, NULL, &y, NULL, &h); maxovlp = -10000000; for (i = 0; i < n; i++) { boxa = boxaaGetBoxa(baa, i, L_CLONE); if ((m = boxaGetCount(boxa)) == 0) { L_WARNING("no boxes in boxa", procName); continue; } boxaGetBoxGeometry(boxa, m - 1, NULL, &yt, NULL, &ht); /* last one */ boxaDestroy(&boxa); /* Overlap < 0 means the components do not overlap vertically */ if (yt >= y) ovlp = y + h - 1 - yt; else ovlp = yt + ht - 1 - y; if (ovlp > maxovlp) { maxovlp = ovlp; maxindex = i; } } if (maxovlp + delta >= 0) *pindex = maxindex; else *pindex = n; return 0; }