我的方法將是
- 的範圍內有兩個極限(下限和上限)
- 每個範圍分區的空間分爲三個部分(下面,裏面,上述)
- 每個限制分區的空間分成兩個部分(下面,above_or_equal)
所以該方法可以是:
- 號範圍
- 分解範圍分爲兩個限制
- 把限制成樹,在包含兩個清單引用他們(範圍節點一個列表中使用此限制作爲下限節點,一爲上限)
- 找到一個字符串你走
- 樹,每次STE實際上,你實際上超越了限制,並獲得了你左右哪個限制以及左/右/內的哪些範圍。
- 你需要兩個額外的列表(範圍數)來做這個遍歷。
- 這些列表可以是位圖
- 每當您跨越邊界時,您都會從列表中添加範圍編號並將其從另一個列表中刪除。
- 一旦你的範圍內(X> =下限& & X <上限; 與對應於相同的範圍內當然的限制)algorihtm飾面。
- (給定的,這實際上是具有最低號碼的範圍內:第一匹配)這可以被檢測到
- 如果兩個列表共享一個或多個成員
- 我們想要的最低編號的重疊構件。
由於這種方法是一種樹搜索,它具有O(日誌(N))的複雜性。
UPDATE:關於第二個想法,位圖是不存儲用戶名單或結果好辦法。鏈表(實際上是兩個)更好。代碼是300行。我應該在這裏發佈嗎?
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define COUNTOF(a) (sizeof a/sizeof a[0])
#define WANT_DUPLICATES 0
struct range {
/* separate linked lists for {lo,hi})
** for limits that this range participates in. */
struct range *next_lo;
struct range *next_hi;
unsigned num;
char *str_lo, *str_hi;
} ranges[] =
{{NULL,NULL,0, "a","an"}, {NULL,NULL,1, "an", "bb"}
,{NULL,NULL,2, "bb", "d"}, {NULL,NULL,3, "c", "h"}
,{NULL,NULL,4, "bb", "c"}, {NULL,NULL,5, "c", "d"}
};
#define NRANGE COUNTOF(ranges)
void add_range(struct range *pp);
void list_dmp(FILE *fp, int isupper, struct range *bp);
struct treetwo {
struct treetwo *prev;
struct treetwo *next;
char *str;
struct range *list_lo; /* ranges that use str as lower limit */
struct range *list_hi; /* ranges that use str as upper limit */
};
struct treetwo *root = NULL;
struct treetwo ** find_hnd(struct treetwo **tpp, char *str);
void tree_dmp(FILE *fp, struct treetwo *tp, char *msg, unsigned indent);
struct result {
unsigned size;
unsigned used;
struct {
unsigned num;
unsigned mask;
} *entries;
};
#define MASK_BELOW_LO 1
#define MASK_ABOVE_LO 2
#define MASK_BELOW_HI 4
#define MASK_ABOVE_HI 8
int result_resize(struct result *res, unsigned newsize);
void init_structures(void);
struct result *find_matches (char *str);
unsigned update_state(struct result *rp, struct treetwo *tp, int isabove);
int main (void)
{
char buff[100];
struct result *res;
size_t pos;
unsigned idx;
static char *legend[4] = { "unknown", "below", "above", "impossible"};
init_structures();
tree_dmp(stderr, root, "Root", 0);
while (fgets (buff, sizeof buff, stdin)) {
pos=strcspn(buff, "\r\n");
buff[pos] = 0;
res = find_matches (buff);
for (idx=0; idx < res->used; idx++) {
unsigned num = res->entries[idx].num;
unsigned mask = res->entries[idx].mask;
fprintf(stdout, "[%u]Range%u %x: '%s' %s '%s' and '%s' %s '%s'\n"
, idx, num, mask
, buff, legend[mask & 3], ranges[num].str_lo
, buff, legend[(mask>>2) & 3], ranges[num].str_hi
);
}
}
return 0;
}
unsigned update_state(struct result *rp, struct treetwo *tp, int isabove)
{
struct range *p;
unsigned mask_lo, mask_hi;
unsigned hitcnt,idx;
/* State: (lower limit)
** 0 : unknown
** MASK_BELOW_LO: below limit
** MASK_ABOVE_LO: above limit
** 3: impossible
** State: (upper limit)
** 0 : unknown
** MASK_BELOW_HI: below limit
** MASK_ABOVE_HI: above limit
** c: impossible
** Combined states:
** required state 2|4 := 6
** 5: unreachable
** a: unreachable
** 9: impossible
** f: impossible
*/
if (!tp) return 0;
hitcnt=0;
mask_lo = (isabove>=0) ? MASK_ABOVE_LO : MASK_BELOW_LO;
mask_hi = (isabove>=0) ? MASK_ABOVE_HI : MASK_BELOW_HI;
fprintf(stderr , "Update_state(start{%s}, isabove=%d, mask=%x,%x)\n"
, tp->str , isabove, mask_lo, mask_hi);
fprintf(stderr , "Update_state(Lo=%s)=", tp->str);
list_dmp(stderr , 0, tp->list_lo);
idx=0;
for (p = tp->list_lo; p ; p = p->next_lo) {
unsigned num = p->num;
fprintf(stderr , "Update_state:[%u] |= %u", num, mask_lo);
for ( ;idx < rp->used;idx++) { if (rp->entries[idx].num >= num) break; }
if (idx < rp->used) {
fprintf(stderr , " Old was:%u\n", rp->entries[idx].mask);
rp->entries[idx].mask |= mask_lo;
if (rp->entries[idx].mask == (MASK_ABOVE_LO|MASK_BELOW_HI)) hitcnt++;
continue;
}
if (idx >= rp->used) {
if (rp->used >= rp->size && result_resize(rp, rp->size ? rp->size*2 : 8)) break;
fprintf(stderr , " New at:%u\n", idx);
rp->entries[idx].num = num;
rp->entries[idx].mask = mask_lo;
rp->used++;
}
}
fprintf(stderr , "Update_state(Hi=%s)=", tp->str);
list_dmp(stderr , 1, tp->list_hi);
idx=0;
for (p = tp->list_hi; p ; p = p->next_hi) {
unsigned num = p->num;
fprintf(stderr , "Update_state:[%u] |= %u", num, mask_lo);
for ( ;idx < rp->used;idx++) { if (rp->entries[idx].num >= num) break; }
if (idx < rp->used) {
fprintf(stderr , " Old was:%u\n", rp->entries[idx].mask);
rp->entries[idx].mask |= mask_hi;
if (rp->entries[idx].mask == (MASK_ABOVE_LO|MASK_BELOW_HI)) hitcnt++;
continue;
}
if (idx >= rp->used) {
if (rp->used >= rp->size && result_resize(rp, rp->size ? rp->size*2 : 8)) break;
fprintf(stderr , " New at:%u\n", idx);
rp->entries[idx].num = num;
rp->entries[idx].mask = mask_hi;
rp->used++;
}
}
return hitcnt;
}
struct result *find_matches (char *str)
{
int rc;
struct treetwo **hnd;
struct result *res = malloc (sizeof *res);
unsigned dst,src;
res->used=res->size=0; res->entries=0;
for (hnd= &root; *hnd; hnd = (rc < 0) ? &(*hnd)->prev : &(*hnd)->next) {
rc = strcmp(str, (*hnd)->str);
fprintf(stderr, "####\nStr=%s Node={%s} rc=%d\n"
, str, (*hnd)->str, rc);
list_dmp(stderr , 0, (*hnd)->list_lo);
list_dmp(stderr , 1, (*hnd)->list_hi);
rc = update_state(res, *hnd , rc);
#if WANT_DUPLICATES
continue;
#else
/* if we don't want duplicates we can bail out on the first match */
if (rc) break;
#endif
}
/* Now cleanup the results.
** Below(lower limit) and above(upper limit) and variations can be removed.
** Some results are incomplete, because one of there limits is out
** of reach (shadowed by a narrower range). We'll have to recompute these.
** The result structure is compacted: if entries are deleted, the remaining ones are shifted down.
** Note: part of this cleanup (removal of unreacheables) could be done in update_state(),
** that would keep the array with partial results as short as possible.
*/
for (dst=src=0; src < res->used; src++) {
int rc;
unsigned num = res->entries[src].num;
rescan:
switch (res->entries[src].mask & 0xf) {
default: break;
case 0: /* impossible */
goto rescan;
#if WANT_DUPLICATES
case MASK_ABOVE_LO:
rc = strcmp(str, ranges[num].str_hi);
res->entries[src].mask |= (rc >=0) ? MASK_ABOVE_HI : MASK_BELOW_HI;
goto rescan;
case MASK_BELOW_HI:
rc = strcmp(str, ranges[num].str_lo);
res->entries[src].mask |= (rc >=0) ? MASK_ABOVE_LO : MASK_BELOW_LO;
goto rescan;
#endif
case MASK_BELOW_HI|MASK_ABOVE_LO:
if (dst != src) res->entries[dst] = res->entries[src];
dst++;
}
}
fprintf(stderr, "####\nFinal pass: %u/%u\n", dst, res->used);
res->used = dst;
return res;
}
void init_structures(void)
{
unsigned idx;
for (idx = 0; idx < NRANGE; idx++) {
add_range(&ranges[idx]);
}
}
void list_dmp(FILE *fp, int isupper, struct range *bp)
{
fprintf(fp, "%s", (isupper) ? "Upper" :"Lower" );
for ( ; bp ; bp = (isupper) ? bp->next_hi : bp->next_lo) {
fprintf(fp, " %u:{%s,%s}"
, bp->num , bp->str_lo , bp->str_hi
);
}
fprintf(stdout, "\n");
}
void add_range(struct range *pp)
{
struct treetwo **tpp;
struct range **rpp;
fprintf(stderr, "Inserting range %u->{%s,%s}\n", pp->num, pp->str_lo, pp->str_hi);
/* find low boundary for this interval */
tpp = find_hnd (&root, pp->str_lo);
if (!*tpp) {
fprintf(stderr, "Creating node for %u->%s (low)\n", pp->num, pp->str_lo);
*tpp = malloc(sizeof **tpp);
(*tpp)->list_lo = NULL;
(*tpp)->list_hi = NULL;
(*tpp)->str = pp->str_lo;
}
for (rpp = &(*tpp)->list_lo; *rpp ; rpp = &(*rpp)->next_lo) {;}
*rpp = pp;
fprintf(stderr, "Added range %u->{%s,%s} to treenode(%s)->list_lo\n"
, pp->num, pp->str_lo, pp->str_hi
, (*tpp)->str
);
/* find high boundary */
tpp = find_hnd (&root, pp->str_hi);
if (!*tpp) {
fprintf(stderr, "Creating node for %u->%s (High)\n", pp->num, pp->str_hi);
*tpp = malloc(sizeof **tpp);
(*tpp)->list_lo = NULL;
(*tpp)->list_hi = NULL;
(*tpp)->str = pp->str_hi;
}
for (rpp = &(*tpp)->list_hi; *rpp ; rpp = &(*rpp)->next_hi) {;}
*rpp = pp;
fprintf(stderr, "Added range %u->{%s,%s} to treenode(%s)->list_hi\n"
, pp->num, pp->str_lo, pp->str_hi
, (*tpp)->str
);
}
struct treetwo ** find_hnd(struct treetwo **tpp, char *str)
{
int rc;
for ( ; *tpp; tpp = (rc < 0) ? &(*tpp)->prev : &(*tpp)->next) {
rc = strcmp(str, (*tpp)->str);
if (!rc) break;
}
return tpp;
}
void tree_dmp(FILE *fp, struct treetwo *tp, char *msg, unsigned indent)
{
unsigned uu;
if (!tp) return;
if (!msg) msg = "";
for (uu=0; uu < indent; uu++) { fputc(' ', fp); }
fprintf(fp, "%s:{%s}\n", msg, tp->str);
for (uu=0; uu < indent+1; uu++) { fputc(' ', fp); }
list_dmp(fp , 0, tp->list_lo);
for (uu=0; uu < indent+1; uu++) { fputc(' ', fp); }
list_dmp(fp , 1, tp->list_hi);
tree_dmp(fp, tp->prev, "Prev", indent+2);
tree_dmp(fp, tp->next, "Next", indent+2);
}
int result_resize(struct result *res, unsigned newsize)
{
void *old;
old = res->entries;
res->entries = realloc (res->entries , newsize * sizeof *res->entries);
if (!res->entries) {
res->entries = old; return -1;
}
res->size = newsize;
if (res->used > newsize) res->used = newsize;
return 0;
}
能否請你解釋一下更好的第一段?當範圍以「as」開始(因此「ap」不在其中)時,「蘋果」是如何在範圍2中?此範圍的最大長度爲2,蘋果有5個字符。 – m0skit0 2012-02-17 12:44:23
編輯:真的沒有意義 – UmNyobe 2012-02-17 12:51:28
@UmNyobe我不這麼認爲。我認爲任何字符串開頭的字詞小於'as'的字符都適合範圍[a,as)例如。 「a」,「ace」,「琥珀」,但不是「阿斯」或「阿特伍德」。 – jrok 2012-02-17 12:52:17