2016-11-28 50 views
5

我正在使用Microsoft Computer Vision讀取收據。處理OCR /計算機視覺結果以匹配收據結構

我得到的結果被劃分到按列分組的區域中,例如,數量、產品名稱、金額分別位於三個不同的區域。

我更希望整個產品列表是一個區域,並且其中每一行對應一個產品。

是否有任何方法可以配置計算機視覺來完成此任務?或者更有可能的是,既然所有單詞的位置都可用,是否有任何好的技術或庫可以用於對結果進行後處理?

下面是收據的圖像和計算機視覺的結果。

receipt

{ 
    "language": "sv", 
    "textAngle": 2.0999999999999632, 
    "orientation": "Up", 
    "regions": [ 
    { 
     "boundingBox": "1012,450,660,326", 
     "lines": [ 
     { 
      "boundingBox": "1362,450,76,30", 
      "words": [ 
      { 
       "boundingBox": "1362,450,76,30", 
       "text": "JULA" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1207,486,465,49", 
      "words": [ 
      { 
       "boundingBox": "1207,502,172,33", 
       "text": "Ekslinsan" 
      }, 
      { 
       "boundingBox": "1400,497,51,30", 
       "text": "3B," 
      }, 
      { 
       "boundingBox": "1479,491,95,33", 
       "text": "25467" 
      }, 
      { 
       "boundingBox": "1595,486,77,32", 
       "text": "VALA" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1304,539,265,38", 
      "words": [ 
      { 
       "boundingBox": "1304,539,265,38", 
       "text": "SE5S6944785601" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1245,584,369,44", 
      "words": [ 
      { 
       "boundingBox": "1245,594,148,34", 
       "text": "Telefon:" 
      }, 
      { 
       "boundingBox": "1421,584,193,37", 
       "text": "042-324040" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1012,695,269,35", 
      "words": [ 
      { 
       "boundingBox": "1012,702,75,28", 
       "text": "Kund" 
      }, 
      { 
       "boundingBox": "1109,695,172,33", 
       "text": "072202787" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1109,738,289,38", 
      "words": [ 
      { 
       "boundingBox": "1109,744,133,32", 
       "text": "LILLVIK" 
      }, 
      { 
       "boundingBox": "1265,738,133,32", 
       "text": "ANDREAS" 
      } 
      ] 
     } 
     ] 
    }, 
    { 
     "boundingBox": "1085,845,14,516", 
     "lines": [ 
     { 
      "boundingBox": "1090,845,9,29", 
      "words": [ 
      { 
       "boundingBox": "1090,845,9,29", 
       "text": "1" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1087,1037,9,28", 
      "words": [ 
      { 
       "boundingBox": "1087,1037,9,28", 
       "text": "1" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1086,1133,9,27", 
      "words": [ 
      { 
       "boundingBox": "1086,1133,9,27", 
       "text": "I" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1085,1332,9,29", 
      "words": [ 
      { 
       "boundingBox": "1085,1332,9,29", 
       "text": "1" 
      } 
      ] 
     } 
     ] 
    }, 
    { 
     "boundingBox": "1122,839,454,573", 
     "lines": [ 
     { 
      "boundingBox": "1128,839,173,33", 
      "words": [ 
      { 
       "boundingBox": "1128,843,36,29", 
       "text": "ST" 
      }, 
      { 
       "boundingBox": "1186,839,115,30", 
       "text": "661107" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1127,879,389,41", 
      "words": [ 
      { 
       "boundingBox": "1127,887,232,33", 
       "text": "VERKTYGSLÅDR" 
      }, 
      { 
       "boundingBox": "1382,883,36,28", 
       "text": "JC" 
      }, 
      { 
       "boundingBox": "1441,882,16,26", 
       "text": "5" 
      }, 
      { 
       "boundingBox": "1481,879,35,28", 
       "text": "ÅR" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1126,935,173,34", 
      "words": [ 
      { 
       "boundingBox": "1126,940,36,29", 
       "text": "ST" 
      }, 
      { 
       "boundingBox": "1187,935,112,32", 
       "text": "181460" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1126,967,450,50", 
      "words": [ 
      { 
       "boundingBox": "1126,987,75,30", 
       "text": "BORR" 
      }, 
      { 
       "boundingBox": "1224,977,193,35", 
       "text": "GLAS/KRKEL" 
      }, 
      { 
       "boundingBox": "1440,974,16,27", 
       "text": "ø" 
      }, 
      { 
       "boundingBox": "1482,971,34,27", 
       "text": "10" 
      }, 
      { 
       "boundingBox": "1539,967,37,28", 
       "text": "MM" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1125,1027,173,37", 
      "words": [ 
      { 
       "boundingBox": "1125,1036,36,28", 
       "text": "ST" 
      }, 
      { 
       "boundingBox": "1185,1027,113,34", 
       "text": "181740" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1124,1062,432,49", 
      "words": [ 
      { 
       "boundingBox": "1124,1071,252,40", 
       "text": "UNIVERSALBORR" 
      }, 
      { 
       "boundingBox": "1400,1066,96,32", 
       "text": "8X120" 
      }, 
      { 
       "boundingBox": "1519,1062,37,30", 
       "text": "MM" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1123,1125,175,34", 
      "words": [ 
      { 
       "boundingBox": "1123,1129,36,30", 
       "text": "ST" 
      }, 
      { 
       "boundingBox": "1183,1125,115,32", 
       "text": "181738" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1122,1164,416,44", 
      "words": [ 
      { 
       "boundingBox": "1122,1170,255,38", 
       "text": "UNIVERSRLBORR" 
      }, 
      { 
       "boundingBox": "1501,1164,37,31", 
       "text": "MM" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1123,1225,170,33", 
      "words": [ 
      { 
       "boundingBox": "1123,1228,36,30", 
       "text": "ST" 
      }, 
      { 
       "boundingBox": "1183,1225,110,32", 
       "text": "316401" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1123,1270,355,39", 
      "words": [ 
      { 
       "boundingBox": "1123,1275,216,34", 
       "text": "LÅSCYLINDER" 
      }, 
      { 
       "boundingBox": "1362,1270,116,33", 
       "text": "2-PACK" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1123,1327,177,34", 
      "words": [ 
      { 
       "boundingBox": "1123,1330,37,31", 
       "text": "ST" 
      }, 
      { 
       "boundingBox": "1183,1327,117,32", 
       "text": "396026" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1124,1373,356,39", 
      "words": [ 
      { 
       "boundingBox": "1124,1377,216,35", 
       "text": "LÅSCYLINDER" 
      }, 
      { 
       "boundingBox": "1363,1373,117,33", 
       "text": "2-PRCK" 
      } 
      ] 
     } 
     ] 
    }, 
    { 
     "boundingBox": "1644,820,118,524", 
     "lines": [ 
     { 
      "boundingBox": "1658,820,96,31", 
      "words": [ 
      { 
       "boundingBox": "1658,820,96,31", 
       "text": "79,00" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1659,912,97,31", 
      "words": [ 
      { 
       "boundingBox": "1659,916,50,27", 
       "text": "44," 
      }, 
      { 
       "boundingBox": "1719,912,37,28", 
       "text": "90" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1659,1004,98,32", 
      "words": [ 
      { 
       "boundingBox": "1659,1007,51,29", 
       "text": "69," 
      }, 
      { 
       "boundingBox": "1720,1004,37,28", 
       "text": "90" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1661,1103,97,35", 
      "words": [ 
      { 
       "boundingBox": "1661,1103,97,35", 
       "text": "49,90" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1644,1309,118,35", 
      "words": [ 
      { 
       "boundingBox": "1644,1309,118,35", 
       "text": "299,00" 
      } 
      ] 
     } 
     ] 
    }, 
    { 
     "boundingBox": "1064,1469,620,45", 
     "lines": [ 
     { 
      "boundingBox": "1064,1469,620,45", 
      "words": [ 
      { 
       "boundingBox": "1064,1481,237,33", 
       "text": "-Rabattcheck" 
      }, 
      { 
       "boundingBox": "1324,1486,51,24", 
       "text": "nr:" 
      }, 
      { 
       "boundingBox": "1384,1469,300,38", 
       "text": "935011035567095" 
      } 
      ] 
     } 
     ] 
    }, 
    { 
     "boundingBox": "1123,1584,159,82", 
     "lines": [ 
     { 
      "boundingBox": "1123,1584,159,33", 
      "words": [ 
      { 
       "boundingBox": "1123,1584,159,33", 
       "text": "DELSUMMA" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1143,1635,116,31", 
      "words": [ 
      { 
       "boundingBox": "1143,1635,116,31", 
       "text": "Rabatt" 
      } 
      ] 
     } 
     ] 
    }, 
    { 
     "boundingBox": "1609,1570,180,189", 
     "lines": [ 
     { 
      "boundingBox": "1609,1570,160,36", 
      "words": [ 
      { 
       "boundingBox": "1609,1575,11,31", 
       "text": "|" 
      }, 
      { 
       "boundingBox": "1648,1570,121,34", 
       "text": "041,70" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1690,1621,99,34", 
      "words": [ 
      { 
       "boundingBox": "1690,1621,99,34", 
       "text": "50,00" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1651,1725,120,34", 
      "words": [ 
      { 
       "boundingBox": "1651,1727,53,32", 
       "text": "991" 
      }, 
      { 
       "boundingBox": "1715,1746,9,13", 
       "text": "," 
      }, 
      { 
       "boundingBox": "1732,1725,39,32", 
       "text": "70" 
      } 
      ] 
     } 
     ] 
    }, 
    { 
     "boundingBox": "992,1737,310,1226", 
     "lines": [ 
     { 
      "boundingBox": "1123,1737,179,35", 
      "words": [ 
      { 
       "boundingBox": "1123,1737,179,35", 
       "text": "SLUTSUMMA" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1036,2756,227,35", 
      "words": [ 
      { 
       "boundingBox": "1036,2756,227,35", 
       "text": "Totalbelopp" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1140,2811,124,37", 
      "words": [ 
      { 
       "boundingBox": "1140,2811,53,35", 
       "text": "991" 
      }, 
      { 
       "boundingBox": "1207,2833,8,15", 
       "text": "/" 
      }, 
      { 
       "boundingBox": "1225,2811,39,34", 
       "text": "70" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "992,2927,271,36", 
      "words": [ 
      { 
       "boundingBox": "992,2928,159,35", 
       "text": "Säljare:" 
      }, 
      { 
       "boundingBox": "1182,2927,81,33", 
       "text": "7688" 
      } 
      ] 
     } 
     ] 
    }, 
    { 
     "boundingBox": "1330,2754,145,92", 
     "lines": [ 
     { 
      "boundingBox": "1330,2754,144,34", 
      "words": [ 
      { 
       "boundingBox": "1330,2754,39,33", 
       "text": "Ex" 
      }, 
      { 
       "boundingBox": "1394,2754,80,34", 
       "text": "Moms" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1352,2809,123,37", 
      "words": [ 
      { 
       "boundingBox": "1352,2809,123,37", 
       "text": "793,36" 
      } 
      ] 
     } 
     ] 
    }, 
    { 
     "boundingBox": "1563,2752,126,92", 
     "lines": [ 
     { 
      "boundingBox": "1563,2752,125,33", 
      "words": [ 
      { 
       "boundingBox": "1563,2752,82,33", 
       "text": "Moms" 
      }, 
      { 
       "boundingBox": "1670,2755,18,27", 
       "text": "%" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1586,2808,103,36", 
      "words": [ 
      { 
       "boundingBox": "1586,2808,103,36", 
       "text": "25,00" 
      } 
      ] 
     } 
     ] 
    }, 
    { 
     "boundingBox": "1780,2751,123,93", 
     "lines": [ 
     { 
      "boundingBox": "1820,2751,83,33", 
      "words": [ 
      { 
       "boundingBox": "1820,2751,83,33", 
       "text": "Moms" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1780,2807,123,37", 
      "words": [ 
      { 
       "boundingBox": "1780,2807,123,37", 
       "text": "198,34" 
      } 
      ] 
     } 
     ] 
    }, 
    { 
     "boundingBox": "985,2924,966,573", 
     "lines": [ 
     { 
      "boundingBox": "1523,2924,83,33", 
      "words": [ 
      { 
       "boundingBox": "1523,2924,83,33", 
       "text": "7618" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1288,2926,167,33", 
      "words": [ 
      { 
       "boundingBox": "1288,2939,17,7", 
       "text": "-" 
      }, 
      { 
       "boundingBox": "1330,2926,125,33", 
       "text": "Sabina" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1182,2981,468,36", 
      "words": [ 
      { 
       "boundingBox": "1182,2983,38,34", 
       "text": "24" 
      }, 
      { 
       "boundingBox": "1245,2982,146,34", 
       "text": "oktober" 
      }, 
      { 
       "boundingBox": "1416,2982,82,34", 
       "text": "2016" 
      }, 
      { 
       "boundingBox": "1547,2982,10,33", 
       "text": "1" 
      }, 
      { 
       "boundingBox": "1571,2981,79,34", 
       "text": "7:20" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "991,2985,103,33", 
      "words": [ 
      { 
       "boundingBox": "991,2985,103,33", 
       "text": "Datum" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1161,3040,403,34", 
      "words": [ 
      { 
       "boundingBox": "1161,3040,96,34", 
       "text": "44601" 
      }, 
      { 
       "boundingBox": "1288,3040,140,34", 
       "text": "Kvitto:" 
      }, 
      { 
       "boundingBox": "1460,3040,104,34", 
       "text": "51756" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "990,3042,103,33", 
      "words": [ 
      { 
       "boundingBox": "990,3042,103,33", 
       "text": "Kassa" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1096,3157,728,40", 
      "words": [ 
      { 
       "boundingBox": "1096,3159,105,38", 
       "text": "Spara" 
      }, 
      { 
       "boundingBox": "1225,3157,163,39", 
       "text": "kvittot," 
      }, 
      { 
       "boundingBox": "1418,3157,127,39", 
       "text": "gäller" 
      }, 
      { 
       "boundingBox": "1570,3169,63,26", 
       "text": "som" 
      }, 
      { 
       "boundingBox": "1657,3158,167,39", 
       "text": "garanti." 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1268,3217,388,39", 
      "words": [ 
      { 
       "boundingBox": "1268,3217,103,39", 
       "text": "Öppet" 
      }, 
      { 
       "boundingBox": "1397,3218,62,38", 
       "text": "köp" 
      }, 
      { 
       "boundingBox": "1484,3218,41,37", 
       "text": "30" 
      }, 
      { 
       "boundingBox": "1550,3218,106,38", 
       "text": "dager" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1290,3276,317,39", 
      "words": [ 
      { 
       "boundingBox": "1290,3276,192,38", 
       "text": "VÄLKOMMEN" 
      }, 
      { 
       "boundingBox": "1506,3278,101,37", 
       "text": "ÅTER!" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1116,3335,719,42", 
      "words": [ 
      { 
       "boundingBox": "1116,3337,41,36", 
       "text": "Om" 
      }, 
      { 
       "boundingBox": "1182,3335,82,38", 
       "text": "ditt" 
      }, 
      { 
       "boundingBox": "1290,3346,84,28", 
       "text": "namn" 
      }, 
      { 
       "boundingBox": "1398,3337,63,38", 
       "text": "och" 
      }, 
      { 
       "boundingBox": "1485,3349,261,28", 
       "text": "personnummer" 
      }, 
      { 
       "boundingBox": "1771,3338,64,37", 
       "text": "har" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "1032,3395,894,42", 
      "words": [ 
      { 
       "boundingBox": "1032,3397,146,36", 
       "text": "lämnats" 
      }, 
      { 
       "boundingBox": "1204,3395,62,38", 
       "text": "för" 
      }, 
      { 
       "boundingBox": "1290,3395,61,38", 
       "text": "att" 
      }, 
      { 
       "boundingBox": "1377,3399,194,36", 
       "text": "genomföra" 
      }, 
      { 
       "boundingBox": "1596,3399,61,36", 
       "text": "ett" 
      }, 
      { 
       "boundingBox": "1685,3399,241,38", 
       "text": "JulaPro-köp" 
      } 
      ] 
     }, 
     { 
      "boundingBox": "985,3455,966,42", 
      "words": [ 
      { 
       "boundingBox": "985,3456,193,37", 
       "text": "behandlar" 
      }, 
      { 
       "boundingBox": "1203,3455,85,37", 
       "text": "Jula" 
      }, 
      { 
       "boundingBox": "1312,3456,84,37", 
       "text": "dina" 
      }, 
      { 
       "boundingBox": "1421,3458,195,39", 
       "text": "uppgifter" 
      }, 
      { 
       "boundingBox": "1645,3462,12,33", 
       "text": "i" 
      }, 
      { 
       "boundingBox": "1686,3458,173,38", 
       "text": "enlighet" 
      }, 
      { 
       "boundingBox": "1886,3461,65,36", 
       "text": "med" 
      } 
      ] 
     } 
     ] 
    } 
    ] 
} 
+0

你找到解決方案了嗎? – RAVI

+0

我們提取所有單詞並將它們放在一個列表中。稍後我們將使用Azure機器學習來確定哪些單詞屬於一起,以及它們屬於哪種屬性。但現在我們只是使用各個單詞的邊界框來創建可點擊的區域,用戶需要爲不同的屬性選擇正確的框。 – Lillvik

回答

2

這不是計算機視覺的問題,而是NLP/文本模式識別的問題。換句話說,沒有任何一個OCR會做你想做的事情;它們只負責從圖像中提取文本。

通常的做法是收集許多不同類型的收據,研究它們的結構,然後使用基於規則的方法或基於機器學習的方法對每條信息進行分類。分類器可以具有{ItemName,ItemPrice,Subtotal,Total,Heading,Other}等類別。您可以使用邊界框來形成網格單元格,並將相鄰單元格用作特徵。這是一項並不簡單的工作,需要良好的ML技能才能產生高精度的輸出。

See here for tutorial

另外,可以看一看一些開源項目:

+0

我們計劃將微軟計算機視覺與Azure機器學習一起使用。Azure ML中是否有任何模塊可以完成此任務? – Lillvik