我正在開發一個程序,用來提取 PDF 文件中特定區域內的文本,使用的是 Java 和 iText 庫。現在,我可以通過手動輸入區域坐標、使用下面的代碼來提取數據(代碼見下)。
問題標題:Java:如何使用 iText 從 PDF 文件中選擇區域提取文本?
import java.io.IOException;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.FilteredTextRenderListener;
import com.itextpdf.text.pdf.parser.LocationTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import com.itextpdf.text.pdf.parser.RegionTextRenderFilter;
import com.itextpdf.text.pdf.parser.RenderFilter;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
/**
 * Created by Malek Boubakri on 03/06/2015 at 15:45.
 *
 * <p>Prints the text found inside a rectangular region of every page of a PDF document.
 */
public class ExtractPageContentArea {

    /**
     * Extracts the text lying inside the given region on each page of the document and
     * prints it to standard output, one page after another.
     *
     * <p>NOTE(review): the coordinates are presumably expressed in PDF user-space units with
     * the origin at the lower-left corner of the page - confirm against the iText documentation.
     *
     * @param x      x coordinate of the lower-left corner of the region
     * @param y      y coordinate of the lower-left corner of the region
     * @param width  width of the region
     * @param height height of the region
     * @param pdf    file name (or URL) of the PDF, as accepted by {@code PdfReader}
     * @throws IOException if the document cannot be opened or read
     */
    public void parsePdf(float x, float y, float width, float height, String pdf) throws IOException {
        PdfReader reader = new PdfReader(pdf);
        try {
            // iText's Rectangle takes two corners (llx, lly, urx, ury), not a corner plus a size,
            // so the upper-right corner has to be derived from the origin and the dimensions.
            Rectangle rect = new Rectangle(x, y, x + width, y + height);
            RenderFilter filter = new RegionTextRenderFilter(rect);
            for (int page = 1; page <= reader.getNumberOfPages(); page++) {
                // A fresh strategy per page so text from earlier pages is not accumulated.
                TextExtractionStrategy strategy =
                        new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);
                System.out.println(PdfTextExtractor.getTextFromPage(reader, page, strategy));
            }
        } finally {
            // Release the underlying file even when extraction fails part-way through.
            reader.close();
        }
    }
}
下面這段代碼可以繪製矩形,並保存所需的坐標:
import java.awt.BorderLayout;
import java.awt.Graphics;
import java.awt.Rectangle;
import java.awt.event.MouseEvent;
import java.awt.event.MouseListener;
import java.awt.event.MouseMotionListener;
import java.util.ArrayList;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.SwingConstants;
/**
 * A frame on which the user can drag out rectangles with the mouse.
 *
 * <p>The frame shows the current mouse activity in the centre label, the start and end points
 * of the latest drag in the west/east labels, and the size of the rectangle being dragged in
 * the north label. Every completed rectangle is remembered and redrawn on each paint.
 */
public class MouseTracker extends JFrame implements MouseListener, MouseMotionListener {
    private static final long serialVersionUID = 1L;

    /** Centre label describing the most recent mouse event. */
    private final JLabel mousePosition;

    // Start (x1, y1) and current/end (x2, y2) points of the rectangle being dragged.
    int x1, x2, y1, y2;

    // Width and height of the rectangle being dragged (or of the last completed one).
    int w, h;

    /** West label showing where the drag started. */
    private final JLabel recStart;

    /** East label showing where the drag ended. */
    private final JLabel recStop;

    /** North label showing the rectangle dimensions. */
    private final JLabel cords;

    /** All rectangles completed so far, in the order they were drawn. */
    private final ArrayList<Rectangle> rectangles = new ArrayList<Rectangle>();

    /** True while no drag is in progress, i.e. there is no in-progress rectangle to draw. */
    private boolean isNewRect = true;

    /** Sets up the GUI and registers the mouse event handlers. */
    public MouseTracker() {
        super("Rectangle Drawer");

        this.mousePosition = new JLabel();
        this.mousePosition.setHorizontalAlignment(SwingConstants.CENTER);
        getContentPane().add(this.mousePosition, BorderLayout.CENTER);

        JLabel text1 = new JLabel();
        text1.setText("At the center the mouse pointer's coordinates will be displayed.");
        getContentPane().add(text1, BorderLayout.SOUTH);

        this.recStart = new JLabel();
        getContentPane().add(this.recStart, BorderLayout.WEST);

        this.recStop = new JLabel();
        getContentPane().add(this.recStop, BorderLayout.EAST);

        this.cords = new JLabel();
        getContentPane().add(this.cords, BorderLayout.NORTH);
        updateSizeLabel();

        addMouseListener(this); // listens for own mouse and
        addMouseMotionListener(this); // mouse-motion events

        setSize(800, 600);
        setVisible(true);
    }

    // MouseListener event handlers

    /** Handles the event fired when the mouse is released immediately after a press. */
    @Override
    public void mouseClicked(final MouseEvent event) {
        this.mousePosition.setText("Clicked at [" + event.getX() + ", " + event.getY() + "]");
        repaint();
    }

    /** Records the start point of a new rectangle when the mouse is pressed. */
    @Override
    public void mousePressed(final MouseEvent event) {
        this.x1 = event.getX();
        this.y1 = event.getY();
        this.mousePosition.setText("Pressed at [" + this.x1 + ", " + this.y1 + "]");
        this.recStart.setText("Start: [" + this.x1 + ", " + this.y1 + "]");
        // A new selection starts with no extent.
        this.w = 0;
        this.h = 0;
        updateSizeLabel();
        repaint();
    }

    /** Completes the rectangle when the mouse is released and stores it for later painting. */
    @Override
    public void mouseReleased(final MouseEvent event) {
        this.x2 = event.getX();
        this.y2 = event.getY();
        this.mousePosition.setText("Released at [" + this.x2 + ", " + this.y2 + "]");
        this.recStop.setText("End: [" + this.x2 + ", " + this.y2 + "]");

        Rectangle rectangle = getRectangleFromPoints();
        // A plain click produces a 0x0 rectangle; do not let those pile up in the list.
        if (rectangle.width != 0 || rectangle.height != 0) {
            this.rectangles.add(rectangle);
        }
        this.w = rectangle.width;
        this.h = rectangle.height;
        updateSizeLabel();

        // Forget the drag points so the finished rectangle is not drawn twice.
        this.x1 = this.y1 = this.x2 = this.y2 = 0;
        this.isNewRect = true;
        repaint();
    }

    /**
     * Builds a normalised rectangle (non-negative width and height) from the start point
     * (x1, y1) and the end point (x2, y2), whichever direction the drag went.
     */
    private Rectangle getRectangleFromPoints() {
        int left = Math.min(this.x1, this.x2);
        int top = Math.min(this.y1, this.y2);
        return new Rectangle(left, top, Math.abs(this.x1 - this.x2), Math.abs(this.y1 - this.y2));
    }

    /** Shows the current rectangle dimensions in the north label. */
    private void updateSizeLabel() {
        this.cords.setText("w = " + this.w + ", h = " + this.h);
    }

    /** Handles the event fired when the mouse enters the frame. */
    @Override
    public void mouseEntered(final MouseEvent event) {
        this.mousePosition.setText("Mouse entered at [" + event.getX() + ", " + event.getY() + "]");
        repaint();
    }

    /** Handles the event fired when the mouse leaves the frame. */
    @Override
    public void mouseExited(final MouseEvent event) {
        this.mousePosition.setText("Mouse outside window");
        repaint();
    }

    // MouseMotionListener event handlers

    /** Tracks the end point of the rectangle while the user drags with a button pressed. */
    @Override
    public void mouseDragged(final MouseEvent event) {
        this.x2 = event.getX();
        this.y2 = event.getY();
        this.mousePosition.setText("Dragged at [" + this.x2 + ", " + this.y2 + "]");
        this.isNewRect = false;
        // Keep the displayed dimensions in step with the rectangle being dragged.
        this.w = Math.abs(this.x1 - this.x2);
        this.h = Math.abs(this.y1 - this.y2);
        updateSizeLabel();
        repaint(); // repaint schedules a call to paint
    }

    /** Handles the event fired when the mouse moves with no button pressed. */
    @Override
    public void mouseMoved(final MouseEvent event) {
        this.mousePosition.setText("Moved at [" + event.getX() + ", " + event.getY() + "]");
        repaint();
    }

    /**
     * Paints the frame, then the start/end markers, the rectangle currently being dragged
     * (if any) and every rectangle completed so far.
     */
    @Override
    public void paint(final Graphics g) {
        super.paint(g); // clear the frame surface
        g.drawString("Start Rec Here", this.x1, this.y1);
        g.drawString("End Rec Here", this.x2, this.y2);

        if (!this.isNewRect) {
            Rectangle newRectangle = getRectangleFromPoints();
            g.drawRect(newRectangle.x, newRectangle.y, newRectangle.width, newRectangle.height);
        }
        for (Rectangle rectangle : this.rectangles) {
            g.drawRect(rectangle.x, rectangle.y, rectangle.width, rectangle.height);
        }
        // Component state (the size label) is updated by the event handlers, not here:
        // painting should only draw.
    }

    /** Opens the rectangle drawer window. */
    public static void main(final String args[]) {
        MouseTracker application = new MouseTracker();
        application.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    }
}
我想用這些坐標來指定 PDF 文件中的區域,但我真的不知道如何合併這兩個功能:如何把繪圖區域疊放在文檔上方,以及如何讓矩形坐標與文本坐標相互對應。
如何在另一個面板的上方進行繪製?
我是否應該把 PDF 轉換爲圖像,並把它放在繪圖區域的後面?
如果應該這樣做,有誰能推薦一個好用且免費的 OCR 庫嗎?
如果有什麼不清楚的地方,請留言評論!希望有人能給我指條路,因爲我真的毫無頭緒。
期待你們的幫助,謝謝!(抱歉我的英文不好)
謝謝,@Freek de Bruijn,我現在正在按這個思路嘗試..希望它最終能行得通 :( 否則我還有另一個想法!請繼續關注 :) –
多次嘗試後,我確認「玻璃面板」(glass pane)不是適合我的應用程序的解決方案,因爲它不能只覆蓋在特定的區域或組件之上,只能用於根面板(root pane)。我現在正在採用另一個解決方案,一切運行正常。再次感謝 @Freek de Bruijn –