首先引入Maven依赖,如下
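<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>openxml4j</artifactId>
    <version>1.0-beta</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>dom4j</groupId>
    <artifactId>dom4j</artifactId>
    <version>1.6.1</version>
</dependency>
<dependency>
    <groupId>org.apache.geronimo.specs</groupId>
    <artifactId>geronimo-stax-api_1.0_spec</artifactId>
    <version>1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.3.0</version>
</dependency>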
下面编写Java工具类,代码如下
import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStream;import java.net.URL;import org.apache.poi.POIXMLTextExtractor;import org.apache.poi.hwpf.extractor.WordExtractor;import org.apache.poi.xwpf.extractor.XWPFWordExtractor;import org.apache.poi.xwpf.usermodel.XWPFDocument;public class WordReader { public synchronized static String read(String url){ if (url.endsWith("doc") || url.endsWith("DOC")) { return readWord2003(url); }else if (url.endsWith("docx") || url.endsWith("DOCX")) { return readWord2007(url); }else { return ""; } } private static String readWord2007(String url) { POIXMLTextExtractor ex = null; XWPFDocument xwpf = null; InputStream is = null; try { is = new URL(url).openStream(); xwpf = new XWPFDocument(is); ex = new XWPFWordExtractor(xwpf); return ex.getText(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally{ try { ex.close(); } catch (IOException e) { e.printStackTrace(); } try { xwpf.close(); } catch (IOException e) { e.printStackTrace(); } try { is.close(); } catch (IOException e) { e.printStackTrace(); } } return null; } private static String readWord2003(String url) { WordExtractor wordExtractor = null; InputStream fis = null; try { fis = new URL(url).openStream(); wordExtractor = new WordExtractor(fis); String content = wordExtractor.getText(); return content; } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally{ try { wordExtractor.close(); } catch (IOException e) { e.printStackTrace(); } try { fis.close(); } catch (IOException e) { e.printStackTrace(); } } return null; } public static void main(String[] args) { System.out.println(read("http://file.neeq.com.cn/upload/A0/B0/C2/F236.doc")); System.out.println(read("http://file.neeq.com.cn/upload/A0/B0/C2/F260.docx")); }}