001/** 002 * Copyright (c) 2007-2008, Regents of the University of Colorado 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without 006 * modification, are permitted provided that the following conditions are met: 007 * 008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 011 * 012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 022 * POSSIBILITY OF SUCH DAMAGE. 023 */ 024/* 025 * This file was copied from the Apache UIMA examples source code base from 026 * org.apache.uima.examples.cpe.XCasWriterCasConsumer and modified. The apache 027 * licence that applies to the original work is provided in the next comment. 028 */ 029 030/* 031 * Licensed to the Apache Software Foundation (ASF) under one 032 * or more contributor license agreements. See the NOTICE file 033 * distributed with this work for additional information 034 * regarding copyright ownership. The ASF licenses this file 035 * to you under the Apache License, Version 2.0 (the 036 * "License"); you may not use this file except in compliance 037 * with the License. You may obtain a copy of the License at 038 * 039 * http://www.apache.org/licenses/LICENSE-2.0 040 * 041 * Unless required by applicable law or agreed to in writing, 042 * software distributed under the License is distributed on an 043 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 044 * KIND, either express or implied. See the License for the 045 * specific language governing permissions and limitations 046 * under the License. 047 */ 048 049package org.cleartk.util.ae; 050 051import java.io.File; 052import java.io.IOException; 053 054import org.apache.uima.UimaContext; 055import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 056import org.apache.uima.jcas.JCas; 057import org.apache.uima.resource.ResourceInitializationException; 058import org.apache.uima.util.FileUtils; 059import org.cleartk.util.ViewUriUtil; 060import org.apache.uima.fit.component.JCasAnnotator_ImplBase; 061import org.apache.uima.fit.descriptor.ConfigurationParameter; 062 063/** 064 * <br> 065 * Copyright (c) 2007-2008, Regents of the University of Colorado <br> 066 * All rights reserved. 067 * 068 * 069 * 070 * A simple CAS consumer that creates plain text files from the document text given to each CAS 071 * 072 * @author Philip Ogren 073 */ 074 075public class PlainTextWriter extends JCasAnnotator_ImplBase { 076 077 public static final String PARAM_OUTPUT_DIRECTORY_NAME = "outputDirectoryName"; 078 079 @ConfigurationParameter( 080 name = PARAM_OUTPUT_DIRECTORY_NAME, 081 mandatory = true, 082 description = "takes a path to directory into which output files will be written.") 083 private String outputDirectoryName; 084 085 private File outputDirectory; 086 087 @Override 088 public void initialize(UimaContext context) throws ResourceInitializationException { 089 super.initialize(context); 090 091 this.outputDirectory = new File(outputDirectoryName); 092 if (!this.outputDirectory.exists()) { 093 this.outputDirectory.mkdirs(); 094 } 095 } 096 097 @Override 098 public void process(JCas jCas) throws AnalysisEngineProcessException { 099 String id = new File(ViewUriUtil.getURI(jCas)).getName(); 100 File outFile = new File(this.outputDirectory, id + ".txt"); 101 try { 102 FileUtils.saveString2File(jCas.getDocumentText(), outFile); 103 } catch (IOException e) { 104 throw new AnalysisEngineProcessException(e); 105 } 106 } 107 108 public void setOutputDirectoryName(String outputDirectoryName) { 109 this.outputDirectoryName = outputDirectoryName; 110 } 111 112}