import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.logging.Level;
import java.util.logging.Logger;

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

/**
 * Prints the text spans referenced by stand-off annotation files.
 *
 * <p>A folder is walked recursively. For every file whose name ends with {@code txt}, the
 * sibling file {@code <path>.annotation} is read as UTF-8. Each of its lines that has exactly
 * three tab-separated fields ({@code label, start, stop}) is written to {@code System.err}
 * together with the substring {@code [start, stop)} of the text file.
 *
 * @author vincz
 */
public class AnnotWrite {

    private static final Logger LOGGER = Logger.getLogger(AnnotWrite.class.getName());

    /** Folder used when no command-line argument is given. */
    private static final String DEFAULT_FOLDER = "d:/Dokumentumok/divany_UJ";

    /** Charset name used for both the text files and the annotation files. */
    private static final String ENCODING = "UTF-8";

    /** Suffix appended to a text file's path to obtain its annotation file. */
    private static final String ANNOTATION_SUFFIX = ".annotation";

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the folder that holds the annotation and txt
     *             files. When absent, {@link #DEFAULT_FOLDER} is used.
     */
    public static void main(String[] args) {
        final String folderName = (args != null && args.length > 0) ? args[0] : DEFAULT_FOLDER;
        final File folderHL = new File(folderName);
        listFilesForFolder(folderHL);
    }

    /**
     * Recursively walks {@code folder} and processes every file whose name ends with
     * {@code txt}.
     *
     * @param folder directory to scan; if it does not exist, is not a directory, or cannot be
     *               listed, a warning is logged and nothing else happens
     */
    public static void listFilesForFolder(final File folder) {
        // listFiles() returns null (not an empty array) for non-directories and on I/O errors.
        final File[] entries = folder.listFiles();
        if (entries == null) {
            LOGGER.log(Level.WARNING, "Cannot list directory: {0}", folder);
            return;
        }
        for (final File fileEntry : entries) {
            if (fileEntry.isDirectory()) {
                listFilesForFolder(fileEntry);
            } else if (fileEntry.getName().endsWith("txt")) {
                writeAnnot(fileEntry);
            }
        }
    }

    /**
     * Reads {@code <fileEntry>.annotation} and prints every annotated span of {@code fileEntry}.
     *
     * <p>Lines that do not have exactly three tab-separated fields are ignored silently. For
     * each remaining line the text file's path is printed to {@code System.out}; then, if the
     * offsets are numeric and lie inside the document, the line
     * {@code path, start, stop, label, text} (tab-separated) is printed to {@code System.err}.
     * Lines with non-numeric or out-of-range offsets are logged and skipped so that one bad
     * line does not abort the whole run.
     *
     * @param fileEntry the text file whose annotations should be printed
     */
    private static void writeAnnot(File fileEntry) {
        final String annotationPath = fileEntry + ANNOTATION_SUFFIX;
        // Loaded on the first usable annotation line and then reused for the rest of the file.
        String doc = null;

        try (FileInputStream in = new FileInputStream(annotationPath);
                BufferedReader bufferedReader =
                        new BufferedReader(new InputStreamReader(in, ENCODING))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                // Expected layout: piece[0] = label, piece[1] = start, piece[2] = stop.
                final String[] piece = line.split("\t");
                if (piece.length != 3) {
                    continue;
                }
                System.out.println(fileEntry);

                int start;
                int stop;
                try {
                    start = Integer.parseInt(piece[1].trim());
                    stop = Integer.parseInt(piece[2].trim());
                } catch (NumberFormatException e) {
                    LOGGER.log(Level.WARNING,
                            "Skipping line with non-numeric offsets in " + annotationPath, e);
                    continue;
                }

                if (doc == null) {
                    doc = readFileToString(fileEntry, ENCODING);
                }
                if (start < 0 || stop < start || stop > doc.length()) {
                    LOGGER.log(Level.WARNING,
                            "Skipping span [{0}, {1}) outside document of length {2} in {3}",
                            new Object[] {start, stop, doc.length(), annotationPath});
                    continue;
                }

                System.err.println(fileEntry + "\t" + start + "\t" + stop + "\t"
                        + piece[0] + "\t" + doc.substring(start, stop));
            }
        } catch (FileNotFoundException e) {
            LOGGER.log(Level.WARNING, "Annotation file not found: " + annotationPath, e);
        } catch (IOException e) {
            // Also covers UnsupportedEncodingException, which is an IOException.
            LOGGER.log(Level.SEVERE, "Failed to read annotation file: " + annotationPath, e);
        }
    }

    /**
     * Reads a whole text file into a string.
     *
     * <p>The file is read line by line and every line is terminated with a single
     * {@code "\n"}, so the result always ends with a newline when the file is non-empty.
     * NOTE(review): because line terminators are normalised, character offsets computed
     * against a file with {@code \r\n} endings would not line up with this string - confirm
     * how the annotation offsets were produced.
     *
     * @param fileEntry the file to read
     * @param cEncoding charset name used to decode the file, e.g. {@code "UTF-8"}
     * @return the file content, or an empty string if the file cannot be read or the charset
     *         name is not supported (a message is printed to {@code System.err} in that case)
     */
    public static String readFileToString(File fileEntry, String cEncoding) {
        final StringBuilder b = new StringBuilder();
        // The stream is declared separately so it is closed even if the reader cannot be built.
        try (FileInputStream in = new FileInputStream(fileEntry);
                BufferedReader reader = new BufferedReader(new InputStreamReader(in, cEncoding))) {
            String line;
            while ((line = reader.readLine()) != null) {
                b.append(line).append("\n");
            }
        } catch (IOException e) {
            System.err.println("Problem with file: " + fileEntry);
            return "";
        }
        return b.toString();
    }
}