public class TableIndexer
extends java.lang.Object
Constructor and Description |
---|
TableIndexer() |
Modifier and Type | Method and Description |
---|---|
void |
closeIndexes() |
java.lang.Integer |
getKeyColumnIndex() |
static java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.Integer>> |
makeDistinctValuesMap(de.uni_mannheim.informatik.dws.winter.webtables.Table table)
makeDistinctValuesMap
This method creates a hashmap containing the distinct values of the table's columns.
The input is a Table; the output is a hashmap with the following structure:
{
column1: {first_value_of_column1: occurrence_count, second_value_of_column1: occurrence_count,..},
column2: {first_value_of_column2: occurrence_count, second_value_of_column2: occurrence_count,..},
:
}
|
void |
setColumnNameIndexWriter(org.apache.lucene.index.IndexWriter columnNameIndexWriter) |
void |
setKeyColumnIndex(java.lang.Integer keyColumnIndex) |
void |
setKeyColumnIndexWriter(org.apache.lucene.index.IndexWriter keyColumnIndexWriter) |
void |
setTableIndexWriter(org.apache.lucene.index.IndexWriter tableIndexWriter) |
boolean |
writeTableToColumnNameIndex(de.uni_mannheim.informatik.dws.winter.webtables.Table table,
java.io.File dataFile,
java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.Integer>> distinctTableValues)
writeTableToColumnNameIndex
This method saves the names of the table's columns to the ColumnNameIndex
It does the following steps:
1. loop through every column in the table:
1.1.
|
boolean |
writeTableToIndexes(java.io.File dataFile)
writeTableToIndexes
This method saves information about the csv-dataFile to the following Indexes: KeyColumnIndex, ColumnNameIndex, TableIndex
This method performs the following steps:
1. read the csv-table into a table-object
2. determine the key-column of the table
3. if a key column was detected:
3.1. create a hashmap with the distinct values of every column (this will be needed for the TableIndex and the KeyColumnIndex)
3.2. call the indexing-methods for the individual Indexes
|
boolean |
writeTableToKeyColumnIndex(de.uni_mannheim.informatik.dws.winter.webtables.Table table,
java.lang.String tablename,
int keyColumnIndex)
writeTableToKeyColumnIndex
This method saves the key column of the table to the KeyColumnIndex
It does the following steps:
1. make the keyColumnString (= the concatenated values of the table's keyColumn, separated by " ")
2. save a document with the following values to the keyColumnIndex: tableHeader, columnHeader, keyColumnString, keyColumnIndex
|
static boolean |
writeTableToTableIndex_old(de.uni_mannheim.informatik.dws.winter.webtables.Table table,
java.io.File dataFile,
java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.Integer>> distinctTableValues,
org.apache.lucene.index.IndexWriter tableIndexWriter) |
boolean |
writeTableToTableIndex(de.uni_mannheim.informatik.dws.winter.webtables.Table table,
java.io.File dataFile,
java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.Integer>> distinctTableValues)
writeTableToTableIndex
This method saves the distinct values of each of the table's columns to the TableIndex
It does the following steps:
1. loop through all the columns in the table
1.1. loop through all the distinct values in this column
1.1.1. save a document with the following values to the TableIndex: id, tableHeader, columnHeader, columnDataType, tableCardinality, columnDistinctValues, valueMultiplicity, value, fullTablePath, isPrimaryKey, originalValue
|
public void setKeyColumnIndexWriter(org.apache.lucene.index.IndexWriter keyColumnIndexWriter)
public void setColumnNameIndexWriter(org.apache.lucene.index.IndexWriter columnNameIndexWriter)
public void setTableIndexWriter(org.apache.lucene.index.IndexWriter tableIndexWriter)
public void setKeyColumnIndex(java.lang.Integer keyColumnIndex)
public java.lang.Integer getKeyColumnIndex()
public void closeIndexes()
public boolean writeTableToIndexes(java.io.File dataFile) throws java.io.IOException
dataFile
- keyColumnIndexWriter
- columnNameIndexWriter
- tableIndexWriter
- java.io.IOException
public static java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.Integer>> makeDistinctValuesMap(de.uni_mannheim.informatik.dws.winter.webtables.Table table)
table
- public boolean writeTableToKeyColumnIndex(de.uni_mannheim.informatik.dws.winter.webtables.Table table, java.lang.String tablename, int keyColumnIndex)
table
- tablename
- keyColumnIndexWriter
- keyColumnIndex
- public boolean writeTableToColumnNameIndex(de.uni_mannheim.informatik.dws.winter.webtables.Table table, java.io.File dataFile, java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.Integer>> distinctTableValues)
table
- dataFile
- distinctTableValues
- columnNameIndexWriter
- public boolean writeTableToTableIndex(de.uni_mannheim.informatik.dws.winter.webtables.Table table, java.io.File dataFile, java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.Integer>> distinctTableValues)
table
- dataFile
- distinctTableValues
- tableIndexWriter
- public static boolean writeTableToTableIndex_old(de.uni_mannheim.informatik.dws.winter.webtables.Table table, java.io.File dataFile, java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.Integer>> distinctTableValues, org.apache.lucene.index.IndexWriter tableIndexWriter)