Skip to content

Commit f6c7409

Browse files
committed
HIVE-29413: Avoid code duplication by updating getPartCols method for iceberg tables
1 parent 51e4ab0 commit f6c7409

17 files changed

+65
-42
lines changed

ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,7 @@ private List<FieldSchema> getColumnsByPattern() throws HiveException {
6666

6767
private List<FieldSchema> getCols() throws HiveException {
6868
Table table = context.getDb().getTable(desc.getTableName());
69-
List<FieldSchema> allColumns = new ArrayList<>();
70-
allColumns.addAll(table.getCols());
71-
allColumns.addAll(table.getPartCols());
72-
return allColumns;
69+
return new ArrayList<>(table.getAllCols());
7370
}
7471

7572
private Matcher getMatcher() {

ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ private Deserializer getDeserializer(Table table) throws SQLException {
130130
private void getColumnsNoColumnPath(Table table, Partition partition, List<FieldSchema> cols) throws HiveException {
131131
cols.addAll(partition == null || table.getTableType() == TableType.VIRTUAL_VIEW ?
132132
table.getCols() : partition.getCols());
133-
if (!desc.isFormatted()) {
133+
if (!desc.isFormatted() && !table.hasNonNativePartitionSupport()) {
134134
cols.addAll(table.getPartCols());
135135
}
136136

ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,7 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column
174174
List<FieldSchema> partitionColumns = null;
175175
// TODO (HIVE-29413): Refactor to a generic getPartCols() implementation
176176
if (table.isPartitioned()) {
177-
partitionColumns = table.hasNonNativePartitionSupport() ?
178-
table.getStorageHandler().getPartitionKeys(table) :
179-
table.getPartCols();
177+
partitionColumns = table.getPartCols();
180178
}
181179
if (CollectionUtils.isNotEmpty(partitionColumns) &&
182180
conf.getBoolVar(ConfVars.HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY)) {

ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import java.util.List;
2424
import java.util.Map;
2525
import java.util.Set;
26-
import java.util.Map.Entry;
2726

2827
import org.apache.hadoop.hive.conf.HiveConf;
2928
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,7 @@ public List<String> getValues() {
9191
values = new ArrayList<>();
9292

9393
// TODO (HIVE-29413): Refactor to a generic getPartCols() implementation
94-
for (FieldSchema fs : table.hasNonNativePartitionSupport()
95-
? table.getStorageHandler().getPartitionKeys(table)
96-
: table.getPartCols()) {
94+
for (FieldSchema fs : table.getPartCols()) {
9795
String val = partSpec.get(fs.getName());
9896
values.add(val);
9997
}

ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,8 @@ protected void initialize(Table table,
173173
// set default if location is not set and this is a physical
174174
// table partition (not a view partition)
175175
if (table.getDataLocation() != null) {
176-
Path partPath = new Path(table.getDataLocation(), Warehouse.makePartName(table.getPartCols(), tPartition.getValues()));
176+
Path partPath = new Path(table.getDataLocation(),
177+
Warehouse.makePartName(table.getPartCols(), tPartition.getValues()));
177178
tPartition.getSd().setLocation(partPath.toString());
178179
}
179180
}

ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
5858
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
5959
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
60+
import org.apache.hadoop.hive.metastore.HiveMetaHook;
6061
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
6162
import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
6263
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
@@ -118,6 +119,8 @@ public class Table implements Serializable {
118119
private transient StorageHandlerInfo storageHandlerInfo;
119120
private transient MaterializedViewMetadata materializedViewMetadata;
120121

122+
private List<FieldSchema> cachedPartCols;
123+
121124
private TableSpec tableSpec;
122125

123126
private boolean materializedTable;
@@ -193,6 +196,9 @@ public Table makeCopy() {
193196

194197
newTab.setMetaTable(this.getMetaTable());
195198
newTab.setSnapshotRef(this.getSnapshotRef());
199+
if (this.cachedPartCols != null) {
200+
newTab.cachedPartCols = new ArrayList<>(this.cachedPartCols);
201+
}
196202
return newTab;
197203
}
198204

@@ -214,6 +220,7 @@ public org.apache.hadoop.hive.metastore.api.Table getTTable() {
214220
*/
215221
public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) {
216222
this.tTable = tTable;
223+
clearCachedPartCols();
217224
}
218225

219226
/**
@@ -248,7 +255,7 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) {
248255
org.apache.hadoop.hive.metastore.api.Table t = new org.apache.hadoop.hive.metastore.api.Table();
249256
{
250257
t.setSd(sd);
251-
t.setPartitionKeys(new ArrayList<FieldSchema>());
258+
t.setPartitionKeys(null);
252259
t.setParameters(new HashMap<String, String>());
253260
t.setTableType(TableType.MANAGED_TABLE.toString());
254261
t.setDbName(databaseName);
@@ -595,24 +602,43 @@ public boolean equals(Object obj) {
595602
}
596603

597604
public List<FieldSchema> getPartCols() {
598-
List<FieldSchema> partKeys = tTable.getPartitionKeys();
599-
if (partKeys == null) {
600-
partKeys = new ArrayList<>();
601-
tTable.setPartitionKeys(partKeys);
605+
if (cachedPartCols != null) {
606+
return cachedPartCols;
607+
}
608+
List<FieldSchema> partKeys;
609+
if (isTableTypeSet() && hasNonNativePartitionSupport()) {
610+
partKeys = getStorageHandler().getPartitionKeys(this);
611+
} else {
612+
partKeys = tTable.getPartitionKeys();
613+
if (partKeys == null) {
614+
partKeys = new ArrayList<>();
615+
tTable.setPartitionKeys(partKeys);
616+
}
602617
}
618+
cachedPartCols = partKeys;
603619
return partKeys;
604620
}
605621

622+
private void clearCachedPartCols() {
623+
cachedPartCols = null;
624+
}
625+
626+
private boolean isTableTypeSet() {
627+
if (tTable.getParameters() == null) {
628+
return false;
629+
}
630+
String tableType = tTable.getParameters().get(HiveMetaHook.TABLE_TYPE);
631+
return tableType != null;
632+
}
633+
606634
public FieldSchema getPartColByName(String colName) {
607635
return getPartCols().stream()
608636
.filter(key -> key.getName().toLowerCase().equals(colName))
609637
.findFirst().orElse(null);
610638
}
611639

612640
public List<String> getPartColNames() {
613-
List<FieldSchema> partCols = hasNonNativePartitionSupport() ?
614-
getStorageHandler().getPartitionKeys(this) : getPartCols();
615-
return partCols.stream().map(FieldSchema::getName)
641+
return getPartCols().stream().map(FieldSchema::getName)
616642
.collect(Collectors.toList());
617643
}
618644

@@ -761,14 +787,22 @@ private List<FieldSchema> getColsInternal(boolean forMs) {
761787
* @return List&lt;FieldSchema&gt;
762788
*/
763789
public List<FieldSchema> getAllCols() {
764-
ArrayList<FieldSchema> f_list = new ArrayList<FieldSchema>();
765-
f_list.addAll(getCols());
766-
f_list.addAll(getPartCols());
767-
return f_list;
790+
List<FieldSchema> allCols = new ArrayList<>(getCols());
791+
Set<String> colNames = new HashSet<>();
792+
for (FieldSchema col : allCols) {
793+
colNames.add(col.getName());
794+
}
795+
for (FieldSchema col : getPartCols()) {
796+
if (!colNames.contains(col.getName())) {
797+
allCols.add(col);
798+
}
799+
}
800+
return allCols;
768801
}
769802

770803
public void setPartCols(List<FieldSchema> partCols) {
771804
tTable.setPartitionKeys(partCols);
805+
clearCachedPartCols();
772806
}
773807

774808
public String getCatName() {
@@ -812,7 +846,7 @@ public void setOutputFormatClass(String name) throws HiveException {
812846
}
813847

814848
public boolean isPartitioned() {
815-
return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) :
849+
return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) :
816850
CollectionUtils.isNotEmpty(getPartCols());
817851
}
818852

@@ -1153,7 +1187,7 @@ public static void validateColumns(List<FieldSchema> columns, List<FieldSchema>
11531187
}
11541188
colNames.add(colName);
11551189
}
1156-
if (partCols != null) {
1190+
if (partCols != null && !icebergTable) {
11571191
// there is no overlap between columns and partitioning columns
11581192
for (FieldSchema partCol: partCols) {
11591193
String colName = normalize(partCol.getName());

ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -807,8 +807,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
807807
for (FieldNode col : cols) {
808808
int index = originalOutputColumnNames.indexOf(col.getFieldName());
809809
Table tab = cppCtx.getParseContext().getViewProjectToTableSchema().get(op);
810-
List<FieldSchema> fullFieldList = new ArrayList<FieldSchema>(tab.getCols());
811-
fullFieldList.addAll(tab.getPartCols());
810+
List<FieldSchema> fullFieldList = new ArrayList<>(tab.getAllCols());
812811
cppCtx.getParseContext().getColumnAccessInfo()
813812
.add(tab.getCompleteName(), fullFieldList.get(index).getName());
814813
}

ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,8 @@ private void analyzeAcidExport(ASTNode ast, Table exportTable, ASTNode tokRefOrN
175175
//now generate insert statement
176176
//insert into newTableName select * from ts <where partition spec>
177177
StringBuilder rewrittenQueryStr = generateExportQuery(
178-
newTable.getPartCols(), tokRefOrNameExportTable, (ASTNode) tokRefOrNameExportTable.parent, newTableName);
178+
newTable.getPartCols(),
179+
tokRefOrNameExportTable, (ASTNode) tokRefOrNameExportTable.parent, newTableName);
179180
ReparseResult rr = ParseUtils.parseRewrittenQuery(ctx, rewrittenQueryStr);
180181
Context rewrittenCtx = rr.rewrittenCtx;
181182
rewrittenCtx.setIsUpdateDeleteMerge(false); //it's set in parseRewrittenQuery()

ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ public ColumnStatsAutoGatherContext(SemanticAnalyzer sa, HiveConf conf,
8383
this.isInsertInto = isInsertInto;
8484
this.origCtx = ctx;
8585
columns = tbl.getCols();
86-
partitionColumns = tbl.getPartCols();
86+
// current behaviour intact until we have getCols() giving only non-partition columns for non native tables as well
87+
partitionColumns = tbl.hasNonNativePartitionSupport() ? new ArrayList<>() : tbl.getPartCols();
8788
}
8889

8990
public List<LoadFileDesc> getLoadFileWork() {

0 commit comments

Comments
 (0)