Skip to content

Commit d97e174

Browse files
committed
HIVE-29413: Avoid code duplication by updating getPartCols method for Iceberg tables
1 parent 51e4ab0 commit d97e174

19 files changed

+96
-66
lines changed

iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,6 @@ POSTHOOK: type: DESCTABLE
177177
POSTHOOK: Input: default@tbl_orc
178178
# col_name data_type comment
179179
a int
180-
b string
181-
c string
182180

183181
# Partition Information
184182
# col_name data_type comment
@@ -453,8 +451,6 @@ POSTHOOK: type: DESCTABLE
453451
POSTHOOK: Input: default@tbl_parquet
454452
# col_name data_type comment
455453
a int
456-
b string
457-
c string
458454

459455
# Partition Information
460456
# col_name data_type comment
@@ -729,8 +725,6 @@ POSTHOOK: type: DESCTABLE
729725
POSTHOOK: Input: default@tbl_avro
730726
# col_name data_type comment
731727
a int
732-
b string
733-
c string
734728

735729
# Partition Information
736730
# col_name data_type comment
@@ -1066,9 +1060,6 @@ POSTHOOK: type: DESCTABLE
10661060
POSTHOOK: Input: default@tbl_orc_mixed
10671061
# col_name data_type comment
10681062
a int
1069-
b double
1070-
c int
1071-
d string
10721063

10731064
# Partition Information
10741065
# col_name data_type comment
@@ -1513,9 +1504,6 @@ POSTHOOK: type: DESCTABLE
15131504
POSTHOOK: Input: default@tbl_parquet_mixed
15141505
# col_name data_type comment
15151506
a int
1516-
b double
1517-
c int
1518-
d string
15191507

15201508
# Partition Information
15211509
# col_name data_type comment
@@ -1960,9 +1948,6 @@ POSTHOOK: type: DESCTABLE
19601948
POSTHOOK: Input: default@tbl_avro_mixed
19611949
# col_name data_type comment
19621950
a int
1963-
b double
1964-
c int
1965-
d string
19661951

19671952
# Partition Information
19681953
# col_name data_type comment

iceberg/iceberg-handler/src/test/results/positive/iceberg_drop_column.q.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ POSTHOOK: Input: default@ice_tbl
2727
col_name data_type comment
2828
strcol string
2929
intcol int
30-
pcol string
31-
datecol date
30+
pcol string Transform: identity
31+
datecol date Transform: identity
3232

3333
# Partition Information
3434
# col_name data_type comment

ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,7 @@ private List<FieldSchema> getColumnsByPattern() throws HiveException {
6666

6767
private List<FieldSchema> getCols() throws HiveException {
6868
Table table = context.getDb().getTable(desc.getTableName());
69-
List<FieldSchema> allColumns = new ArrayList<>();
70-
allColumns.addAll(table.getCols());
71-
allColumns.addAll(table.getPartCols());
72-
return allColumns;
69+
return new ArrayList<>(table.getAllCols());
7370
}
7471

7572
private Matcher getMatcher() {

ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,7 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column
174174
List<FieldSchema> partitionColumns = null;
175175
// TODO (HIVE-29413): Refactor to a generic getPartCols() implementation
176176
if (table.isPartitioned()) {
177-
partitionColumns = table.hasNonNativePartitionSupport() ?
178-
table.getStorageHandler().getPartitionKeys(table) :
179-
table.getPartCols();
177+
partitionColumns = table.getPartCols();
180178
}
181179
if (CollectionUtils.isNotEmpty(partitionColumns) &&
182180
conf.getBoolVar(ConfVars.HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY)) {

ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import java.util.List;
2424
import java.util.Map;
2525
import java.util.Set;
26-
import java.util.Map.Entry;
2726

2827
import org.apache.hadoop.hive.conf.HiveConf;
2928
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@
116116
import org.apache.hadoop.hive.ql.Context;
117117
import org.apache.hadoop.hive.ql.ErrorMsg;
118118
import org.apache.hadoop.hive.ql.DriverState;
119+
import org.apache.hadoop.hive.ql.ddl.DDLUtils;
119120
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
120121
import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
121122
import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
@@ -2267,6 +2268,20 @@ public static String formatBinaryString(byte[] array, int start, int length) {
22672268
return sb.toString();
22682269
}
22692270

2271+
public static List<FieldSchema> getFieldSchemasForStatsColumnGather(Table tbl, List<FieldSchema> cols) {
2272+
Set<String> partLower = new HashSet<>();
2273+
for (FieldSchema fs : tbl.getPartCols()) {
2274+
partLower.add(fs.getName().toLowerCase());
2275+
}
2276+
List<FieldSchema> dataOnly = new ArrayList<>();
2277+
for (FieldSchema fs : cols) {
2278+
if (!partLower.contains(fs.getName().toLowerCase())) {
2279+
dataOnly.add(fs);
2280+
}
2281+
}
2282+
return dataOnly;
2283+
}
2284+
22702285
public static List<String> getColumnNamesFromSortCols(List<Order> sortCols) {
22712286
if(sortCols == null) {
22722287
return Collections.emptyList();

ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,7 @@ public List<String> getValues() {
9191
values = new ArrayList<>();
9292

9393
// TODO (HIVE-29413): Refactor to a generic getPartCols() implementation
94-
for (FieldSchema fs : table.hasNonNativePartitionSupport()
95-
? table.getStorageHandler().getPartitionKeys(table)
96-
: table.getPartCols()) {
94+
for (FieldSchema fs : table.getPartCols()) {
9795
String val = partSpec.get(fs.getName());
9896
values.add(val);
9997
}

ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,8 @@ protected void initialize(Table table,
173173
// set default if location is not set and this is a physical
174174
// table partition (not a view partition)
175175
if (table.getDataLocation() != null) {
176-
Path partPath = new Path(table.getDataLocation(), Warehouse.makePartName(table.getPartCols(), tPartition.getValues()));
176+
Path partPath = new Path(table.getDataLocation(),
177+
Warehouse.makePartName(table.getPartCols(), tPartition.getValues()));
177178
tPartition.getSd().setLocation(partPath.toString());
178179
}
179180
}

ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
5858
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
5959
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
60+
import org.apache.hadoop.hive.metastore.HiveMetaHook;
6061
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
6162
import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
6263
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
@@ -118,6 +119,8 @@ public class Table implements Serializable {
118119
private transient StorageHandlerInfo storageHandlerInfo;
119120
private transient MaterializedViewMetadata materializedViewMetadata;
120121

122+
private List<FieldSchema> cachedPartCols;
123+
121124
private TableSpec tableSpec;
122125

123126
private boolean materializedTable;
@@ -193,6 +196,9 @@ public Table makeCopy() {
193196

194197
newTab.setMetaTable(this.getMetaTable());
195198
newTab.setSnapshotRef(this.getSnapshotRef());
199+
if (this.cachedPartCols != null) {
200+
newTab.cachedPartCols = new ArrayList<>(this.cachedPartCols);
201+
}
196202
return newTab;
197203
}
198204

@@ -214,6 +220,7 @@ public org.apache.hadoop.hive.metastore.api.Table getTTable() {
214220
*/
215221
public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) {
216222
this.tTable = tTable;
223+
clearCachedPartCols();
217224
}
218225

219226
/**
@@ -595,12 +602,34 @@ public boolean equals(Object obj) {
595602
}
596603

597604
public List<FieldSchema> getPartCols() {
598-
List<FieldSchema> partKeys = tTable.getPartitionKeys();
599-
if (partKeys == null) {
600-
partKeys = new ArrayList<>();
601-
tTable.setPartitionKeys(partKeys);
605+
if (cachedPartCols != null) {
606+
return cachedPartCols;
607+
}
608+
if (!hasNonNativePartitionSupport()
609+
|| (DDLUtils.isIcebergTable(this) && !isHmsIcebergTableTypeSet())) {
610+
List<FieldSchema> partKeys = tTable.getPartitionKeys();
611+
if (partKeys == null) {
612+
partKeys = new ArrayList<>();
613+
tTable.setPartitionKeys(partKeys);
614+
}
615+
cachedPartCols = partKeys;
616+
return partKeys;
617+
}
618+
List<FieldSchema> keys = getStorageHandler().getPartitionKeys(this);
619+
cachedPartCols = keys != null ? new ArrayList<>(keys) : new ArrayList<>();
620+
return cachedPartCols;
621+
}
622+
623+
private void clearCachedPartCols() {
624+
cachedPartCols = null;
625+
}
626+
627+
private boolean isHmsIcebergTableTypeSet() {
628+
if (tTable.getParameters() == null) {
629+
return false;
602630
}
603-
return partKeys;
631+
String tableType = tTable.getParameters().get(HiveMetaHook.TABLE_TYPE);
632+
return tableType != null && HiveMetaHook.ICEBERG.equalsIgnoreCase(tableType);
604633
}
605634

606635
public FieldSchema getPartColByName(String colName) {
@@ -610,9 +639,7 @@ public FieldSchema getPartColByName(String colName) {
610639
}
611640

612641
public List<String> getPartColNames() {
613-
List<FieldSchema> partCols = hasNonNativePartitionSupport() ?
614-
getStorageHandler().getPartitionKeys(this) : getPartCols();
615-
return partCols.stream().map(FieldSchema::getName)
642+
return getPartCols().stream().map(FieldSchema::getName)
616643
.collect(Collectors.toList());
617644
}
618645

@@ -728,7 +755,7 @@ private boolean isField(String col) {
728755
}
729756

730757
public List<FieldSchema> getCols() {
731-
return getColsInternal(false);
758+
return Utilities.getFieldSchemasForStatsColumnGather(this, getColsInternal(false));
732759
}
733760

734761
public List<FieldSchema> getColsForMetastore() {
@@ -761,14 +788,21 @@ private List<FieldSchema> getColsInternal(boolean forMs) {
761788
* @return List&lt;FieldSchema&gt;
762789
*/
763790
public List<FieldSchema> getAllCols() {
764-
ArrayList<FieldSchema> f_list = new ArrayList<FieldSchema>();
765-
f_list.addAll(getCols());
766-
f_list.addAll(getPartCols());
767-
return f_list;
791+
List<FieldSchema> allCols = new ArrayList<>(getCols());
792+
Set<String> colNames = allCols.stream()
793+
.map(FieldSchema::getName)
794+
.collect(Collectors.toSet());
795+
for (FieldSchema col : getPartCols()) {
796+
if (!colNames.contains(col.getName())) {
797+
allCols.add(col);
798+
}
799+
}
800+
return allCols;
768801
}
769802

770803
public void setPartCols(List<FieldSchema> partCols) {
771804
tTable.setPartitionKeys(partCols);
805+
clearCachedPartCols();
772806
}
773807

774808
public String getCatName() {
@@ -812,7 +846,7 @@ public void setOutputFormatClass(String name) throws HiveException {
812846
}
813847

814848
public boolean isPartitioned() {
815-
return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) :
849+
return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) :
816850
CollectionUtils.isNotEmpty(getPartCols());
817851
}
818852

ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -807,8 +807,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
807807
for (FieldNode col : cols) {
808808
int index = originalOutputColumnNames.indexOf(col.getFieldName());
809809
Table tab = cppCtx.getParseContext().getViewProjectToTableSchema().get(op);
810-
List<FieldSchema> fullFieldList = new ArrayList<FieldSchema>(tab.getCols());
811-
fullFieldList.addAll(tab.getPartCols());
810+
List<FieldSchema> fullFieldList = new ArrayList<>(tab.getAllCols());
812811
cppCtx.getParseContext().getColumnAccessInfo()
813812
.add(tab.getCompleteName(), fullFieldList.get(index).getName());
814813
}

0 commit comments

Comments
 (0)