반응형
개념
샤딩(Sharding)이란?
- 데이터를 물리적으로 서로 다른 데이터베이스에 분산 저장하여 관리하는 방식을 말한다.
- 주로 데이터가 많아질 경우 수평적 확장과 성능 저하 방지를 위해 사용된다.
- 장점
- 많은 양의 데이터를 처리할 때 성능이 향상된다.
- 각 샤드는 독립적이기 때문에 수평 확장이 쉽다.
- 단점
- 여러 데이터베이스에 걸쳐 데이터를 나누기 때문에 관리가 복잡해진다.
- 여러 샤드에 걸쳐 데이터를 조회해야 할 때 성능 저하가 발생한다.
ShardingSphere란?
- RDBMS의 샤딩 처리를 지원하는 아파치 오픈소스 프로젝트를 말한다.
예제
MySQL 실행
docker-compose.yml
# Two independent MySQL instances, one per shard.
# Both expose container port 3306 on a different host port.
version: "3.3"
services:
  # Shard 0 -> reachable at localhost:13306
  mysql01:
    image: mysql:9.0.1
    ports:
      - "13306:3306"
    environment:
      TZ: Asia/Seoul
      MYSQL_DATABASE: test
      MYSQL_ROOT_PASSWORD: 123456
    command:
      - "--character-set-server=utf8mb4"
      - "--collation-server=utf8mb4_unicode_ci"
  # Shard 1 -> reachable at localhost:23306
  mysql02:
    image: mysql:9.0.1
    ports:
      - "23306:3306"
    environment:
      TZ: Asia/Seoul
      MYSQL_DATABASE: test
      MYSQL_ROOT_PASSWORD: 123456
    command:
      - "--character-set-server=utf8mb4"
      - "--collation-server=utf8mb4_unicode_ci"
실행
docker-compose up -d
테이블 생성
- 모든 데이터베이스에 아래 쿼리를 실행하여 테이블 생성
-- Message table; the same DDL is executed on every shard database.
CREATE TABLE Message
(
    id CHAR(36),
    sender VARCHAR(100),
    receiver VARCHAR(100),
    title VARCHAR(100),
    contents VARCHAR(500),
    createdAt DATETIME(3) NOT NULL,
    PRIMARY KEY (id, sender) -- Keeping the sharding key (sender) inside the primary key makes the table easier to manage.
);
build.gradle.kts
dependencies {
    // ... (other dependencies of the project are omitted here)
    // JPA support plus the MySQL JDBC driver, which is only needed at runtime.
    implementation("org.springframework.boot:spring-boot-starter-data-jpa")
    runtimeOnly("com.mysql:mysql-connector-j")
    // ShardingSphere-JDBC for sharding.
    implementation("org.apache.shardingsphere:shardingsphere-jdbc:5.5.0") {
        // Drops the transitive shardingsphere-test-util module.
        // NOTE(review): presumably because it is unavailable or unneeded at runtime -- confirm.
        exclude(group = "org.apache.shardingsphere", module = "shardingsphere-test-util")
    }
}
application.yml
spring:
  # Custom section bound to ShardingProperties (prefix "spring.sharding").
  sharding:
    # One entry per shard; the list index becomes the suffix of "sharding_ds_<index>".
    datasource:
      - driver-class-name: com.mysql.cj.jdbc.Driver
        jdbc-url: jdbc:mysql://localhost:13306/test
        username: root
        password: 123456
      - driver-class-name: com.mysql.cj.jdbc.Driver
        jdbc-url: jdbc:mysql://localhost:23306/test
        username: root
        password: 123456
    # Tables that are split across the shards and the column used to route them.
    sharding-tables:
      - table-name: Message
        sharding-key: sender
        sharding-algorithm: hash
  jpa:
    open-in-view: false
    properties:
      hibernate:
        hbm2ddl:
          auto: none
        show_sql: true
        format_sql: false
        use_sql_comments: false
        implicit_naming_strategy: org.hibernate.boot.model.naming.ImplicitNamingStrategyLegacyJpaImpl
        physical_naming_strategy: org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl
logging:
  level:
    # Logs the values bound to JDBC parameters.
    org.hibernate.orm.jdbc.bind: trace
ShardingProperties
/**
 * Sharding settings read from the `spring.sharding` configuration prefix.
 *
 * @property dataSource one connection pool per shard, in declaration order.
 * @property shardingTables tables to shard; may be absent.
 * @property broadcastTables names of broadcast tables; may be absent.
 */
@ConfigurationProperties(prefix = "spring.sharding")
data class ShardingProperties(
    val dataSource: List<HikariDataSource>,
    val shardingTables: List<ShardingTable>?,
    val broadcastTables: List<String>?,
) {
    /**
     * Sharding rule for a single table.
     *
     * @property tableName name of the sharded table.
     * @property shardingKey column whose value selects the shard.
     * @property shardingAlgorithm algorithm used to map the key to a shard.
     */
    data class ShardingTable(
        val tableName: String,
        val shardingKey: String,
        val shardingAlgorithm: ShardingAlgorithm,
    ) {
        /** Supported sharding algorithms; the constant name doubles as the configuration value. */
        enum class ShardingAlgorithm {
            hash
        }
    }
}
HashShardingAlgorithm
/**
 * Routes a row to a data source by hashing its sharding value with CRC32.
 *
 * The target is named `sharding_ds_<crc32(value) % number of candidates>`, so the
 * candidate data sources are expected to be named `sharding_ds_0`, `sharding_ds_1`, ...
 */
class HashShardingAlgorithm : StandardShardingAlgorithm<Comparable<*>> {
    /**
     * Picks the single data source for an exact sharding value (`=` / `IN`).
     *
     * @param dataSourceNames candidate data source names.
     * @param shardingValue the sharding column value; hashed via its `toString()` form.
     * @return the name of the data source that owns the value.
     * @throws IllegalStateException if there are no candidates or the computed name is not among them.
     */
    override fun doSharding(dataSourceNames: MutableCollection<String>, shardingValue: PreciseShardingValue<Comparable<*>>): String {
        // Guard first: an empty collection would otherwise surface as a bare "/ by zero".
        check(dataSourceNames.isNotEmpty()) { "no data source is available for sharding" }
        // CRC32.getValue() is never negative, so the remainder is a valid index.
        val shardIndex = crc32(shardingValue.value.toString()) % dataSourceNames.size
        val dataSourceName = "sharding_ds_$shardIndex"
        check(dataSourceName in dataSourceNames) { "$dataSourceName is not supported" }
        return dataSourceName
    }

    /**
     * Resolves the data sources for a range condition on the sharding key.
     *
     * A hash does not preserve ordering, so a range cannot be narrowed to a subset of
     * shards; every candidate is returned and the query is sent to all of them.
     *
     * @param dataSourceNames candidate data source names.
     * @param shardingValue the range condition (unused).
     * @return all candidate data source names.
     */
    override fun doSharding(dataSourceNames: MutableCollection<String>, shardingValue: RangeShardingValue<Comparable<*>>): MutableCollection<String> =
        dataSourceNames

    /** Returns the CRC32 checksum of [value] encoded as UTF-8. */
    private fun crc32(value: String): Long =
        CRC32().apply { update(value.toByteArray(StandardCharsets.UTF_8)) }.value
}
ShardingConfig
/**
 * Builds the ShardingSphere [DataSource] from [ShardingProperties].
 *
 * Each configured pool is registered as `sharding_ds_<index>`; [HashShardingAlgorithm]
 * produces names of the same form, so the two must stay in sync.
 */
@EnableConfigurationProperties(ShardingProperties::class)
@Configuration
class ShardingConfig(
    private val shardingProperties: ShardingProperties,
) {
    /**
     * Creates the sharding-aware data source that replaces a plain single-database one.
     *
     * @return a data source that applies the sharding and broadcast rules.
     * @throws IllegalArgumentException if no data source is configured.
     */
    @Bean
    fun shardingDataSource(): DataSource {
        // Fail fast: with zero shards the table rule below would contain the range "0..-1".
        require(shardingProperties.dataSource.isNotEmpty()) {
            "spring.sharding.datasource must declare at least one data source"
        }
        val dataSourceMap = shardingProperties.dataSource
            .mapIndexed { index, dataSource -> "${DATA_SOURCE_PREFIX}$index" to dataSource }
            .toMap()
        val configs = listOf(
            createShardingRuleConfig(),
            createBroadcastRuleConfig(),
        )
        return ShardingSphereDataSourceFactory.createDataSource(dataSourceMap, configs, Properties())
    }

    /** Registers the hash algorithm and one database-sharding rule per configured table. */
    private fun createShardingRuleConfig(): ShardingRuleConfiguration {
        val shardingConfig = ShardingRuleConfiguration()

        // Register HashShardingAlgorithm under the enum constant's name as a CLASS_BASED algorithm.
        val hashAlgorithmProperties = Properties().apply {
            this["strategy"] = "STANDARD"
            this["algorithmClassName"] = HashShardingAlgorithm::class.java.name
        }
        shardingConfig.shardingAlgorithms[ShardingProperties.ShardingTable.ShardingAlgorithm.hash.name] =
            AlgorithmConfiguration("CLASS_BASED", hashAlgorithmProperties)

        // One rule per sharded table: the same table name exists on every data source.
        val lastIndex = shardingProperties.dataSource.size - 1
        shardingProperties.shardingTables?.forEach { table ->
            // "\$" keeps "${0..N}" literal so ShardingSphere expands it as an inline expression.
            val actualDataNodes = "${DATA_SOURCE_PREFIX}\${0..$lastIndex}.${table.tableName}"
            val tableConfig = ShardingTableRuleConfiguration(table.tableName, actualDataNodes)
            tableConfig.databaseShardingStrategy =
                StandardShardingStrategyConfiguration(table.shardingKey, table.shardingAlgorithm.name)
            shardingConfig.tables.add(tableConfig)
        }
        return shardingConfig
    }

    /** Builds the broadcast rule; an absent list is treated as "no broadcast tables". */
    private fun createBroadcastRuleConfig(): BroadcastRuleConfiguration =
        BroadcastRuleConfiguration(shardingProperties.broadcastTables ?: listOf())

    private companion object {
        /** Prefix of every registered data source name; the list index is appended. */
        const val DATA_SOURCE_PREFIX = "sharding_ds_"
    }
}
Message
/**
 * JPA entity for a message.
 *
 * The identifier is the composite ([id], [sender]), declared through [PrimaryKey].
 * Only [title] and [contents] are mutable.
 */
@IdClass(Message.PrimaryKey::class)
@Entity
data class Message(
    @Id
    val id: String,
    @Id
    val sender: String,
    val receiver: String,
    var title: String,
    var contents: String,
    val createdAt: LocalDateTime,
) {
    /**
     * Id class mirroring the two `@Id` properties of [Message].
     *
     * Every parameter has a default, so Kotlin also generates a no-argument constructor.
     */
    data class PrimaryKey(
        val id: String = UUID.randomUUID().toString(),
        val sender: String = "",
    ) : Serializable
}
MessageRepository
/** Spring Data repository for [Message], keyed by the composite [Message.PrimaryKey]. */
@Repository
interface MessageRepository : JpaRepository<Message, Message.PrimaryKey> {
    /** Returns every message whose `sender` equals [sender]. */
    fun findAllBySender(sender: String): List<Message>
}
MessageTest
/**
 * Integration test that writes to and reads from the sharded `Message` table.
 *
 * The database is not cleaned between runs, so every assertion targets the rows
 * inserted by this run (identified by their generated ids) instead of "the first
 * row for a sender", which could be a leftover from an earlier run.
 */
@SpringBootTest
class MessageTest {
    @Autowired
    private lateinit var messageRepository: MessageRepository

    @Test
    fun test() {
        // Insert five messages, each with a different sender (the sharding key).
        val saved = (1..5).map { n ->
            messageRepository.save(
                Message(UUID.randomUUID().toString(), "tyler-$n", "john-$n", "title-$n", "contents-$n", LocalDateTime.now()),
            )
        }
        val (first, second) = saved

        // The primary key is the composite (id, sender), so the UPDATE's WHERE clause contains
        // both columns and the statement is routed to a single shard through sender.
        messageRepository.save(second.copy(title = "title-22"))

        // Look the rows up by primary key so exactly the rows written above are verified.
        val message1 = messageRepository.findById(Message.PrimaryKey(first.id, first.sender)).orElseThrow()
        val message2 = messageRepository.findById(Message.PrimaryKey(second.id, second.sender)).orElseThrow()
        assertEquals("title-1", message1.title)
        assertEquals("title-22", message2.title)

        // The sender-based query must also see the updated row.
        assertEquals("title-22", messageRepository.findAllBySender(second.sender).first { it.id == second.id }.title)
    }
}
기타
특정 샤드를 직접 지정해서 쿼리를 수행해야 한다면?
// Pin the statement below to one explicitly named shard; `use` closes the HintManager afterwards.
HintManager.getInstance().use { hint ->
    // Name of the DataSource that represents the target shard.
    hint.setDataSourceName("sharding_ds_0")
    entityManager.persist(Member(0, "member"))
}
참고
반응형
'Development > Spring' 카테고리의 다른 글
[Spring] Partitioning (2) | 2024.09.13 |
---|---|
[Spring] Circuit Breaker(with resilience4j) (0) | 2024.07.15 |
[Spring] multi-module 프로젝트 구성하기(kotlin, gradle) (0) | 2024.07.07 |
[Spring] Exposed (0) | 2024.04.10 |
[Spring] HTTP Interface(Deprecated) (0) | 2024.03.19 |