반응형

개념

샤딩(Sharding)이란?

  • 데이터를 물리적으로 서로 다른 데이터베이스에 분산 저장하여 관리하는 방식을 말한다.
  • 주로 데이터가 많아질 때 수평적 확장을 가능하게 하고 성능 저하를 방지하기 위해 사용된다.
  • 장점
    • 많은 양의 데이터를 처리할 때 성능이 향상된다.
    • 각 샤드는 독립적이기 때문에 수평 확장이 쉽다.
  • 단점
    • 여러 데이터베이스에 걸쳐 데이터를 나누기 때문에 관리가 복잡해진다.
    • 여러 샤드에 걸쳐 데이터를 조회해야 할 때 성능 저하가 발생할 수 있다.

ShardingSphere란?

  • RDBMS의 샤딩 처리를 지원하는 아파치 오픈소스 프로젝트를 말한다.

예제

MySQL 실행

docker-compose.yml

# Starts two MySQL containers that are configured identically except for the
# host port they publish. Each one creates a database named "test".
version: "3.3"
services:
  # First MySQL instance: container port 3306 is published on host port 13306.
  mysql01:
    image: mysql:9.0.1
    ports:
      - "13306:3306"
    environment:
      TZ: Asia/Seoul
      MYSQL_DATABASE: test
      MYSQL_ROOT_PASSWORD: 123456
    command:
      # Server character set / collation passed as mysqld options.
      - "--character-set-server=utf8mb4"
      - "--collation-server=utf8mb4_unicode_ci"

  # Second MySQL instance: same settings, published on host port 23306.
  mysql02:
    image: mysql:9.0.1
    ports:
      - "23306:3306"
    environment:
      TZ: Asia/Seoul
      MYSQL_DATABASE: test
      MYSQL_ROOT_PASSWORD: 123456
    command:
      # Server character set / collation passed as mysqld options.
      - "--character-set-server=utf8mb4"
      - "--collation-server=utf8mb4_unicode_ci"

실행

docker-compose up -d

테이블 생성

  • 모든 데이터베이스에 아래 쿼리를 실행하여 테이블 생성
-- Message table; the same DDL is meant to be executed on every database (shard).
CREATE TABLE Message
(
    id        CHAR(36),
    sender    VARCHAR(100),
    receiver  VARCHAR(100),
    title     VARCHAR(100),
    contents  VARCHAR(500),
    createdAt DATETIME(3) NOT NULL,
    PRIMARY KEY (id, sender) -- Including the sharding key in the primary key makes the table easier to manage.
);

build.gradle.kts

// Dependencies added for this example: Spring Data JPA, the MySQL JDBC driver,
// and ShardingSphere-JDBC.
dependencies {
    // ...

    // for jpa
    implementation("org.springframework.boot:spring-boot-starter-data-jpa")
    runtimeOnly("com.mysql:mysql-connector-j")

    // for sharding
    implementation("org.apache.shardingsphere:shardingsphere-jdbc:5.5.0") {
        // NOTE(review): the reason for excluding shardingsphere-test-util is not shown here — confirm.
        exclude(group = "org.apache.shardingsphere", module = "shardingsphere-test-util")
    }
}

application.yml

spring:
  # Custom properties under the "spring.sharding" prefix.
  sharding:
    # One entry per MySQL instance (host ports 13306 and 23306).
    datasource:
      - driver-class-name: com.mysql.cj.jdbc.Driver
        jdbc-url: jdbc:mysql://localhost:13306/test
        username: root
        password: 123456
      - driver-class-name: com.mysql.cj.jdbc.Driver
        jdbc-url: jdbc:mysql://localhost:23306/test
        username: root
        password: 123456
    # Tables to shard: the Message table is sharded on its "sender" column
    # using the algorithm named "hash".
    sharding-tables:
      - table-name: Message
        sharding-key: sender
        sharding-algorithm: hash

  jpa:
    open-in-view: false
    properties:
      hibernate:
        # Schema generation is turned off; tables are created manually.
        hbm2ddl:
          auto: none
        # Print executed SQL, unformatted and without comments.
        show_sql: true
        format_sql: false
        use_sql_comments: false
        # Naming strategies chosen for mapping entity names to table/column names.
        implicit_naming_strategy: org.hibernate.boot.model.naming.ImplicitNamingStrategyLegacyJpaImpl
        physical_naming_strategy: org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl

logging:
  level:
    # Trace-level logging for Hibernate's JDBC bind logger.
    org.hibernate.orm.jdbc.bind: trace

ShardingProperties

/**
 * Configuration properties bound from the `spring.sharding` prefix.
 *
 * @property dataSource data sources to shard across, in declaration order.
 * @property shardingTables tables that should be sharded; may be absent.
 * @property broadcastTables names of broadcast tables; may be absent.
 */
@ConfigurationProperties(prefix = "spring.sharding")
data class ShardingProperties(
    val dataSource: List<HikariDataSource>,
    val shardingTables: List<ShardingTable>?,
    val broadcastTables: List<String>?,
) {
    /**
     * Sharding settings for a single table.
     *
     * @property tableName name of the table to shard.
     * @property shardingKey column whose value is used for sharding.
     * @property shardingAlgorithm algorithm used to pick a shard.
     */
    data class ShardingTable(
        val tableName: String,
        val shardingKey: String,
        val shardingAlgorithm: ShardingAlgorithm,
    ) {
        /** Supported sharding algorithms; only `hash` is declared. */
        enum class ShardingAlgorithm {
            hash
        }
    }
}

HashShardingAlgorithm

/**
 * Sharding algorithm that picks a data source from the CRC32 hash of the sharding value.
 *
 * The target name is built as `sharding_ds_<hash % number of targets>`, so the available
 * target names are expected to follow the `sharding_ds_<index>` pattern.
 */
class HashShardingAlgorithm : StandardShardingAlgorithm<Comparable<*>> {
    /**
     * Routes a single sharding value to exactly one data source.
     *
     * @param dataSourceNames names of the data sources that may be chosen.
     * @param shardingValue the sharding value; its `toString()` form is hashed.
     * @return the name of the selected data source.
     * @throws IllegalStateException if [dataSourceNames] is empty or does not contain the computed name.
     */
    override fun doSharding(dataSourceNames: MutableCollection<String>, shardingValue: PreciseShardingValue<Comparable<*>>): String {
        // Fail with a clear message instead of an ArithmeticException from "% 0".
        check(dataSourceNames.isNotEmpty()) { "No data sources available for sharding" }

        val shardIndex = crc32(shardingValue.value.toString()) % dataSourceNames.size
        val dataSourceName = "sharding_ds_$shardIndex"
        check(dataSourceName in dataSourceNames) { "$dataSourceName is not supported" }
        return dataSourceName
    }

    /**
     * Routes a range condition on the sharding key.
     *
     * A hash does not preserve the ordering of the sharding values, so a range cannot be
     * narrowed down to a subset of shards; every available data source is returned.
     *
     * @param dataSourceNames names of the data sources that may be chosen.
     * @param shardingValue the range sharding value (not inspected).
     * @return all of [dataSourceNames].
     */
    override fun doSharding(dataSourceNames: MutableCollection<String>, shardingValue: RangeShardingValue<Comparable<*>>): MutableCollection<String> {
        return dataSourceNames
    }

    /** Returns the CRC32 checksum of [value] encoded as UTF-8. */
    private fun crc32(value: String): Long {
        val checksum = CRC32()
        checksum.update(value.toByteArray(StandardCharsets.UTF_8))
        return checksum.value
    }
}

ShardingConfig

/**
 * Spring configuration that builds a ShardingSphere [DataSource] from [ShardingProperties].
 *
 * Every configured data source is registered as `sharding_ds_<index>`, where the index is
 * its position in [ShardingProperties.dataSource].
 */
@EnableConfigurationProperties(ShardingProperties::class)
@Configuration
class ShardingConfig(
    private val shardingProperties: ShardingProperties,
) {
    /** Creates the ShardingSphere data source with the sharding and broadcast rules applied. */
    @Bean
    fun shardingDataSource(): DataSource {
        val shards = shardingProperties.dataSource
        val namedDataSources = shards.indices
            .map { index -> "sharding_ds_$index" to shards[index] }
            .toMap()

        val ruleConfigs = listOf(
            createShardingRuleConfig(),
            createBroadcastRuleConfig(),
        )

        return ShardingSphereDataSourceFactory.createDataSource(namedDataSources, ruleConfigs, Properties())
    }

    /**
     * Builds the sharding rule: registers the class-based hash algorithm and adds one
     * table rule per entry in [ShardingProperties.shardingTables].
     */
    private fun createShardingRuleConfig(): ShardingRuleConfiguration {
        val ruleConfig = ShardingRuleConfiguration()

        // Register the hash algorithm under the enum constant's name.
        val hashAlgorithmName = ShardingProperties.ShardingTable.ShardingAlgorithm.hash.name
        val hashAlgorithmProps = Properties()
        hashAlgorithmProps.setProperty("strategy", "STANDARD")
        hashAlgorithmProps.setProperty("algorithmClassName", HashShardingAlgorithm::class.java.name)
        ruleConfig.shardingAlgorithms[hashAlgorithmName] = AlgorithmConfiguration("CLASS_BASED", hashAlgorithmProps)

        // Add a table rule for each configured sharding table.
        val lastShardIndex = shardingProperties.dataSource.size - 1
        for (table in shardingProperties.shardingTables.orEmpty()) {
            // Inline expression covering every data source, e.g. sharding_ds_${0..1}.Message
            val actualDataNodes = "sharding_ds_\${0..$lastShardIndex}.${table.tableName}"
            val tableRule = ShardingTableRuleConfiguration(table.tableName, actualDataNodes)
            tableRule.databaseShardingStrategy =
                StandardShardingStrategyConfiguration(table.shardingKey, table.shardingAlgorithm.name)
            ruleConfig.tables.add(tableRule)
        }

        return ruleConfig
    }

    /** Builds the broadcast rule from the configured table names (empty when none are set). */
    private fun createBroadcastRuleConfig(): BroadcastRuleConfiguration {
        val broadcastTables = shardingProperties.broadcastTables ?: listOf()
        return BroadcastRuleConfiguration(broadcastTables)
    }
}

Message

/**
 * JPA entity for a message.
 *
 * The identifier is the composite of [id] and [sender], declared through [PrimaryKey]
 * as the `@IdClass`. Only [title] and [contents] are mutable.
 */
@IdClass(Message.PrimaryKey::class)
@Entity
data class Message(
    @Id
    val id: String,
    @Id
    val sender: String,
    val receiver: String,
    var title: String,
    var contents: String,
    val createdAt: LocalDateTime,
) {
    /**
     * Composite key class for [Message], holding the same two `@Id` fields.
     *
     * Both parameters have defaults ([id] is a random UUID string, [sender] is empty),
     * so the class can be instantiated without arguments.
     */
    data class PrimaryKey(
        val id: String = UUID.randomUUID().toString(),
        val sender: String = "",
    ) : Serializable
}

MessageRepository

/** Spring Data JPA repository for [Message], keyed by [Message.PrimaryKey]. */
@Repository
interface MessageRepository : JpaRepository<Message, Message.PrimaryKey> {
    /** Finds all messages with the given [sender]. */
    fun findAllBySender(sender: String): List<Message>
}

MessageTest

/** Integration test that saves, updates and reads back [Message] rows through [MessageRepository]. */
@SpringBootTest
class MessageTest {
    @Autowired
    private lateinit var messageRepository: MessageRepository

    /** Saves five messages with distinct senders, updates one title, then checks two of them. */
    @Test
    fun test() {
        for (n in 1..5) {
            val message = Message(
                id = UUID.randomUUID().toString(),
                sender = "tyler-$n",
                receiver = "john-$n",
                title = "title-$n",
                contents = "contents-$n",
                createdAt = LocalDateTime.now(),
            )
            messageRepository.save(message)
        }

        // The primary key is the composite (id, sender), so the UPDATE's WHERE clause contains
        // both columns; the query is routed to a specific shard based on sender.
        val toRename = messageRepository.findAllBySender("tyler-2").first()
        messageRepository.save(toRename.copy(title = "title-22"))

        val untouched = messageRepository.findAllBySender("tyler-1").first()
        val renamed = messageRepository.findAllBySender("tyler-2").first()

        assertEquals("title-1", untouched.title)
        assertEquals("title-22", renamed.title)
    }
}

기타

특정 샤드를 직접 지정해서 쿼리를 수행해야 한다면?

// Runs the persist call with an explicitly chosen data source name set on the HintManager.
// `use` closes the HintManager when the block exits.
HintManager.getInstance().use { hintManager ->
    hintManager.setDataSourceName("sharding_ds_0") // Name of the DataSource for the specific shard to target
    entityManager.persist(Member(0, "member"))
}

참고

반응형

+ Recent posts